ansible/library/nagios

706 lines
24 KiB
Text
Raw Normal View History

2012-08-27 07:50:52 +02:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is largely copied from the Nagios module included in the
# Func project. Original copyright follows:
#
# func-nagios - Schedule downtime and enables/disable notifications
2012-08-27 17:55:38 +02:00
# Copyright 2011, Red Hat, Inc.
2012-08-27 07:50:52 +02:00
# Tim Bielawa <tbielawa@redhat.com>
#
# This software may be freely redistributed under the terms of the GNU
# general public license version 2.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
2012-08-27 17:39:52 +02:00
Action Summaries (all must be delegate_to'd a nagios server):
* Set 30 minutes of downtime for a single service:
action: nagios action=downtime minutes=30 service=httpd host=$inventory_hostname
* Set 30 minutes of downtime for a few services:
action: nagios action=downtime minutes=30 services=httpd,git,nfs host=$inventory_hostname
* (Un)Silence all alerts from a host/service:
action: nagios action=silence host=$inventory_hostname
action: nagios action=unsilence host=$inventory_hostname
* Enable/Disable alerts for a host or service(s):
action: nagios action=enable_alerts service=host host=$inventory_hostname
action: nagios action=disable_alerts services=httpd,git,nfs host=$inventory_hostname
Note: 'service' is an alias for 'services'.
Separate multiple services with commas.
2012-08-27 07:50:52 +02:00
Configuration:
2012-08-27 07:50:52 +02:00
If your nagios cmdfile is not /var/spool/nagios/cmd/nagios.cmd you
can configure ansible (on your nagios server) to use the correct
one by making a file called /etc/ansible/modules/nagios.conf that
looks like this:
2012-08-27 07:50:52 +02:00
[main]
cmdfile = /path/to/your/nagios.cmd
2012-08-27 07:50:52 +02:00
When calling this module via ansible, use the 'cmdfile' parameter to
set it explicitly.
2012-08-27 07:50:52 +02:00
"""
import ConfigParser
import types
import time
MODULE_CONFIG = '/etc/ansible/modules/nagios.conf'
DEFAULT_CMDFILE = '/var/spool/nagios/cmd/nagios.cmd'
######################################################################
def which_cmdfile():
try:
config = ConfigParser.SafeConfigParser({'cmdfile': DEFAULT_CMDFILE})
config.read(MODULE_CONFIG)
return config.get('main', 'cmdfile')
except:
return DEFAULT_CMDFILE
######################################################################
def main():
ACTION_CHOICES = [
'downtime',
'silence',
'unsilence',
'enable_alerts',
'disable_alerts'
]
module = AnsibleModule(
argument_spec=dict(
action=dict(required=True, default=None, choices=ACTION_CHOICES),
author=dict(default='Ansible'),
host=dict(required=True, default=None),
minutes=dict(default=30),
cmdfile=dict(default=which_cmdfile()),
services=dict(default=None, aliases=['service']),
)
)
action = module.params['action']
minutes = module.params['minutes']
services = module.params['services']
##################################################################
# Required args per action:
# downtime = (minutes, service, host)
# (un)silence = (host)
# (enable/disable)_alerts = (service, host)
#
# AnsibleModule will verify most stuff, we need to verify
# 'minutes' and 'service' manually.
##################################################################
if action == 'downtime':
# Make sure there's an actual service selected
if not services:
module.fail_json(msg='no service selected to set downtime for')
# Make sure minutes is a number
try:
m = int(minutes)
if not isinstance(m, types.IntType):
module.fail_json(msg='minutes must be a number')
except:
module.fail_json(msg='invalid entry for minutes')
##################################################################
if action in ['enable_alerts', 'disable_alerts']:
if not services:
module.fail_json(msg='a service is required when setting alerts')
##################################################################
ansible_nagios = Nagios(module, **module.params)
ansible_nagios.act()
##################################################################
2012-08-27 07:50:52 +02:00
######################################################################
2012-08-27 07:50:52 +02:00
class Nagios(object):
"""
Perform common tasks in Nagios related to downtime and
notifications.
The complete set of external commands Nagios handles is documented
on their website:
http://old.nagios.org/developerinfo/externalcommands/commandlist.php
Note that in the case of `schedule_svc_downtime`,
`enable_svc_notifications`, and `disable_svc_notifications`, the
service argument should be passed as a list.
"""
def __init__(self, module, **kwargs):
self.module = module
self.action = kwargs['action']
self.author = kwargs['author']
self.host = kwargs['host']
self.minutes = int(kwargs['minutes'])
self.cmdfile = kwargs['cmdfile']
if (kwargs['services'] is None) or (kwargs['services'] == 'host'):
self.services = kwargs['services']
else:
self.services = kwargs['services'].split(',')
self.command_results = []
def _now(self):
"""
The time in seconds since 12:00:00AM Jan 1, 1970
"""
return int(time.time())
def _write_command(self, cmd):
"""
Write the given command to the Nagios command file
"""
try:
fp = open(self.cmdfile, 'w')
2012-08-27 07:50:52 +02:00
fp.write(cmd)
fp.flush()
fp.close()
self.command_results.append(cmd.strip())
except IOError:
self.module.fail_json(msg='unable to write to nagios command file',
cmdfile=self.cmdfile)
2012-08-27 07:50:52 +02:00
def _fmt_dt_str(self, cmd, host, duration, author=None,
comment="Scheduling downtime", start=None,
svc=None, fixed=1, trigger=0):
"""
Format an external-command downtime string.
cmd - Nagios command ID
host - Host schedule downtime on
duration - Minutes to schedule downtime for
author - Name to file the downtime as
comment - Reason for running this command (upgrade, reboot, etc)
start - Start of downtime in seconds since 12:00AM Jan 1 1970
Default is to use the entry time (now)
svc - Service to schedule downtime for, omit when for host downtime
fixed - Start now if 1, start when a problem is detected if 0
trigger - Optional ID of event to start downtime from. Leave as 0 for
fixed downtime.
Syntax: [submitted] COMMAND;<host_name>;[<service_description>]
<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
<comment>
"""
entry_time = self._now()
if start is None:
start = entry_time
hdr = "[%s] %s;%s;" % (entry_time, cmd, host)
duration_s = (duration * 60)
end = start + duration_s
if not author:
author = self.author
if svc is not None:
dt_args = [svc, str(start), str(end), str(fixed), str(trigger),
str(duration_s), author, comment]
else:
# Downtime for a host if no svc specified
dt_args = [str(start), str(end), str(fixed), str(trigger),
str(duration_s), author, comment]
dt_arg_str = ";".join(dt_args)
dt_str = hdr + dt_arg_str + "\n"
return dt_str
def _fmt_notif_str(self, cmd, host, svc=None):
"""
Format an external-command notification string.
cmd - Nagios command ID.
host - Host to en/disable notifications on..
svc - Service to schedule downtime for. A value is not required
for host downtime.
Syntax: [submitted] COMMAND;<host_name>[;<service_description>]
"""
entry_time = self._now()
if svc is not None:
notif_str = "[%s] %s;%s;%s\n" % (entry_time, cmd, host, svc)
else:
# Downtime for a host if no svc specified
notif_str = "[%s] %s;%s\n" % (entry_time, cmd, host)
return notif_str
def schedule_svc_downtime(self, host, services=[], minutes=30):
"""
This command is used to schedule downtime for a particular
service.
During the specified downtime, Nagios will not send
notifications out about the service.
Syntax: SCHEDULE_SVC_DOWNTIME;<host_name>;<service_description>
<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
<comment>
"""
cmd = "SCHEDULE_SVC_DOWNTIME"
for service in services:
dt_cmd_str = self._fmt_dt_str(cmd, host, minutes, svc=service)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def schedule_host_downtime(self, host, minutes=30):
"""
This command is used to schedule downtime for a particular
host.
During the specified downtime, Nagios will not send
notifications out about the host.
Syntax: SCHEDULE_HOST_DOWNTIME;<host_name>;<start_time>;<end_time>;
<fixed>;<trigger_id>;<duration>;<author>;<comment>
"""
cmd = "SCHEDULE_HOST_DOWNTIME"
dt_cmd_str = self._fmt_dt_str(cmd, host, minutes)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def schedule_hostgroup_host_downtime(self, hostgroup, minutes=30):
"""
This command is used to schedule downtime for all hosts in a
particular hostgroup.
During the specified downtime, Nagios will not send
notifications out about the hosts.
Syntax: SCHEDULE_HOSTGROUP_HOST_DOWNTIME;<hostgroup_name>;<start_time>;
<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
"""
cmd = "SCHEDULE_HOSTGROUP_HOST_DOWNTIME"
dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def schedule_hostgroup_svc_downtime(self, hostgroup, minutes=30):
"""
This command is used to schedule downtime for all services in
a particular hostgroup.
During the specified downtime, Nagios will not send
notifications out about the services.
Note that scheduling downtime for services does not
automatically schedule downtime for the hosts those services
are associated with.
Syntax: SCHEDULE_HOSTGROUP_SVC_DOWNTIME;<hostgroup_name>;<start_time>;
<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
"""
cmd = "SCHEDULE_HOSTGROUP_SVC_DOWNTIME"
dt_cmd_str = self._fmt_dt_str(cmd, hostgroup, minutes)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def schedule_servicegroup_host_downtime(self, servicegroup, minutes=30):
"""
This command is used to schedule downtime for all hosts in a
particular servicegroup.
During the specified downtime, Nagios will not send
notifications out about the hosts.
Syntax: SCHEDULE_SERVICEGROUP_HOST_DOWNTIME;<servicegroup_name>;
<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
<comment>
"""
cmd = "SCHEDULE_SERVICEGROUP_HOST_DOWNTIME"
dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def schedule_servicegroup_svc_downtime(self, servicegroup, minutes=30):
"""
This command is used to schedule downtime for all services in
a particular servicegroup.
During the specified downtime, Nagios will not send
notifications out about the services.
Note that scheduling downtime for services does not
automatically schedule downtime for the hosts those services
are associated with.
Syntax: SCHEDULE_SERVICEGROUP_SVC_DOWNTIME;<servicegroup_name>;
<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;
<comment>
"""
cmd = "SCHEDULE_SERVICEGROUP_SVC_DOWNTIME"
dt_cmd_str = self._fmt_dt_str(cmd, servicegroup, minutes)
self._write_command(dt_cmd_str)
2012-08-27 07:50:52 +02:00
def disable_host_svc_notifications(self, host):
"""
This command is used to prevent notifications from being sent
out for all services on the specified host.
Note that this command does not disable notifications from
being sent out about the host.
Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
"""
cmd = "DISABLE_HOST_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, host)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_host_notifications(self, host):
"""
This command is used to prevent notifications from being sent
out for the specified host.
Note that this command does not disable notifications for
services associated with this host.
Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
"""
cmd = "DISABLE_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, host)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_svc_notifications(self, host, services=[]):
"""
This command is used to prevent notifications from being sent
out for the specified service.
Note that this command does not disable notifications from
being sent out about the host.
Syntax: DISABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
"""
cmd = "DISABLE_SVC_NOTIFICATIONS"
for service in services:
notif_str = self._fmt_notif_str(cmd, host, svc=service)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_servicegroup_host_notifications(self, servicegroup):
"""
This command is used to prevent notifications from being sent
out for all hosts in the specified servicegroup.
Note that this command does not disable notifications for
services associated with hosts in this service group.
Syntax: DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
"""
cmd = "DISABLE_SERVICEGROUP_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, servicegroup)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_servicegroup_svc_notifications(self, servicegroup):
"""
This command is used to prevent notifications from being sent
out for all services in the specified servicegroup.
Note that this does not prevent notifications from being sent
out about the hosts in this servicegroup.
Syntax: DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
"""
cmd = "DISABLE_SERVICEGROUP_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, servicegroup)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_hostgroup_host_notifications(self, hostgroup):
"""
Disables notifications for all hosts in a particular
hostgroup.
Note that this does not disable notifications for the services
associated with the hosts in the hostgroup - see the
DISABLE_HOSTGROUP_SVC_NOTIFICATIONS command for that.
Syntax: DISABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
"""
cmd = "DISABLE_HOSTGROUP_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, hostgroup)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def disable_hostgroup_svc_notifications(self, hostgroup):
"""
Disables notifications for all services associated with hosts
in a particular hostgroup.
Note that this does not disable notifications for the hosts in
the hostgroup - see the DISABLE_HOSTGROUP_HOST_NOTIFICATIONS
command for that.
Syntax: DISABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
"""
cmd = "DISABLE_HOSTGROUP_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, hostgroup)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def enable_host_notifications(self, host):
"""
Enables notifications for a particular host.
Note that this command does not enable notifications for
services associated with this host.
Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
"""
cmd = "ENABLE_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, host)
self._write_command(notif_str)
2012-08-27 07:50:52 +02:00
def enable_host_svc_notifications(self, host):
"""
Enables notifications for all services on the specified host.
Note that this does not enable notifications for the host.
Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
"""
cmd = "ENABLE_HOST_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, host)
nagios_return = self._write_command(notif_str)
if nagios_return:
return notif_str
else:
return "Fail: could not write to the command file"
def enable_svc_notifications(self, host, services=[]):
"""
Enables notifications for a particular service.
Note that this does not enable notifications for the host.
Syntax: ENABLE_SVC_NOTIFICATIONS;<host_name>;<service_description>
"""
cmd = "ENABLE_SVC_NOTIFICATIONS"
nagios_return = True
return_str_list = []
for service in services:
notif_str = self._fmt_notif_str(cmd, host, svc=service)
nagios_return = self._write_command(notif_str) and nagios_return
2012-08-27 07:50:52 +02:00
return_str_list.append(notif_str)
if nagios_return:
return return_str_list
else:
return "Fail: could not write to the command file"
def enable_hostgroup_host_notifications(self, hostgroup):
"""
Enables notifications for all hosts in a particular hostgroup.
Note that this command does not enable notifications for
services associated with the hosts in this hostgroup.
Syntax: ENABLE_HOSTGROUP_HOST_NOTIFICATIONS;<hostgroup_name>
"""
cmd = "ENABLE_HOSTGROUP_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, hostgroup)
nagios_return = self._write_command(notif_str)
if nagios_return:
return notif_str
else:
return "Fail: could not write to the command file"
def enable_hostgroup_svc_notifications(self, hostgroup):
"""
Enables notifications for all services that are associated
with hosts in a particular hostgroup.
Note that this does not enable notifications for the hosts in
this hostgroup.
Syntax: ENABLE_HOSTGROUP_SVC_NOTIFICATIONS;<hostgroup_name>
"""
cmd = "ENABLE_HOSTGROUP_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, hostgroup)
nagios_return = self._write_command(notif_str)
if nagios_return:
return notif_str
else:
return "Fail: could not write to the command file"
def enable_servicegroup_host_notifications(self, servicegroup):
"""
Enables notifications for all hosts that have services that
are members of a particular servicegroup.
Note that this command does not enable notifications for
services associated with the hosts in this servicegroup.
Syntax: ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS;<servicegroup_name>
"""
cmd = "ENABLE_SERVICEGROUP_HOST_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, servicegroup)
nagios_return = self._write_command(notif_str)
if nagios_return:
return notif_str
else:
return "Fail: could not write to the command file"
def enable_servicegroup_svc_notifications(self, servicegroup):
"""
Enables notifications for all services that are members of a
particular servicegroup.
Note that this does not enable notifications for the hosts in
this servicegroup.
Syntax: ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS;<servicegroup_name>
"""
cmd = "ENABLE_SERVICEGROUP_SVC_NOTIFICATIONS"
notif_str = self._fmt_notif_str(cmd, servicegroup)
nagios_return = self._write_command(notif_str)
if nagios_return:
return notif_str
else:
return "Fail: could not write to the command file"
def silence_host(self, host):
"""
This command is used to prevent notifications from being sent
out for the host and all services on the specified host.
This is equivalent to calling disable_host_svc_notifications
and disable_host_notifications.
Syntax: DISABLE_HOST_SVC_NOTIFICATIONS;<host_name>
Syntax: DISABLE_HOST_NOTIFICATIONS;<host_name>
"""
cmd = [
"DISABLE_HOST_SVC_NOTIFICATIONS",
"DISABLE_HOST_NOTIFICATIONS"
]
nagios_return = True
return_str_list = []
for c in cmd:
notif_str = self._fmt_notif_str(c, host)
nagios_return = self._write_command(notif_str) and nagios_return
2012-08-27 07:50:52 +02:00
return_str_list.append(notif_str)
if nagios_return:
return return_str_list
else:
return "Fail: could not write to the command file"
def unsilence_host(self, host):
"""
This command is used to enable notifications for the host and
all services on the specified host.
This is equivalent to calling enable_host_svc_notifications
and enable_host_notifications.
Syntax: ENABLE_HOST_SVC_NOTIFICATIONS;<host_name>
Syntax: ENABLE_HOST_NOTIFICATIONS;<host_name>
"""
cmd = [
"ENABLE_HOST_SVC_NOTIFICATIONS",
"ENABLE_HOST_NOTIFICATIONS"
]
nagios_return = True
return_str_list = []
for c in cmd:
notif_str = self._fmt_notif_str(c, host)
nagios_return = self._write_command(notif_str) and nagios_return
2012-08-27 07:50:52 +02:00
return_str_list.append(notif_str)
if nagios_return:
return return_str_list
else:
return "Fail: could not write to the command file"
def act(self):
"""
Figure out what you want to do from ansible, and then do the
needful (at the earliest).
"""
# host or service downtime?
if self.action == 'downtime':
if self.services == 'host':
self.schedule_host_downtime(self.host, self.minutes)
else:
self.schedule_svc_downtime(self.host,
services=self.services,
minutes=self.minutes)
# toggle the host AND service alerts
elif self.action == 'silence':
self.silence_host(self.host)
elif self.action == 'unsilence':
self.unsilence_host(self.host)
# toggle host/svc alerts
elif self.action == 'enable_alerts':
if self.services == 'host':
self.enable_host_notifications(self.host)
else:
self.enable_svc_notifications(self.host,
services=self.services)
elif self.action == 'disable_alerts':
if self.services == 'host':
self.disable_host_notifications(self.host)
else:
self.disable_svc_notifications(self.host,
services=self.services)
# wtf?
else:
self.module.fail_json(msg="unknown action specified: '%s'" % \
self.action)
self.module.exit_json(nagios_commands=self.command_results,
changed=True)
######################################################################
# this is magic, see lib/ansible/module_common.py
#<<INCLUDE_ANSIBLE_MODULE_COMMON>>
main()