diff --git a/Makefile b/Makefile
index f928d0af260633aa3af3224d9d493d8922426f18..de0a2b6dc7d893dbb2a02d3df5aa6740573f61f4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 .DEFAULT_GOAL = all
 
 #########################################################################
-# Copyright © 2018 Thomas Bellman, Linköping, Sweden
+# Copyright © 2018-2019 Thomas Bellman, Linköping, Sweden
 # Licensed under the GNU LGPL v3+; see the README file for more information.
 
 
@@ -31,7 +31,7 @@ include Version.inc
 #########################################################################
 # Internal variables, not intended to be overridden by users
 
-_PYPLUGINS = check_ospf_nbr
+_PYPLUGINS = check_ospf_nbr check_ping_multiaddr
 _PYLIBS = trh_nagioslib
 _MIBFILES = OSPFV3-MIB.txt
 _RPM_SPECFILE = $(PKGNAME)-$(VERSION).spec
diff --git a/README b/README
index 1045067855e6e08c83c727dc0f0b9fe513d026c8..f3213811fbb4123300a4d8532851fa5b17254c7f 100644
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 -*- mode: text; fill-column: 70; -*-
 
 This is a collection of Nagios plugins.
-Copyright © 2018 Thomas Bellman, and any other authors.
+Copyright © 2018-2019 Thomas Bellman, and any other authors.
 
 
 ===== LICENSING =====
@@ -28,12 +28,13 @@ SNMP MIB files are likely to be licensed under different terms.
 ===== WHAT IS THIS =====
 
 This is a collection of Nagios plugins, with a focus on network
-monitoring. Currently only a single one, though:
+monitoring. Currently only two, though:
 
  - check_ospf_nbr        Check that an OSPF neighbour of a router is
                          present and in state FULL. Both OSPF v2 (for
                          IPv4) and OSPF v3 (for IPv6) is supported.
-
+ - check_ping_multiaddr  Check that multiple IP addresses (both IPv4
+                         and IPv6, at the same time) respond to ping.
 
 
 ===== INSTALLATION =====
@@ -78,6 +79,7 @@ modifications of the spec file
 You will need at least the Net-SNMP (http://www.net-snmp.org/) Python
 bindings to run these plugins. You will also need several SNMP MIB
 files installed, e.g. the ones that come with Net-SNMP.
+The check_ping_multiaddr plugin requires fping(1) to be installed.
 
 GNU Make is assumed for building.
 
diff --git a/check_ping_multiaddr.py b/check_ping_multiaddr.py
new file mode 100755
index 0000000000000000000000000000000000000000..44564751f3d9579da015f75d9277a3da9a9404e1
--- /dev/null
+++ b/check_ping_multiaddr.py
@@ -0,0 +1,344 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8; indent-tabs-mode: nil -*-
+
+# Copyright © 2019 Thomas Bellman, Linköping, Sweden
+# Licensed under the GNU LGPL v3+; see the README file for more information.
+
+
+# Explicitly assign to __doc__ to avoid doc string being optimized out.
+__doc__ = """\
+Check that one or more IP addresses responds to ping.
+
+The intent of this Nagios plugin is to check that a host is alive and
+reachable on all of its addresses, in particular both IPv4 and IPv6
+addresses, which can be tested at the same time.
+
+Addresses can be specified either as numeric IP addresses, or as host
+names that will be resolved at runtime. By default, only the first
+address the resolver library returns for each host name will be used,
+but the -A/--all flag will cause all of the addresses returned by the
+resolver to be used. You need to give at least one of --ipv4 or --ipv6,
+but both can be specified at the same time.
+
+Note that Nagios does not allow you to declare multiple addresses for
+a host. To use this in a check_command for a host, you need to either
+let %prog resolve addresses at runtime (thus depending on
+DNS), or you need to generate a unique command declaration for each
+host; something like:
+
+    define host {
+        host_name       smurf
+        address         198.51.100.23
+        check_command   check-host-alive::smurf
+    }
+    define command {
+        command_name    check-host-alive::smurf
+        command_line    $USER1$/check_ping_multiaddr -A46 -- 198.51.100.23 2001:db8:4711:17::1:23
+    }
+"""
+
+__version__ = '<#VERSION#>'
+
+
+import sys
+import re
+import os
+import optparse
+import ipaddr
+import socket
+import subprocess
+
+import trh_nagioslib
+
+
+
+class ProgramFailure(Exception):
+    """Raised to terminate the program with exit status `status`.
+
+    Both raisers in this file write `msg` to stderr before raising; the
+    top-level handler only passes `status` on to sys.exit().
+    """
+    def __init__(self, status, msg):
+        Exception.__init__(self)
+        self.status = status
+        self.msg = msg
+
+
+class Options(optparse.OptionParser):
+    """Command line parser for this plugin.
+
+    Usage errors, --help and --version end up in exit(), which raises
+    ProgramFailure instead of terminating the process directly.
+    """
+
+    def __init__(self):
+        global __doc__, __version__
+        optparse.OptionParser.__init__(
+            self,
+            usage="%prog {-4|-6} [options] -- address ...",
+            formatter=trh_nagioslib.NoReflowHelpFormatter(),
+            version=__version__,
+            description=__doc__)
+        self.add_option(
+            '-4', '--ipv4', action='store_true', default=False,
+            help=("Use IPv4 [default: %default]."
+                  " At least one of --ipv4 and --ipv6 must be given."))
+        self.add_option(
+            '-6', '--ipv6', action='store_true', default=False,
+            help=("Use IPv6 [default: %default]."
+                  " At least one of --ipv4 and --ipv6 must be given."))
+        self.add_option(
+            '-A', '--all-addresses', action='store_true', default=False,
+            help=("Check all addresses each hostname resolves to."
+                  " By default, only the first address for each host is"
+                  " checked."))
+        self.add_option(
+            '-r', '--retries', action='store', type='int', default=9,
+            help=("Number of ICMP ECHO retries to send [default: %default]."
+                  " Must be in the range 1 <= RETRIES <= 20."))
+        self.add_option(
+            '-d', '--debug', action='count', default=0,
+            help=("Increase debug level [default: %default]."))
+
+    def get_version(self):
+        """Return the text shown by --version, including the package name."""
+        progname = self.get_prog_name()
+        pkgname = "<#PKGNAME#>"
+        version = self.version
+        vinfo = "%s (%s) version %s" % (progname, pkgname, version)
+        return vinfo
+
+    def check_values(self, values, args):
+        """Validate the parsed options; called by optparse after parsing."""
+        if len(args) < 1:
+            self.error("At least one IP address is required")
+        if not values.ipv4 and not values.ipv6:
+            self.error("At least one IP version must be specified (-4, -6)")
+        if values.retries < 1 or values.retries > 20:
+            self.error(
+                "Retries (-r) must be in range 1 <= RETRIES <= 20")
+        return values,args
+
+    def exit(self, status=0, msg=None):
+        """Write `msg` to stderr and raise ProgramFailure.
+
+        Overrides optparse's exit() so that the caller decides how the
+        process terminates. Any non-zero `status` is replaced by
+        os.EX_USAGE; status 0 (e.g. from --help) is kept.
+        """
+        if msg:
+            sys.stderr.write(msg)
+        # Exit with EX_USAGE, unless status==0 (which happens for --help)
+        raise ProgramFailure(status=(os.EX_USAGE if status else 0), msg=msg)
+
+
+# Placeholder options until main() parses the real command line;
+# chatter() and ping_addresses() read OPTIONS.
+OPTIONS,_ = Options().parse_args(['-A', '-6', '-4', '--', 'localhost'])
+
+
+def fail(status, fmt, *args):
+    """Write "PROGNAME: message" to stderr and raise ProgramFailure.
+
+    `fmt % args` is the message; `status` becomes the exit status.
+    """
+    progname = os.path.basename(sys.argv[0] or "check_ping_multiaddr")
+    msg = progname + ": " + fmt % args + "\n"
+    sys.stderr.write(msg)
+    raise ProgramFailure(status=status, msg=msg)
+
+
+def chatter(level, fmt, *args, **kwargs):
+    """Write a debug message to stderr if `level` <= the --debug level.
+
+    The message is `fmt` %-formatted with `kwargs` if any were given,
+    otherwise with `args`.
+    """
+    if level <= OPTIONS.debug:
+        msg = fmt % (kwargs or args)
+        sys.stderr.write("#" + " " * level + msg + "\n")
+
+
+def eai_errno_to_symbol(errno):
+    """Return the name of the socket.EAI_* constant equal to `errno`,
+    or None if there is no such constant.
+    """
+    for symbol in dir(socket):
+        if symbol.startswith('EAI_') and getattr(socket, symbol) == errno:
+            return symbol
+    return None
+
+
+
+def collect_addresses(hosts, all_addresses, do_v4, do_v6):
+    """Turn `hosts` into sets of IP addresses, keyed by IP version.
+
+    Each element of `hosts` is either a numeric IP address or a host
+    name to resolve. For host names, only the first address returned
+    by the resolver is used unless `all_addresses` is true. `do_v4`
+    and `do_v6` select the address families to accept; at least one of
+    them must be true, or ValueError is raised.
+
+    Returns a dict { 4: set-of-addresses, 6: set-of-addresses }. Calls
+    fail() (raising ProgramFailure) if a name cannot be resolved.
+    """
+    if do_v4 and do_v6:
+        ipfamily, ipversion = socket.AF_UNSPEC, None
+    elif do_v4:
+        ipfamily, ipversion = socket.AF_INET, 4
+    elif do_v6:
+        ipfamily, ipversion = socket.AF_INET6, 6
+    else:
+        raise ValueError("Neither IPv4 nor IPv6 selected")
+
+    # Use AI_IDN only if this Python's socket module defines it
+    lookupflags = 0
+    lookupflags |= getattr(socket, 'AI_IDN', 0)
+
+    addresses = { 4: set(), 6: set() }
+    for host in hosts:
+        # Try it as a numerical IP address first
+        try:
+            addr = ipaddr.IPAddress(host, ipversion)
+        except ValueError:
+            pass
+        else:
+            addresses[addr.version].add(addr)
+            continue
+
+        # And if that failed, try resolving the name
+        try:
+            ipres = socket.getaddrinfo(host, None, ipfamily, 0, 0, lookupflags)
+        except socket.gaierror as e:
+            fail(os.EX_NOHOST, "%s, %s", e.strerror, host)
+        for ai in ipres:
+            ip = ai[4][0]
+            addr = ipaddr.IPAddress(ip)
+            addresses[addr.version].add(addr)
+            if not all_addresses:
+                break
+
+    return addresses
+
+
+
+__fping_parser_re = re.compile(r"^([0-9a-f.:]+) is ([a-z]+)$")
+
+def parse_fping_output(output, expected_addrs):
+    """Classify addresses according to fping's output.
+
+    `output` is an iterable of output lines, and `expected_addrs` the
+    set of addresses that were pinged. Lines not of the form
+    "ADDRESS is STATUS" are ignored; a STATUS other than "alive" or
+    "unreachable" raises RuntimeError.
+
+    Returns four sets: (alive, unreachable, not_reported, unexpected).
+    """
+    alive = set()
+    unreachable = set()
+    for line in filter(bool, output):
+        match = __fping_parser_re.match(line)
+        if not match:
+            continue
+        ip = ipaddr.IPAddress(match.group(1))
+        status = match.group(2)
+        if status == 'alive':
+            alive.add(ip)
+        elif status == 'unreachable':
+            unreachable.add(ip)
+        else:
+            raise RuntimeError(
+                "Unexpected status line from fping, " + repr(line))
+    not_reported = expected_addrs - alive - unreachable
+    unexpected = (alive | unreachable) - expected_addrs
+
+    return (alive, unreachable, not_reported, unexpected)
+
+
+def ping_addresses(addresses):
+    """Ping all `addresses` with fping and classify the results.
+
+    `addresses` maps IP version (4 or 6) to a set of addresses, as
+    returned by collect_addresses(). fping (or fping6) is run once
+    per IP version that has any addresses.
+
+    Returns (alive, unreachable, not_reported, unexpected), as from
+    parse_fping_output().
+    """
+    fpingcmds = {
+        4: ['fping'],
+        6: ['fping6'],
+    }
+    fpingflags = [
+        # These settings, with default 9 retries, give ca 5 seconds timeout
+        # for unreachable addresses
+        '-i10',         # -i10 is the fastest fping allows without being root
+        '-t250',
+        '-B1.125',
+        '-r%d' % (OPTIONS.retries,),
+    ]
+    all_output = []
+    for ipver,addrs in addresses.items():
+        if not addrs:
+            continue
+        cmd = fpingcmds[ipver] + fpingflags + [str(a) for a in addrs]
+        chatter(1, "Running %r", cmd)
+        p = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output,errors = p.communicate()
+        output = output.split("\n"); errors = errors.split("\n")
+        chatter(3, "Received output %r", output)
+        chatter(3, "Received errors %r", errors)
+        all_output += output
+
+    alive, unreachable, not_reported, unexpected = parse_fping_output(
+        all_output, set().union(*addresses.values()))
+
+    return (alive, unreachable, not_reported, unexpected)
+
+
+
+def main(argv):
+    """Run the check for the command line `argv`.
+
+    Writes the Nagios status message to stdout and returns the level
+    from trh_nagioslib.nagios_report(), to be used as exit status.
+    """
+    global OPTIONS
+    OPTIONS, arg_addresses = Options().parse_args(argv[1:])
+
+    addresses = collect_addresses(
+        arg_addresses, OPTIONS.all_addresses, OPTIONS.ipv4, OPTIONS.ipv6)
+
+    for ipver,addrs in addresses.items():
+        chatter(2, "IPv%d addresses: %s", ipver, " ".join(map(str, addrs)))
+
+    (alive, unreachable, not_reported, unexpected) = \
+        ping_addresses(addresses)
+
+    ping_statuses = {
+        'OK': [ '%s is alive' % (ip,) for ip in alive ],
+        'WARNING': [ '%s was reported despite not being pinged' % (ip,)
+                     for ip in unexpected ],
+        'CRITICAL': [ '%s is unreachable' % (ip,) for ip in unreachable ],
+        'UNKNOWN': [ '%s has no information' % (ip,) for ip in not_reported ],
+    }
+    lvl,message = trh_nagioslib.nagios_report(ping_statuses)
+    sys.stdout.write(message)
+    return lvl
+
+
+
+if __name__ == '__main__':
+    try:
+        code = main(sys.argv)
+        sys.exit(code)
+    except ProgramFailure as failure:
+        sys.exit(failure.status)
+    except Exception:
+        # An exception would normally cause Python to exit with code == 1,
+        # but that would be a WARNING for Nagios. Avoid that.
+        (exc_type, exc_value, exc_traceback) = sys.exc_info()
+        import traceback
+        traceback.print_exception(exc_type, exc_value, exc_traceback)
+        sys.exit(os.EX_SOFTWARE)
diff --git a/pkg/rpm.spec.in b/pkg/rpm.spec.in
index 1b48a58663edebf970af9980681871da31c27d4e..d8e8576c43539afcb44a6876de31bf8a240ae0c3 100644
--- a/pkg/rpm.spec.in
+++ b/pkg/rpm.spec.in
@@ -27,6 +27,7 @@ Requires: net-snmp-python
 Requires: python-ipaddr
 # net-snmp-libs and libsmi contain needed MIB files
 Requires: net-snmp-libs, libsmi
+Requires: fping
 
 
 %description
diff --git a/trh_nagioslib.py b/trh_nagioslib.py
index 8eb5661ae84ca9dcae04a2e8db7879ce11f6a77f..ba64ef14fffc7dca22cbd155d9524a70bc9df197 100644
--- a/trh_nagioslib.py
+++ b/trh_nagioslib.py
@@ -13,6 +13,8 @@ import re
 import collections
 import netsnmp
 import subprocess
+import optparse
+import textwrap
 
 
 # Cache of mappings from short names to fully qualified names.
@@ -251,3 +253,24 @@ def nagios_report(statuses):
         message = "UNKNOWN: No status report\n"
 
     return max_level, message
+
+
+
+class NoReflowHelpFormatter(optparse.IndentedHelpFormatter):
+    """A HelpFormatter for optparse that does not re-wrap/reflow text.
+
+    Intended for command descriptions that are already properly
+    pre-formatted.
+    """
+    def format_description(self, description):
+        """Return `description` without reflowing it.
+
+        The first line is kept as is, the common leading whitespace of
+        the remaining lines is removed, and a newline is appended.
+        """
+        if not description:
+            return ""
+        summary,body = (description.strip().split("\n", 1) + [""])[:2]
+        body = textwrap.dedent(body)
+        description = summary + "\n" + body + "\n"
+        return description