From 0fd9c340589362a6a32bd595d8216a3b13248ada Mon Sep 17 00:00:00 2001
From: Per Cederqvist <ceder@lysator.liu.se>
Date: Tue, 9 Jan 2007 14:19:02 +0000
Subject: [PATCH] Added check_nfs_server.

---
 Makefile         |   3 +-
 check_nfs_server | 134 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100755 check_nfs_server

diff --git a/Makefile b/Makefile
index 2463a59..99179fd 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,8 @@ LIBEXECDIR = /usr/local/nagios/libexec/
 SCRIPTS = check_cups check_glsa check_saned check_lpd check_hddtemp \
 	check_link_status check_true check_lysrdiff check_syslog \
 	check_ping check_enodia_monitored check_hostextinfo \
-	check_hydra check_datorhandbok check_no_server check_iostatE
+	check_hydra check_datorhandbok check_no_server check_iostatE \
+	check_nfs_server
 
 all:;
 
diff --git a/check_nfs_server b/check_nfs_server
new file mode 100755
index 0000000..bb45a3a
--- /dev/null
+++ b/check_nfs_server
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+
+# Read the last few messages from all syslog files and look for lines
+# matching "NFS server (.*) not responding" that are not followed by
+# "NFS server \1 OK".  Report any problems found in an aggregated way.
+#
+# This script assumes that syslog log files are created using the
+# following hierarchy:
+#
+#   /misc/syslogs/2006-12/2006-12-18/sellafield-130.236.254.103
+#
+# where 2006 is a year, 12 a month, 18 a day, and sellafield a
+# hostname with its IP address appended.
+
+import sets
+import time
+import os
+import sys
+import re
+
+bad_re = re.compile("NFS server ([^ ]*) not responding")  # complaint; group 1 is the server name
+good_re = re.compile("NFS server ([^ ]*) OK")  # recovery; group 1 is the server name
+time_re = re.compile("((?P<year>[1-9][0-9]{3})-"  # "YYYY-MM-DD HH:MM:SS"; group 1 is the whole stamp
+                     "(?P<mon>[0-9]{2})-"
+                     "(?P<day>[0-9]{2}) "
+                     "(?P<hour>[0-9]{2}):"
+                     "(?P<min>[0-9]{2}):"
+                     "(?P<sec>[0-9]{2}))")
+
+def critical(msg):
+    print "CRITICAL - %s" % msg
+    sys.exit(2)
+
+def warning(msg):
+    print "WARNING - %s" % msg
+    sys.exit(1)
+
+def ok(msg):
+    print "OK - %s" % msg
+    sys.exit(0)
+
+def dirname(y, m, d):
+    return "/misc/syslogs/%04d-%02d/%04d-%02d-%02d" % (
+        y, m, y, m, d)
+
+def checkfile(fn, now):
+    fp = open(fn, "r")
+    filesize = os.fstat(fp.fileno()).st_size
+    fp.seek(-(min(filesize, 10 * 1024)), 2)
+
+    # Discard the first (possibly incomplete) line.
+    if fp.tell() != 0:
+        fp.readline()
+
+    res = {}
+
+    for line in fp:
+
+        m = time_re.match(line)
+        if m is None:
+            continue
+
+        t = time.mktime(time.strptime(m.group(1), "%Y-%m-%d %H:%M:%S"))
+        if now - t < 1800:
+            continue
+
+        bad = bad_re.search(line)
+        if bad is not None:
+            res[bad.group(1)] = res.get(bad.group(1), 0) + 1
+            continue
+
+        good = good_re.search(line)
+        if good is not None:
+            del res[bad.group(1)]
+
+    return res
+
+def derive_hostname(fn):
+    m = re.match("(.*)-[0-9.]", fn)
+    if m is None:
+        return fn
+    else:
+        return m.group(1)
+
+def checkall():
+    complaints = {}
+    complaining_hosts = {}
+    all = sets.Set()
+
+    now = time.time()
+    tm = time.localtime(now)
+    dn = dirname(tm.tm_year, tm.tm_mon, tm.tm_mday)
+
+    for fn in os.listdir(dn):
+
+        stopped = derive_hostname(fn)
+
+        for host, count in checkfile(os.path.join(dn, fn), now).iteritems():
+            if host not in complaints:
+                complaints[host] = 0
+                complaining_hosts[host] = sets.Set()
+            
+            complaints[host] += count
+            complaining_hosts[host].add(stopped)
+            all.add(stopped)
+
+    if len(complaints) == 0:
+        ok("No recent NFS issues found")
+
+    msg = []
+    for host in complaints.keys():
+        msg.append("%s (%d hosts complains %d times)" % (
+            host, len(complaining_hosts[host]), complaints[host]))
+
+    msg = ', '.join(msg) + "."
+    if len(msg) < 180:
+        msg += " Complaining hosts: "
+        need_comma = False
+        for fn in all:
+            if len(msg) > 200:
+                msg += " and others."
+                break
+            else:
+                if need_comma:
+                    msg += ", "
+                msg += fn
+                need_comma = True
+        else:
+            msg += "."
+
+    critical("Bad NFS servers: " + msg)
+
+if __name__ == '__main__':  # run the check only when executed as a script
+    checkall()
-- 
GitLab