Commits

Palmer, 2E0EOL committed 5e7862f Draft Merge

Add check_disks_zpool
Branch merge: f/201207-zfs

  • Participants
  • Parent commits 21a60de, 7cd9898
  • Branches develop

Comments (0)

Files changed (6)

File bin/check_disks_zpool

+#!/usr/bin/perl -w
+#
+# Daybo Logic 'Nagios' plugins collection
+# Copyright (c) 2012, David Duncan Ross Palmer, Daybo Logic
+# All rights reserved. No claims are made for 'Nagios' which is a project
+# we are merely contributing plugins to.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in the
+#       documentation and/or other materials provided with the distribution.
+#
+#     * Neither the name of the Daybo Logic nor the names of its contributors
+#       may be used to endorse or promote products derived from this software
+#       without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+use strict;
+use warnings;
+use diagnostics;
+
+package main;
+use Data::Dumper;
+use Getopt::Std;
+
+# Parsed key/value view of the most recent zpool status (assigned by main).
+my $zpool_info;
+# Command-line switches, filled in by getopts().
+my %opts = ( );
+# Set to 1 by main() when the 'd' option is given; enables diagnostics.
+my $Debug = 0;
+# Exit code and message produced by the last ProcessZpoolStatus() call.
+my $status;
+my $statusMsg;
+
+# Parse the lines of a `zpool status` report into "key: value" pairs.
+#
+# Argument: reference to an array of (already chomped) report lines.
+# Returns:  undef when the argument is false; otherwise a hash reference
+#           with one entry per line of the form "<word>: <text>" (a later
+#           duplicate key overwrites the earlier value).  The extra key
+#           'exit' is set to 3, and an UNKNOWN line is printed, when a
+#           second 'pool' line is met or when nothing at all was parsed.
+sub ParseZpoolStatus($) {
+	my ( $report ) = @_;
+	return undef unless $report;
+
+	my %parsed;
+	my $pair_regex = qr/^\s*(\w+):\s(.*)$/;
+
+	for my $text ( @{$report} ) {
+		printf(STDERR "%s\n", $text) if ( $Debug );
+
+		# Lines without a "key: value" shape (vdev table, blanks) are ignored.
+		next unless ( $text =~ $pair_regex );
+		my ( $field, $value ) = ( $1, $2 );
+
+		# Only a single pool is understood; a second 'pool' line is fatal.
+		if ( $field eq 'pool' && $parsed{$field} ) {
+			printf(
+				STDERR "Seeing pool %s, but already saw pool %s\n",
+				$value, $parsed{$field}
+			) if ( $Debug );
+			print("UNKNOWN: No support for multiple zpools\n");
+			$parsed{exit} = 3;
+		}
+		$parsed{$field} = $value;
+	}
+
+	unless ( %parsed ) {
+		print("UNKNOWN: No valid data\n");
+		$parsed{exit} = 3;
+	}
+	return \%parsed;
+}
+
+# Translate parsed `zpool status` fields into a Nagios result.
+#
+# Argument: hash reference as produced by ParseZpoolStatus.  The keys read
+#           here are pool, state, errors, status and scan.  A false
+#           argument, or a missing pool/state, is reported as 'UNKNOWN'.
+# Returns:  two-element list ( exit code, message ):
+#             0 - pool ONLINE, errors is 'No known data errors', no status
+#             1 - pool not ONLINE and scan shows a resilver in progress
+#             2 - data errors, a status line on an ONLINE pool, or any
+#                 other non-ONLINE state
+#             3 - pool ONLINE but no errors line was supplied
+sub ProcessZpoolStatus($)
+{
+	my $Info = $_[0];
+	$Info = { } if ( !$Info );
+
+	# Work on local copies with defaults so that missing fields neither
+	# trigger uninitialised-value warnings nor modify the caller's hash.
+	my $state = defined($Info->{state}) ? $Info->{state} : 'UNKNOWN';
+	my $pool  = defined($Info->{pool})  ? $Info->{pool}  : 'UNKNOWN';
+
+	if ( $state eq 'ONLINE' ) {
+		my $errors = $Info->{errors};
+		if ( !$errors ) {
+			# This path used to die(), which made the plugin exit with a
+			# code outside the Nagios range; report UNKNOWN instead.
+			return ( 3, "Pool $pool is ONLINE but reported no error summary" );
+		}
+		return ( 2, 'Pool error: ' . $errors )
+			if ( $errors ne 'No known data errors' );
+		return ( 2, $Info->{status} ) if ( $Info->{status} );
+		return ( 0, $errors );
+	}
+
+	# The scan line may be absent; treat that as "no resilver running".
+	my $scan = defined($Info->{scan}) ? $Info->{scan} : '';
+	return ( 1, "Pool $pool is resilvering from state $state" )
+		if ( $scan =~ m/resilver in progress/ );
+	return ( 2, "Pool $pool has gone to state " . $state );
+}
+
+# Read a saved `zpool status` report from a file.
+#
+# Argument: path of the file to read.
+# Returns:  reference to an array holding the file's lines with their
+#           trailing newlines removed, or undef when the path is false or
+#           the file cannot be opened.
+sub ReadStat($)
+{
+	my ( $path ) = @_;
+	return undef unless ( $path );
+	open(my $fh, '<', $path) or return undef;
+
+	my @content = <$fh>;
+	chomp(@content);
+	close($fh);
+	return \@content;
+}
+
+# Run `/sbin/zpool status` and capture what it prints on standard output.
+#
+# Returns: reference to an array of output lines (newlines removed) when the
+#          command produced a true value; otherwise the raw false value
+#          returned by the backticks is passed back unchanged.
+sub ExecStat()
+{
+	my $captured = `/sbin/zpool status`;
+	return $captured unless ( $captured );
+	return [ split(/\n/, $captured) ];
+}
+
+# Plugin entry point: obtain a zpool status report, evaluate it and return
+# the Nagios exit code.
+#
+# Arguments: option hash as a flat list.  Recognised keys:
+#              d - enable debug output (written to STDERR)
+#              f - read the report from this file instead of running zpool
+#              q - suppress the final status message
+# Returns:   the exit code: 3 when no report could be obtained or parsed,
+#            otherwise whatever ProcessZpoolStatus decides.
+sub main(@)
+{
+	my $zpool_status;
+	my %Opts = @_;
+	$Debug = 1 if ( $Opts{'d'} );
+
+	if ( $Opts{'f'} ) {
+		# An explicitly requested file that cannot be read is an error;
+		# silently checking the live system instead would mask it.
+		$zpool_status = ReadStat($Opts{'f'});
+		if ( !$zpool_status ) {
+			print("UNKNOWN: Cannot read $Opts{'f'}\n");
+			return 3;
+		}
+	} else {
+		$zpool_status = ExecStat();
+	}
+
+	$zpool_info = ParseZpoolStatus($zpool_status);
+	if ( !$zpool_info ) {
+		# ParseZpoolStatus returns undef when there was no output at all,
+		# e.g. because zpool could not be executed.
+		print("UNKNOWN: No output from zpool status\n");
+		return 3;
+	}
+
+	# Debug output goes to STDERR so that it cannot precede the status line
+	# on STDOUT.
+	printf(STDERR "%s", Dumper($zpool_info)) if ( $Debug );
+	return $zpool_info->{exit} if ( $zpool_info->{exit} ); # Early exit requested
+
+	( $status, $statusMsg ) = ProcessZpoolStatus($zpool_info);
+	printf("%s\n", $statusMsg) unless ( $Opts{'q'} );
+	return $status;
+}
+
+# Parse the command line and run the check only when executed as a script.
+# When this file is pulled in with require (as the unit test does), neither
+# @ARGV is touched nor is the calling process terminated.
+# Switches: -d debug, -f FILE read report from FILE, -q no status message
+# (main() already honours 'q', so it is accepted here as well).
+if ( !caller() ) {
+	exit(3) if ( !getopts('df:q', \%opts) );
+	exit(main(%opts));
+}
+1;

File t/check_disks_zpool.t

+#!/usr/bin/perl -w
+
+package main;
+use strict;
+use warnings;
+use diagnostics;
+
+require 'bin/check_disks_zpool';
+
+use Test::More tests => 4;
+
+# Run main() against every fixture in the test data directory.
+#
+# Fixture names look like "<digit>_<scenario>.txt"; the scenario part
+# selects the exit code main() is expected to return.  Entries starting
+# with a dot are skipped silently, anything else that does not fit is
+# skipped with a warning.  Always returns 0.
+sub t_main()
+{
+	my $testdir = 'testdata/check_disks_zpool';
+	my %fn_ec_map = (
+		'optimal'  => 0,
+		'rebuild'  => 1,
+		'degraded' => 2
+	);
+
+	opendir(my $dh, $testdir) or return 0;
+	while ( my $entry = readdir($dh) ) {
+		next if ( $entry =~ m/^\./ );
+
+		my ( $index, $scenario ) = ( $entry =~ m/^(\d)\_(\w+)\.txt$/ );
+		if ( !defined($scenario) ) {
+			warn "Not running unit test $entry";
+			next;
+		}
+
+		my $expect_exit = $fn_ec_map{$scenario};
+		if ( !defined($expect_exit) ) {
+			warn "Not running unit test (unknown filename): $entry";
+			next;
+		}
+
+		my $filename = "$testdir/$entry";
+		is(main(q => 1, f => $filename), $expect_exit, "$filename exits $expect_exit");
+	}
+	closedir($dh);
+	return 0;
+}
+
+exit(t_main()); # Run every fixture; t_main() itself always returns 0.

File testdata/check_disks_zpool/1_optimal.txt

+  pool: badger
+ state: ONLINE
+ scan: resilvered 270G in 22h41m with 0 errors on Sun Jul 29 11:44:10 2012
+config:
+
+	NAME        STATE     READ WRITE CKSUM
+	badger      ONLINE       0     0     0
+	  raidz1-0  ONLINE       0     0     0
+	    da0     ONLINE       0     0     0
+	    da3     ONLINE       0     0     0
+	    da1     ONLINE       0     0     0
+	    da2     ONLINE       0     0     0
+
+errors: No known data errors

File testdata/check_disks_zpool/2_optimal.txt

+  pool: badger
+ state: ONLINE
+ scan: scrub repaired 0 in 6h50m with 0 errors on Sat Dec 22 19:10:35 2012
+config:
+
+	NAME        STATE     READ WRITE CKSUM
+	badger      ONLINE       0     0     0
+	  raidz1-0  ONLINE       0     0     0
+	    da0     ONLINE       0     0     0
+	    da3     ONLINE       0     0     0
+	    da1     ONLINE       0     0     0
+	    da2     ONLINE       0     0     0
+
+errors: No known data errors

File testdata/check_disks_zpool/3_rebuild.txt

+  pool: badger
+ state: DEGRADED
+status: One or more devices is currently being resilvered.  The pool will
+	continue to function, possibly in a degraded state.
+action: Wait for the resilver to complete.
+ scan: resilver in progress since Sat Jul 28 13:03:05 2012
+    1.02T scanned out of 1.06T at 14.4M/s, 0h45m to go
+    260G resilvered, 96.44% done
+config:
+
+	NAME                        STATE     READ WRITE CKSUM
+	badger                      DEGRADED     0     0     0
+	  raidz1-0                  DEGRADED     0     0     0
+	    da0                     ONLINE       0     0     0
+	    replacing-1             REMOVED      0     0     0
+	      10240996456451386534  REMOVED      0     0     0  was /dev/da3/old
+	      da3                   ONLINE       0     0     0  (resilvering)
+	    da1                     ONLINE       0     0     0
+	    da2                     ONLINE       0     0     0
+
+errors: No known data errors

File testdata/check_disks_zpool/4_degraded.txt

+  pool: badger
+ state: DEGRADED
+status: One or more devices has been removed by the administrator.
+	Sufficient replicas exist for the pool to continue functioning in a
+	degraded state.
+action: Online the device using 'zpool online' or replace the device with
+	'zpool replace'.
+ scan: resilvered 8.60G in 3h18m with 0 errors on Sat Jul 21 13:42:20 2012
+config:
+
+	NAME                      STATE     READ WRITE CKSUM
+	badger                    DEGRADED     0     0     0
+	  raidz1-0                DEGRADED     0     0     0
+	    da0                   ONLINE       0     0     0
+	    10240996456451386534  REMOVED      0     0     0  was /dev/da3
+	    da1                   ONLINE       0     0     0
+	    da2                   ONLINE       0     0     0
+
+errors: No known data errors