Commits

Chip Schweiss  committed cd2e934 Merge

merge

  • Participants
  • Parent commits 2332aa6, a655775

Comments (0)

Files changed (6)

File utils/tools/README

+These dtrace scripts are useful for NFS, CIFS, and iSCSI servers.
+Richard.Elling@RichardElling.com

File utils/tools/Toptools.pdf

Binary file added.

File utils/tools/cifssvrtop

+#!/usr/bin/ksh
+#
+# cifssvrtop - display top CIFS I/O events on a server.
+#
+# This is measuring the response time between an incoming CIFS operation
+# and its response. In general, this measures the server's view of how
+# quickly it can respond to requests. By default, the list shows responses
+# to each client.
+# 	
+# Top-level fields:
+#   load    1 min load average
+#   read    total KB read during sample
+#   write   total KB sync writes during sample
+#
+# The following per-client and "all" clients fields are shown
+#   Client  Client IPv4 address or workstation name
+#   CIFSOPS CIFS operations per second
+#   Reads   Read operations per second
+#   Writes  Write operations per second
+#   Rd_bw   Read bandwidth KB/sec
+#   Wr_bw   Write bandwidth KB/sec
+#   Rd_t    Average read time in microseconds
+#   Wr_t    Average write time in microseconds
+#   Align%  Percentage of read/write operations that have offset aligned to
+#           blocksize (default=4096 bytes)
+#
+# Note: dtrace doesn't do floating point. A seemingly zero response or 
+# count can result due to integer division.
+# 
+#
+# INSPIRATION:  top(1) by William LeFebvre and iotop by Brendan Gregg
+#
+# Copyright 2011, Nexenta Systems, Inc. All rights reserved.
+# Copyright 2012, Richard Elling, All rights reserved.
+#
+# CDDL HEADER START
+#
+#  The contents of this file are subject to the terms of the
+#  Common Development and Distribution License, Version 1.0 only
+#  (the "License").  You may not use this file except in compliance
+#  with the License.
+#
+#  You can obtain a copy of the license at Docs/cddl1.txt
+#  or http://www.opensolaris.org/os/licensing.
+#  See the License for the specific language governing permissions
+#  and limitations under the License.
+#
+# CDDL HEADER END
+#
+# Author: Richard.Elling@Nexenta.com
+#
+# Revision:
+#   1.9	29-Nov-2012
+#
+# TODO: share filter
+# TODO: IPv6 support
+PATH=/usr/sbin:/usr/bin
+
+##############################
+# check to see if the NFS server module is loaded
+# if not, then the dtrace probes will fail ungracefully
+if [ "$(uname -s)" = "SunOS" ]; then
+	modinfo | awk '{print $6}' | grep -q smbsrv
+	if [ $? != 0 ]; then
+		echo "error: SMB server module is not loaded, are you serving SMB (CIFS)?"
+		exit 1
+	fi
+fi
+
+##############################
+# --- Process Arguments ---
+#
+
+### default variables
+opt_blocksize=4096  # blocksize for alignment measurements
+opt_client=0        # set if -c option set
+client_ws=""        # client named by -c; cleared here so that a value
+                    # inherited from the environment is never pasted
+                    # into the D program
+opt_clear=1         # set if screen to be cleared
+opt_json=0          # set if output is JSON
+opt_top=0           # set if list trimmed to top
+opt_wsname=0        # set if workstation name desired rather than IPv4 addr
+top=0               # number of lines trimmed
+interval=10         # default interval (seconds between reports)
+count=-1            # number of intervals to show (-1 never reaches the
+                    # exit test, so the tool runs until interrupted)
+
+### process options
+# h is part of the option string so that -h reaches the usage text
+# without getopts first complaining about an unknown option.  Any
+# unknown option also lands in the usage branch, which exits with 1.
+while getopts b:c:Chjt:w name
+do
+    case $name in
+        b)  opt_blocksize=$OPTARG ;;
+        c)  opt_client=1; client_ws=$OPTARG ;;
+        C)  opt_clear=0 ;;
+        j)  opt_json=1 ;;
+        t)  opt_top=1; top=$OPTARG ;;
+        w)  opt_wsname=1 ;;
+        h|?)    cat <<END >&2
+USAGE: cifssvrtop [-Cjw] [-b blocksize] [-c client_ws] [-t top] 
+                 [interval [count]]
+             -b blocksize # alignment blocksize (default=4096)
+             -c client_ws # trace for this client only
+             -C           # don't clear the screen
+             -j           # print output in JSON format
+             -t top       # print top number of entries only
+             -w           # print workstation name instead of IPv4 addr
+   examples:
+     cifssvrtop         # default output, 10 second samples
+     cifssvrtop -b 1024 # check alignment on 1KB boundary
+     cifssvrtop 1       # 1 second samples
+     cifssvrtop -C 60   # 60 second samples, do not clear screen
+     cifssvrtop -t 20   # print top 20 lines only
+     cifssvrtop 5 12    # print 12 x 5 second samples
+END
+        exit 1
+    esac
+done
+
+# drop the options consumed by getopts; the operands remain in $1, $2
+shift $((OPTIND - 1))
+
+### option logic
+# first operand, when non-empty, replaces the default interval
+if [ -n "$1" ]; then
+    interval=$1
+    shift
+fi
+# next operand, when non-empty, replaces the default count
+if [ -n "$1" ]; then
+    count=$1
+    shift
+fi
+# capture the output of clear only when screen clearing is enabled
+clearstr=""
+if [ "$opt_clear" = 1 ]; then
+    clearstr=$(clear)
+fi
+
+#################################
+# --- Main Program, DTrace ---
+#
+# The whole D program is one single-quoted shell word.  Every '$name'
+# sequence below briefly leaves the quotes so that the shell pastes in
+# the option value collected above.
+/usr/sbin/dtrace -Cn '
+/*
+ * Command line arguments
+ * (each shell value becomes the body of a D inline constant)
+ */
+inline int OPT_blocksize = '$opt_blocksize';
+inline int OPT_clear 	= '$opt_clear';
+inline int OPT_client   = '$opt_client';
+inline int OPT_top 	= '$opt_top';
+inline int OPT_json	= '$opt_json';
+inline int OPT_wsname = '$opt_wsname';
+inline int INTERVAL 	= '$interval';
+inline int COUNTER 	= '$count';
+inline int TOP          = '$top';
+inline string CLIENT	= "'$client_ws'";
+inline string CLEAR 	= "'$clearstr'";
+
+#pragma D option quiet
+
+/* increase dynvarsize if you get "dynamic variable drops" */
+#pragma D option dynvarsize=8m
+
+/*
+ * Print header
+ *
+ * Initialise the report countdown and the running byte totals, then
+ * print the start-up banner.  The banner is suppressed in JSON mode so
+ * that the output stream holds nothing but JSON documents.
+ */
+dtrace:::BEGIN 
+{
+    /* starting values */
+    counts = COUNTER;
+    secs = INTERVAL;
+    total_read_bw = 0;
+    total_write_bw = 0;
+
+    OPT_json ? 1 : printf("Tracing... Please wait.\n");
+}
+
+/*
+ * Filter as needed, based on starts
+ *
+ * This unpredicated clause runs for every smb_op start probe.  It caches
+ * the request pointer (arg0), the client address string and the
+ * workstation name in thread-local storage, then picks whichever of the
+ * two names OPT_wsname selects as the aggregation key self->me.
+ */
+sdt:smbsrv::-smb_op-*-start
+{
+    self->sr = (smb_request_t *)arg0;
+    self->ipaddr = inet_ntoa((ipaddr_t *)&self->sr->session->ipaddr);
+    self->wsname = stringof(self->sr->session->workstation);
+    self->me = OPT_wsname == 0 ? self->ipaddr : self->wsname;
+}
+
+/*
+ * Count every operation for the client.  The "all" entry is only
+ * updated when no client filter (OPT_client) is in effect.
+ */
+sdt:smbsrv::-smb_op-*-start
+/self->sr && (OPT_client == 0 || CLIENT == self->me)/
+{ 
+    @c_cifsops[self->me] = count();
+    OPT_client == 0 ? @c_cifsops["all"] = count() : 1;
+}
+
+/*
+ * Stamp the start time of read and write operations that pass the
+ * client filter.  The read and write clauses that follow test
+ * self->startts, so they rely on this clause appearing first.
+ */
+sdt:smbsrv::-smb_op-ReadX-start,
+sdt:smbsrv::-smb_op-ReadRaw-start,
+sdt:smbsrv::-smb_op-Read-start,
+sdt:smbsrv::-smb_op-WriteX-start,
+sdt:smbsrv::-smb_op-WriteRaw-start,
+sdt:smbsrv::-smb_op-Write-start
+/self->sr && (OPT_client == 0 || CLIENT == self->me)/
+{ 
+    self->startts = timestamp;
+}
+
+/*
+ * read
+ *
+ * On start: count the read, add its size to the bandwidth totals and
+ * record whether its offset is a multiple of OPT_blocksize (100 when
+ * aligned, 0 when not, so the average reads as a percentage).  Every
+ * "all" entry, including the alignment one, is only updated when no
+ * client filter is active.
+ */
+sdt:smbsrv::-smb_op-ReadX-start,
+sdt:smbsrv::-smb_op-ReadRaw-start,
+sdt:smbsrv::-smb_op-Read-start
+/self->startts/
+{
+    self->rwp = (smb_rw_param_t *)arg1;
+    @c_read[self->me] = count();
+    OPT_client == 0 ? @c_read["all"] = count() : 1;
+    @read_bw[self->me] = sum(self->rwp->rw_count);
+    OPT_client == 0 ? @read_bw["all"] = sum(self->rwp->rw_count) : 1;
+    total_read_bw += self->rwp->rw_count;
+    @avg_aligned[self->me] =
+        avg((self->rwp->rw_offset % OPT_blocksize) ? 0 : 100);
+    OPT_client == 0 ? @avg_aligned["all"] =
+        avg((self->rwp->rw_offset % OPT_blocksize) ? 0 : 100) : 1;
+}
+
+/*
+ * On done: average the elapsed time since the matching start and clear
+ * the start stamp.  The elapsed time is held in a clause-local variable
+ * so that probes firing on other CPUs cannot overwrite it.
+ */
+sdt:smbsrv::-smb_op-ReadX-done,
+sdt:smbsrv::-smb_op-ReadRaw-done,
+sdt:smbsrv::-smb_op-Read-done
+/self->startts && self->sr/
+{
+    this->elapsed = timestamp - self->startts;
+    @avgtime_read[self->me] = avg(this->elapsed);
+    OPT_client == 0 ? @avgtime_read["all"] = avg(this->elapsed) : 1;
+    self->startts = 0;
+}
+
+/*
+ * write
+ *
+ * On start: count the write, add its size to the bandwidth totals and
+ * record whether its offset is a multiple of OPT_blocksize (100 when
+ * aligned, 0 when not).  Every "all" entry, including the alignment
+ * one, is only updated when no client filter is active.
+ */
+sdt:smbsrv::-smb_op-WriteX-start,
+sdt:smbsrv::-smb_op-WriteRaw-start,
+sdt:smbsrv::-smb_op-Write-start
+/self->startts && self->sr/
+{
+    self->rwp = (smb_rw_param_t *)arg1;
+    @c_write[self->me] = count();
+    OPT_client == 0 ? @c_write["all"] = count() : 1;
+    @write_bw[self->me] = sum(self->rwp->rw_count);
+    OPT_client == 0 ? @write_bw["all"] = sum(self->rwp->rw_count) : 1;
+    total_write_bw += self->rwp->rw_count;
+    @avg_aligned[self->me] =
+        avg((self->rwp->rw_offset % OPT_blocksize) ? 0 : 100);
+    OPT_client == 0 ? @avg_aligned["all"] =
+        avg((self->rwp->rw_offset % OPT_blocksize) ? 0 : 100) : 1;
+}
+
+/*
+ * On done: average the elapsed time since the matching start and clear
+ * the start stamp.  The elapsed time is held in a clause-local variable
+ * so that probes firing on other CPUs cannot overwrite it.
+ */
+sdt:smbsrv::-smb_op-WriteX-done,
+sdt:smbsrv::-smb_op-WriteRaw-done,
+sdt:smbsrv::-smb_op-Write-done
+/self->startts && self->sr/
+{
+    this->elapsed = timestamp - self->startts;
+    @avgtime_write[self->me] = avg(this->elapsed);
+    OPT_client == 0 ? @avgtime_write["all"] = avg(this->elapsed) : 1;
+    self->startts = 0;
+}
+
+/*
+ * timer
+ *
+ * Counts down once per second.  This clause must stay ahead of the
+ * report clause, which tests secs on the same probe firing.
+ */
+profile:::tick-1sec
+{
+	secs--;
+}
+
+/*
+ * Print report
+ *
+ * Runs when the countdown reaches zero: scales the aggregations, prints
+ * one report in text or JSON form, then resets all data and restarts
+ * the countdown.
+ */
+profile:::tick-1sec
+/secs == 0/
+{	
+    /*
+     * fetch 1 min load average
+     * hp_avenrun[0] is split into a whole part (divide by 65536) and a
+     * two-digit fraction (remainder scaled to hundredths)
+     */
+    self->load1a = `hp_avenrun[0] / 65536;
+    self->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536;
+
+    /* convert counters to Kbytes */
+    total_read_bw /= 1024;
+    total_write_bw /= 1024;
+
+    /* normalize to seconds giving a rate */
+    /* todo: this should be measured, not based on the INTERVAL */
+    normalize(@c_cifsops, INTERVAL);
+    normalize(@c_read, INTERVAL);
+    normalize(@c_write, INTERVAL);
+
+    /* normalize to KB per second */
+    normalize(@read_bw, 1024 * INTERVAL);
+    normalize(@write_bw, 1024 * INTERVAL);
+
+    /* normalize average to microseconds */
+    normalize(@avgtime_read, 1000);
+    normalize(@avgtime_write, 1000);
+
+    /* print status (the screen is never cleared in JSON mode) */
+    OPT_clear && !OPT_json ? printf("%s", CLEAR) : 1;
+
+    OPT_json ? 
+        printf("{ \"collector\": \"cifssvrtop\", \"time\": \"%Y\", \"timestamp\": %d, \"interval\": %d, \"load\": %d.%02d, \"read_KB_int\": %d, \"write_KB_int\": %d, \"clientdata\": [",
+            walltimestamp, walltimestamp, INTERVAL, 
+            self->load1a, self->load1b, 
+            total_read_bw, total_write_bw)
+    :
+        printf("%Y, load: %d.%02d, read: %-8d KB, write: %-8d KB\n",
+            walltimestamp, self->load1a, self->load1b, 
+            total_read_bw, total_write_bw);
+
+    /* print headers (text mode only) */
+    OPT_json ? 1 :
+        printf("%-15s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\n",
+            "Client", "CIFSOPS", 
+            "Reads", "Writes", "Rd_bw", "Wr_bw", "Rd_t", "Wr_t", "Align%");
+
+    /* truncate to top lines if needed; only @c_cifsops is truncated */
+    OPT_top ? trunc(@c_cifsops, TOP) : 1;
+
+    /* one line (or one JSON object) per aggregation key */
+    OPT_json ?
+        printa("{\"client\": \"%s\", \"CIFSOPS\": %@d, \"reads\": %@d, \"writes\": %@d, \"read_bw\": %@d, \"write_bw\": %@d, \"avg_read_t\": %@d, \"avg_write_t\": %@d, \"aligned_pct\": %@d },",
+            @c_cifsops, @c_read, @c_write, @read_bw, @write_bw,
+            @avgtime_read, @avgtime_write, @avg_aligned)
+    :
+        printa("%-15s\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\n",
+            @c_cifsops, @c_read, @c_write, @read_bw, @write_bw,
+            @avgtime_read, @avgtime_write, @avg_aligned);
+
+    /* the empty object follows the trailing comma left by the last entry */
+    OPT_json ? printf("{}]}\n") : 1;
+
+    /* clear data */
+    trunc(@c_cifsops); trunc(@c_read); trunc(@c_write); 
+    trunc(@read_bw); trunc(@write_bw); 
+    trunc(@avgtime_read); trunc(@avgtime_write);
+    trunc(@avg_aligned);
+    total_read_bw = 0;
+    total_write_bw = 0;
+    secs = INTERVAL;
+    counts--;
+}
+
+/*
+ * end of program 
+ *
+ * counts starts at COUNTER and drops by one per report, so a COUNTER
+ * of -1 never satisfies this predicate.
+ */
+profile:::tick-1sec
+/counts == 0/
+{
+	exit(0);
+}
+
+/*
+ * clean up when interrupted
+ *
+ * Empties every aggregation.
+ * NOTE(review): presumably this keeps dtrace from printing leftover
+ * aggregation data on exit - confirm.
+ */
+dtrace:::END
+{
+    trunc(@c_cifsops); trunc(@c_read); trunc(@c_write); 
+    trunc(@read_bw); trunc(@write_bw); 
+    trunc(@avgtime_read); trunc(@avgtime_write);
+    trunc(@avg_aligned);
+}
+'

File utils/tools/iscsisvrtop

+#!/usr/bin/ksh
+#
+# iscsisvrtop - display top iSCSI I/O events on a server.
+#
+# This is measuring the response time between an incoming iSCSI operation
+# and its response. In general, this measures the server's view of how
+# quickly it can respond to requests. By default, the list shows responses
+# to each client.
+#
+# Top-level fields:
+#	load	1 min load average
+#	read	total KB read during sample
+#	write	total KB sync writes during sample
+#
+# The following per-client and "all" clients fields are shown
+#	Client	IP addr of client
+#	OPS	iSCSI operations per second
+#	Reads	Read operations per second
+#	Writes	Sync write operations per second
+#	NOPS	NOP operations per second
+#	Rd_bw	Read KB/sec
+#	Wr_bw	Sync write KB/sec
+#	ARd_sz	Average read size (KB)
+#	AWr_sz	Average write size (KB)
+#	Rd_t	Average read time in microseconds
+#	Wr_t	Average sync write time in microseconds
+#	Align%	Percentage of read/write operations that have LBA aligned to
+#		blocksize (default=4096 bytes)
+#
+# INSPIRATION:  top(1) by William LeFebvre and iotop by Brendan Gregg
+#
+# Copyright 2012, Richard Elling, All rights reserved.
+# Copyright 2011, Nexenta Systems, Inc. All rights reserved.
+#
+# CDDL HEADER START
+#
+#  The contents of this file are subject to the terms of the
+#  Common Development and Distribution License, Version 1.0 only
+#  (the "License").  You may not use this file except in compliance
+#  with the License.
+#
+#  You can obtain a copy of the license at Docs/cddl1.txt
+#  or http://www.opensolaris.org/os/licensing.
+#  See the License for the specific language governing permissions
+#  and limitations under the License.
+#
+# CDDL HEADER END
+#
+# Author: Richard.Elling@RichardElling.com
+#
+# Revision:
+#   1.9  29-Nov-2012
+#
+PATH=/usr/sbin:/usr/bin
+
+##############################
+# check to see if the NFS server module is loaded
+# if not, then the dtrace probes will fail ungracefully
+if [ "$(uname -s)" = "SunOS" ]; then
+	modinfo | awk '{print $6}' | grep -q iscsit
+	if [ $? != 0 ]; then
+		echo "error: iSCSI target module is not loaded, are you serving iSCSI?"
+		exit 1
+	fi
+fi
+
+##############################
+# --- Process Arguments ---
+#
+
+### default variables
+opt_blocksize=4096      # blocksize for alignment measurements
+sys_blocksize=512       # default system blocksize
+opt_client=0            # set if -c option set
+client_IP=""            # client named by -c; cleared here so that a value
+                        # inherited from the environment is never pasted
+                        # into the D program
+opt_clear=1             # set if screen to be cleared
+opt_json=0              # print in JSON format
+opt_top=0               # set if list trimmed to top
+top=0                   # number of lines trimmed
+interval=10             # default interval (seconds between reports)
+count=-1                # number of intervals to show (-1 never reaches
+                        # the exit test, so the tool runs until interrupted)
+
+### process options
+# h is part of the option string so that -h reaches the usage text
+# without getopts first complaining about an unknown option.
+while getopts b:c:Chjm:t: name
+do
+    case $name in
+        b)  opt_blocksize=$OPTARG ;;
+        c)  opt_client=1; client_IP=$OPTARG ;;
+        C)  opt_clear=0 ;;
+        j)  opt_json=1 ;;
+        m)  ;;  # undocumented: the option and its argument are accepted
+                # and ignored, exactly as before
+        t)  opt_top=1; top=$OPTARG ;;
+        h|?)    cat <<END >&2
+USAGE: iscsisvrtop [-b blocksize] [-Cj] [-c client_IP] [-t top] 
+                   [interval [count]]
+        -b blocksize   # alignment blocksize (default=4096)
+        -c client_IP   # trace for this client only
+        -C             # don't clear the screen
+        -j             # print output in JSON format
+        -t top         # print top number of entries only
+examples:
+    iscsisvrtop         # default output, 10 second samples
+    iscsisvrtop 1       # 1 second samples
+    iscsisvrtop -b 1024 # check alignment on 1KB boundary
+    iscsisvrtop -C 60   # 60 second samples, do not clear screen
+    iscsisvrtop -t 20   # print top 20 lines only
+    iscsisvrtop 5 12    # print 12 x 5 second samples
+END
+        exit 1
+    esac
+done
+
+# discard the options already handled by getopts
+shift $(( OPTIND - 1 ))
+
+### option logic
+# NOTE: > inside [[ ]] compares strings, so an operand is accepted only
+# when it sorts after the string "0"; an empty operand is skipped
+if [[ "$1" > 0 ]]; then
+    interval=$1
+    shift
+fi
+if [[ "$1" > 0 ]]; then
+    count=$1
+    shift
+fi
+# capture the output of clear only when screen clearing is enabled
+clearstr=""
+if (( opt_clear != 0 )); then
+    clearstr=$(clear)
+fi
+
+
+
+#################################
+# --- Main Program, DTrace ---
+#
+# The whole D program is one single-quoted shell word.  Every '$name'
+# sequence below briefly leaves the quotes so that the shell pastes in
+# the option value collected above.
+/usr/sbin/dtrace -n '
+/*
+ * Command line arguments
+ *
+ * OPT_blocksize is the alignment size divided by sys_blocksize, so the
+ * alignment test below works in units of sys_blocksize.
+ * NOTE(review): the division is integral, so an alignment size smaller
+ * than sys_blocksize yields 0 and the later modulo would divide by zero.
+ */
+inline int OPT_blocksize = '$opt_blocksize' / '$sys_blocksize';
+inline int OPT_client	= '$opt_client';
+inline int OPT_clear 	= '$opt_clear';
+inline int OPT_json	= '$opt_json';
+inline int OPT_top 	= '$opt_top';
+inline int INTERVAL 	= '$interval';
+inline int COUNTER 	= '$count';
+inline int TOP 	= '$top';
+inline string CLIENT	= "'$client_IP'";
+inline string CLEAR 	= "'$clearstr'";
+
+#pragma D option quiet
+
+/* increase dynvarsize if you get "dynamic variable drops" */
+#pragma D option dynvarsize=8m
+
+/*
+ * Print header
+ *
+ * Initialise the report countdown and the running byte totals, then
+ * print the start-up banner.  The banner is suppressed in JSON mode so
+ * that the output stream holds nothing but JSON documents.
+ */
+dtrace:::BEGIN 
+{
+    /* starting values */
+    counts = COUNTER;
+    secs = INTERVAL;
+    total_bytes_read = 0;
+    total_bytes_write = 0;
+
+    OPT_json ? 1 : printf("Tracing... Please wait.\n");
+}
+
+/*
+ * Filter as needed, based on starts
+ *
+ * Counts the operation for the remote address (and under "all" when no
+ * client filter is set) and stores the start time in the global array
+ * ts, keyed by arg1.  The completion clauses below only act on entries
+ * that were stamped here, so the client filter applies to them as well.
+ */
+iscsi:::xfer-start,
+iscsi:::nop-receive
+/OPT_client == 0 || CLIENT == args[0]->ci_remote/
+{ 
+    @count_iops[args[0]->ci_remote] = count();
+    OPT_client == 0 ? @count_iops["all"] = count() : 1;
+    ts[arg1] = timestamp;
+}
+
+/* 
+ * read operations 
+ *
+ * Selected by xfer_type == 0.  Records count, elapsed time, bytes and
+ * average transfer size per client, and under "all" when no client
+ * filter is set.  The elapsed time lives in a clause-local variable so
+ * that probes firing on other CPUs cannot overwrite it.
+ * NOTE(review): alignment is only sampled in the write clause although
+ * the file header describes Align% as covering reads and writes.
+ */
+iscsi:::xfer-done 
+/ts[arg1] != 0 && args[2]->xfer_type == 0/
+{
+    this->elapsed = timestamp - ts[arg1];
+    @count_read[args[0]->ci_remote] = count();
+    OPT_client == 0 ? @count_read["all"] = count() : 1;
+    @avgtime_read[args[0]->ci_remote] = avg(this->elapsed);
+    OPT_client == 0 ? @avgtime_read["all"] = avg(this->elapsed) : 1;
+    @bytes_read[args[0]->ci_remote] = sum(args[2]->xfer_len);
+    OPT_client == 0 ? @bytes_read["all"] = sum(args[2]->xfer_len) : 1;
+    @avg_bytes_read[args[0]->ci_remote] = avg(args[2]->xfer_len);
+    OPT_client == 0 ? @avg_bytes_read["all"] = avg(args[2]->xfer_len) : 1;
+    total_bytes_read += args[2]->xfer_len;
+    /* release the start stamp so the entry is not matched again */
+    ts[arg1] = 0;
+}
+
+/*
+ * write operations
+ *
+ * Selected by xfer_type == 1.  Same bookkeeping as for reads, plus the
+ * alignment sample (100 when xfer_loffset is a multiple of
+ * OPT_blocksize, 0 otherwise).  The alignment sample also feeds the
+ * "all" entry when no client filter is set, as every other aggregation
+ * in this clause does.
+ */
+iscsi:::xfer-done 
+/ts[arg1] != 0 && args[2]->xfer_type == 1/
+{
+    this->elapsed = timestamp - ts[arg1];
+    @count_write[args[0]->ci_remote] = count();
+    OPT_client == 0 ? @count_write["all"] = count() : 1;
+    @avgtime_write[args[0]->ci_remote] = avg(this->elapsed);
+    OPT_client == 0 ? @avgtime_write["all"] = avg(this->elapsed) : 1;
+    @bytes_write[args[0]->ci_remote] = sum(args[2]->xfer_len);
+    OPT_client == 0 ? @bytes_write["all"] = sum(args[2]->xfer_len) : 1;
+    @avg_bytes_write[args[0]->ci_remote] = avg(args[2]->xfer_len);
+    OPT_client == 0 ? @avg_bytes_write["all"] = avg(args[2]->xfer_len) : 1;
+    total_bytes_write += args[2]->xfer_len;
+    @avg_aligned[args[0]->ci_remote] =
+        avg((args[2]->xfer_loffset % OPT_blocksize) ? 0 : 100);
+    OPT_client == 0 ? @avg_aligned["all"] =
+        avg((args[2]->xfer_loffset % OPT_blocksize) ? 0 : 100) : 1;
+    ts[arg1] = 0;
+}
+
+/*
+ * nops are ops too!
+ *
+ * Pairs nop-send with the stamp left by nop-receive and records the
+ * count and elapsed time.
+ */
+iscsi:::nop-send
+/ts[arg1] != 0/
+{
+    this->elapsed = timestamp - ts[arg1];
+    @count_nop[args[0]->ci_remote] = count();
+    OPT_client == 0 ? @count_nop["all"] = count() : 1;
+    @avgtime_nop[args[0]->ci_remote] = avg(this->elapsed);
+    OPT_client == 0 ? @avgtime_nop["all"] = avg(this->elapsed) : 1;
+    ts[arg1] = 0;
+} 
+
+/*
+ * timer
+ *
+ * Counts down once per second.  This clause must stay ahead of the
+ * report clause, which tests secs on the same probe firing.
+ */
+profile:::tick-1sec
+{
+    secs--;
+}
+
+/*
+ * Print report
+ *
+ * Runs when the countdown reaches zero: scales the aggregations, prints
+ * one report in text or JSON form, then resets all data and restarts
+ * the countdown.
+ */
+profile:::tick-1sec
+/secs == 0/
+{
+    /*
+     * fetch 1 min load average
+     * hp_avenrun[0] is split into a whole part (divide by 65536) and a
+     * two-digit fraction (remainder scaled to hundredths)
+     */
+    self->load1a  = `hp_avenrun[0] / 65536;
+    self->load1b  = ((`hp_avenrun[0] % 65536) * 100) / 65536;
+
+    /* convert counters to Kbytes */
+    total_bytes_read /= 1024;
+    total_bytes_write /= 1024;
+
+    /* normalize to seconds giving a rate */
+    /* todo: this should be measured, not based on the INTERVAL */
+    normalize(@count_iops, INTERVAL);
+    normalize(@count_read, INTERVAL);
+    normalize(@count_write, INTERVAL);
+    normalize(@count_nop, INTERVAL);
+
+    /* normalize to KB per second */
+    normalize(@bytes_read, 1024 * INTERVAL);
+    normalize(@bytes_write, 1024 * INTERVAL);
+
+    /*
+     * average transfer sizes are sizes, not rates: convert bytes to KB
+     * without dividing by the interval
+     */
+    normalize(@avg_bytes_read, 1024);
+    normalize(@avg_bytes_write, 1024);
+
+    /* normalize average to microseconds */
+    normalize(@avgtime_read, 1000);
+    normalize(@avgtime_write, 1000);
+    normalize(@avgtime_nop, 1000);
+
+    /* print status (the screen is never cleared in JSON mode) */
+    OPT_clear && !OPT_json ? printf("%s", CLEAR) : 1;
+
+    OPT_json ?
+        printf("{ \"collector\": \"iscsisvrtop\", \"time\": \"%Y\", \"timestamp\": %d, \"interval\": %d, \"load\": %d.%02d, \"read_KB_int\": %d, \"write_KB_int\": %d, \"clientdata\": [",
+            walltimestamp, walltimestamp, INTERVAL,
+            self->load1a, self->load1b,
+            total_bytes_read, total_bytes_write)
+    :
+        printf("%Y load: %d.%02d read_KB: %d write_KB: %d\n",
+            walltimestamp, self->load1a, self->load1b,
+            total_bytes_read, total_bytes_write);
+
+    /* print headers (text mode only) */
+    OPT_json ? 1 :
+        printf("%-15s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\n",
+            "client", "ops", "reads", "writes", "nops", "rd_bw", "wr_bw",
+            "ard_sz", "awr_sz", "rd_t", "wr_t", "nop_t", "align%");
+
+    /* truncate to top lines if needed; only @count_iops is truncated */
+    OPT_top ? trunc(@count_iops, TOP) : 1;
+
+    /* one line (or one JSON object) per aggregation key */
+    OPT_json ?
+        printa("{\"address\": \"%s\", \"iops\": %@d, \"reads\": %@d, \"writes\": %@d, \"nops\": %@d, \"read_bw\": %@d, \"write_bw\": %@d, \"avg_read_size\": %@d, \"avg_write_size\": %@d, \"avg_read_t\": %@d, \"avg_write_t\": %@d, \"avg_nop_t\": %@d, \"aligned_pct\": %@d},",
+            @count_iops, @count_read, @count_write, @count_nop,
+            @bytes_read, @bytes_write, @avg_bytes_read, @avg_bytes_write,
+            @avgtime_read, @avgtime_write, @avgtime_nop, @avg_aligned)
+    :
+        printa("%-15s\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\n",
+            @count_iops, @count_read, @count_write, @count_nop,
+            @bytes_read, @bytes_write, @avg_bytes_read, @avg_bytes_write,
+            @avgtime_read, @avgtime_write, @avgtime_nop, @avg_aligned);
+
+    /* the empty object follows the trailing comma left by the last entry */
+    OPT_json ? printf("{}]}\n") : 1;
+
+    /* clear data */
+    trunc(@count_iops); trunc(@count_read); trunc(@count_write);
+    trunc(@count_nop); trunc(@bytes_read); trunc(@bytes_write);
+    trunc(@avg_bytes_read); trunc(@avg_bytes_write);
+    trunc(@avgtime_read); trunc(@avgtime_write); trunc(@avgtime_nop);
+    trunc(@avg_aligned);
+    total_bytes_read = 0;
+    total_bytes_write = 0;
+    secs = INTERVAL;
+    counts--;
+}
+
+/*
+ * end of program 
+ *
+ * counts starts at COUNTER and drops by one per report, so a COUNTER
+ * of -1 never satisfies this predicate.
+ */
+profile:::tick-1sec
+/counts == 0/
+{
+    exit(0);
+}
+
+/*
+ * clean up when interrupted
+ *
+ * Empties every aggregation.
+ * NOTE(review): presumably this keeps dtrace from printing leftover
+ * aggregation data on exit - confirm.
+ */
+dtrace:::END
+{
+    trunc(@count_iops); trunc(@count_read); trunc(@count_write); 
+    trunc(@count_nop); trunc(@bytes_read); trunc(@bytes_write);
+    trunc(@avg_bytes_read); trunc(@avg_bytes_write);
+    trunc(@avgtime_read); trunc(@avgtime_write); trunc(@avgtime_nop);
+    trunc(@avg_aligned);
+}
+'

File utils/tools/nfssvrtop

+#!/usr/bin/ksh
+#
+# nfssvrtop - display top NFS v3 & v4 I/O events on a server.
+#
+# This is measuring the response time between an incoming NFS operation
+# and its response. In general, this measures the server's view of how
+# quickly it can respond to requests. By default, the list shows responses
+# to each client.
+# 	
+# Top-level fields:
+#   load    1 min load average
+#   read    total KB read during sample
+#   swrite  total KB sync writes during sample
+#   awrite  total KB async writes during sample
+#
+# The following per-client and "all" clients fields are shown
+#   Ver     NFS version (3 or 4)
+#   Client  IP addr of client
+#   NFSOPS  NFS operations per second
+#   Reads   Read operations per second
+#   SWrites Sync write operations per second
+#   AWrites Async write operations per second
+#   Commits Commits per second
+#   Rd_bw   Read KB/sec
+#   SWr_bw  Sync write KB/sec
+#   AWr_bw  Async write KB/sec
+#   Rd_t    Average read time in microseconds
+#   SWr_t   Average sync write time in microseconds
+#   AWr_t   Average async write time in microseconds
+#   Com_t   Average commit time in microseconds
+#   Align%  Percentage of read/write operations that have offset aligned to
+#           blocksize (default=4096 bytes)
+#
+# Note: NFSv4 compound operations are not measured, per se, but they are
+# counted in the total operations count.
+#
+# Note: dtrace doesn't do floating point. A seemingly zero response or 
+# count can result due to integer division.
+# 
+#
+# INSPIRATION:  top(1) by William LeFebvre and iotop by Brendan Gregg
+#
+# Copyright 2011, Nexenta Systems, Inc. All rights reserved.
+#
+# CDDL HEADER START
+#
+#  The contents of this file are subject to the terms of the
+#  Common Development and Distribution License, Version 1.0 only
+#  (the "License").  You may not use this file except in compliance
+#  with the License.
+#
+#  You can obtain a copy of the license at Docs/cddl1.txt
+#  or http://www.opensolaris.org/os/licensing.
+#  See the License for the specific language governing permissions
+#  and limitations under the License.
+#
+# CDDL HEADER END
+#
+# Author: Richard.Elling@RichardElling.com
+#
+# Revision:
+#   1.8	29-Nov-2012
+#
+# TODO: running count of nfsd threads (done efficiently)
+# TODO: mount point filter
+PATH=/usr/sbin:/usr/bin
+
+##############################
+# check to see if the NFS server module is loaded
+# if not, then the dtrace probes will fail ungracefully
+if [ "$(uname -s)" = "SunOS" ]; then
+	modinfo | awk '{print $6}' | grep -q nfssrv
+	if [ $? != 0 ]; then
+		echo "error: NFS server module is not loaded, are you serving NFS?"
+		exit 1
+	fi
+fi
+
+##############################
+# --- Process Arguments ---
+#
+
+### default variables
+opt_blocksize=4096  # blocksize for alignment measurements
+opt_client=0        # set if -c option set
+client_IP=""        # client named by -c; cleared here so that a value
+                    # inherited from the environment is never pasted
+                    # into the D program
+opt_clear=1         # set if screen to be cleared
+opt_json=0          # set if output is JSON
+opt_top=0           # set if list trimmed to top
+top=0               # number of lines trimmed
+opt_vers=0          # set if NFS version restricted
+vers=3              # version of NFS to restrict
+interval=10         # default interval (seconds between reports)
+count=-1            # number of intervals to show
+
+### process options
+# h is part of the option string so that -h reaches the usage text
+# without getopts first complaining about an unknown option.  Any
+# unknown option also lands in the usage branch, which exits with 1.
+while getopts b:c:Chjn:t: name
+do
+	case $name in
+	b)	opt_blocksize=$OPTARG ;;
+	c)	opt_client=1; client_IP=$OPTARG ;;
+	C)	opt_clear=0 ;;
+	j)	opt_json=1 ;;
+	n)	opt_vers=1; vers=$OPTARG ;;
+	t)	opt_top=1; top=$OPTARG ;;
+	h|?)	cat <<END >&2
+USAGE: nfssvrtop [-Cj] [-b blocksize] [-c client_IP] [-n vers] [-t top] 
+                 [interval [count]]
+             -b blocksize # alignment blocksize (default=4096)
+             -c client_IP # trace for this client only
+             -C           # don't clear the screen
+             -j           # print output in JSON format
+             -n vers      # show only NFS version
+             -t top       # print top number of entries only
+   examples:
+     nfssvrtop         # default output, 10 second samples
+     nfssvrtop -b 1024 # check alignment on 1KB boundary
+     nfssvrtop 1       # 1 second samples
+     nfssvrtop -n 4	   # only show NFSv4 traffic
+     nfssvrtop -C 60   # 60 second samples, do not clear screen
+     nfssvrtop -t 20   # print top 20 lines only
+     nfssvrtop 5 12    # print 12 x 5 second samples
+END
+		exit 1
+	esac
+done
+
+# skip past the options that getopts has already consumed
+shift $((OPTIND - 1))
+
+### option logic
+# a non-empty first operand overrides the default interval
+if [ -n "$1" ]; then
+        interval=$1
+        shift
+fi
+# a non-empty second operand overrides the default count
+if [ -n "$1" ]; then
+        count=$1
+        shift
+fi
+# the output of clear is only captured when clearing is enabled
+clearstr=""
+if [ "$opt_clear" = 1 ]; then
+        clearstr=$(clear)
+fi
+
+#################################
+# --- Main Program, DTrace ---
+#
+# The whole D program is one single-quoted shell word.  Every '$name'
+# sequence below briefly leaves the quotes so that the shell pastes in
+# the option value collected above.
+/usr/sbin/dtrace -Cn '
+/*
+ * Command line arguments
+ * (each shell value becomes the body of a D inline constant)
+ */
+inline int OPT_blocksize = '$opt_blocksize';
+inline int OPT_client	= '$opt_client';
+inline int OPT_clear 	= '$opt_clear';
+inline int OPT_top 	= '$opt_top';
+inline int OPT_json	= '$opt_json';
+inline int OPT_vers	= '$opt_vers';
+inline int INTERVAL 	= '$interval';
+inline int COUNTER 	= '$count';
+inline int TOP 	= '$top';
+inline string CLIENT	= "'$client_IP'";
+inline int VERS	= '$vers';
+inline string CLEAR 	= "'$clearstr'";
+
+#pragma D option quiet
+
+/* 
+ * increase dynvarsize if you get "dynamic variable drops"
+ */
+#pragma D option dynvarsize=12m
+
+/*
+ * Print header
+ *
+ * Initialises the report countdown and the running byte totals.  The
+ * start-up banner is only printed when JSON output is not selected.
+ */
+dtrace:::BEGIN 
+{
+	/* starting values */
+	counts = COUNTER;
+	secs = INTERVAL;
+	total_read_b = 0;
+	total_swrite_b = 0;
+	total_awrite_b = 0;
+
+	OPT_json ? 1 : printf("Tracing... Please wait.\n");
+}
+
+/*
+ * Filter as needed, based on starts
+ *
+ * NFSv3: tag the thread with version "3" and count the operation for
+ * the remote address, and under "all" when no client filter is set.
+ * NOTE(review): OPT_vers and VERS are not consulted by the predicates
+ * in this part of the program; presumably they are applied elsewhere.
+ */
+nfsv3:nfssrv::op-access-start,
+nfsv3:nfssrv::op-create-start,
+nfsv3:nfssrv::op-commit-start,
+nfsv3:nfssrv::op-fsinfo-start,
+nfsv3:nfssrv::op-fsstat-start,
+nfsv3:nfssrv::op-getattr-start,
+nfsv3:nfssrv::op-link-start,
+nfsv3:nfssrv::op-lookup-start,
+nfsv3:nfssrv::op-mkdir-start,
+nfsv3:nfssrv::op-mknod-start,
+nfsv3:nfssrv::op-null-start,
+nfsv3:nfssrv::op-pathconf-start,
+nfsv3:nfssrv::op-read-start,
+nfsv3:nfssrv::op-readdir-start,
+nfsv3:nfssrv::op-readdirplus-start,
+nfsv3:nfssrv::op-readlink-start,
+nfsv3:nfssrv::op-remove-start,
+nfsv3:nfssrv::op-rename-start,
+nfsv3:nfssrv::op-rmdir-start,
+nfsv3:nfssrv::op-setattr-start,
+nfsv3:nfssrv::op-symlink-start,
+nfsv3:nfssrv::op-write-start
+/OPT_client == 0 || CLIENT == args[0]->ci_remote/
+{ 
+	self->vers = "3";
+	@c_nfsops[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_nfsops[self->vers, "all"] = count() : 1;
+}
+
+/*
+ * NFSv4: same bookkeeping with version "4".  The compound-start probe
+ * is part of this list, so compounds are included in the operation
+ * count.
+ */
+nfsv4:nfssrv::cb-recall-start,
+nfsv4:nfssrv::compound-start,
+nfsv4:nfssrv::null-start,
+nfsv4:nfssrv::op-access-start,
+nfsv4:nfssrv::op-close-start,
+nfsv4:nfssrv::op-commit-start,
+nfsv4:nfssrv::op-create-start,
+nfsv4:nfssrv::op-delegpurge-start,
+nfsv4:nfssrv::op-delegreturn-start,
+nfsv4:nfssrv::op-getattr-start,
+nfsv4:nfssrv::op-getfh-start,
+nfsv4:nfssrv::op-link-start,
+nfsv4:nfssrv::op-lock-start,
+nfsv4:nfssrv::op-lockt-start,
+nfsv4:nfssrv::op-locku-start,
+nfsv4:nfssrv::op-lookup-start,
+nfsv4:nfssrv::op-lookupp-start,
+nfsv4:nfssrv::op-nverify-start,
+nfsv4:nfssrv::op-open-confirm-start,
+nfsv4:nfssrv::op-open-downgrade-start,
+nfsv4:nfssrv::op-open-start,
+nfsv4:nfssrv::op-openattr-start,
+nfsv4:nfssrv::op-putfh-start,
+nfsv4:nfssrv::op-putpubfh-start,
+nfsv4:nfssrv::op-putrootfh-start,
+nfsv4:nfssrv::op-read-start,
+nfsv4:nfssrv::op-readdir-start,
+nfsv4:nfssrv::op-readlink-start,
+nfsv4:nfssrv::op-release-lockowner-start,
+nfsv4:nfssrv::op-remove-start,
+nfsv4:nfssrv::op-rename-start,
+nfsv4:nfssrv::op-renew-start,
+nfsv4:nfssrv::op-restorefh-start,
+nfsv4:nfssrv::op-savefh-start,
+nfsv4:nfssrv::op-secinfo-start,
+nfsv4:nfssrv::op-setattr-start,
+nfsv4:nfssrv::op-setclientid-confirm-start,
+nfsv4:nfssrv::op-setclientid-start,
+nfsv4:nfssrv::op-verify-start,
+nfsv4:nfssrv::op-write-start
+/OPT_client == 0 || CLIENT == args[0]->ci_remote/
+{ 
+	self->vers = "4";
+	@c_nfsops[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_nfsops[self->vers, "all"] = count() : 1;
+}
+
+/*
+ * measure response time for commits, reads, and writes
+ *
+ * The start stamp is only set when the client filter passes.  The
+ * commit, read and write clauses that follow test self->startts, so
+ * they rely on this clause appearing first.
+ */
+nfsv3:nfssrv::op-commit-start,
+nfsv3:nfssrv::op-read-start,
+nfsv3:nfssrv::op-write-start,
+nfsv4:nfssrv::op-commit-start,
+nfsv4:nfssrv::op-read-start,
+nfsv4:nfssrv::op-write-start
+/OPT_client == 0 || CLIENT == args[0]->ci_remote/
+{ 
+	self->startts = timestamp;
+}
+
+
+/*
+ * commit 
+ *
+ * On start: count the commit per version and client, and under "all"
+ * when no client filter is set.
+ */
+nfsv3:nfssrv::op-commit-start,
+nfsv4:nfssrv::op-commit-start
+/self->startts/
+{
+	@c_commit_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_commit_client[self->vers, "all"] = count() : 1;
+}
+
+/*
+ * On done: average the elapsed time since the matching start and clear
+ * the start stamp.  The elapsed time is held in a clause-local variable
+ * so that probes firing on other CPUs cannot overwrite it.
+ */
+nfsv3:nfssrv::op-commit-done,
+nfsv4:nfssrv::op-commit-done
+/self->startts/
+{
+	this->elapsed = timestamp - self->startts;
+	@avgtime_commit[self->vers, args[0]->ci_remote] = avg(this->elapsed);
+	OPT_client == 0 ?
+	    @avgtime_commit[self->vers, "all"] = avg(this->elapsed) : 1;
+	self->startts = 0;
+}
+
+/*
+ * read
+ *
+ * On start: count the read, add its size to the byte totals and record
+ * whether its offset is a multiple of OPT_blocksize (100 when aligned,
+ * 0 when not, so the average reads as a percentage).  Every "all"
+ * entry, including the alignment one, is only updated when no client
+ * filter is active.
+ */
+nfsv3:nfssrv::op-read-start,
+nfsv4:nfssrv::op-read-start
+/self->startts/
+{
+	@c_read_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_read_client[self->vers, "all"] = count() : 1;
+	@read_b[self->vers, args[0]->ci_remote] = sum(args[2]->count);
+	OPT_client == 0 ? @read_b[self->vers, "all"] = sum(args[2]->count) : 1;
+	total_read_b += args[2]->count;
+	@avg_aligned[self->vers, args[0]->ci_remote] =
+	    avg((args[2]->offset % OPT_blocksize) ? 0 : 100);
+	OPT_client == 0 ? @avg_aligned[self->vers, "all"] =
+	    avg((args[2]->offset % OPT_blocksize) ? 0 : 100) : 1;
+}
+
+/*
+ * On done: average the elapsed time since the matching start and clear
+ * the start stamp.  The elapsed time is held in a clause-local variable
+ * so that probes firing on other CPUs cannot overwrite it.
+ */
+nfsv3:nfssrv::op-read-done,
+nfsv4:nfssrv::op-read-done
+/self->startts/
+{
+	this->elapsed = timestamp - self->startts;
+	@avgtime_read[self->vers, args[0]->ci_remote] = avg(this->elapsed);
+	OPT_client == 0 ?
+	    @avgtime_read[self->vers, "all"] = avg(this->elapsed) : 1;
+	self->startts = 0;
+}
+
+/*
+ * write (sync)
+ */
+nfsv3:nfssrv::op-write-start,
+nfsv4:nfssrv::op-write-start
+/self->startts/
+{
+        @avg_aligned[self->vers, args[0]->ci_remote] = 
+            avg((args[2]->offset % OPT_blocksize) ? 0 : 100);
+        @avg_aligned[self->vers, "all"] = 
+            avg((args[2]->offset % OPT_blocksize) ? 0 : 100);
+}
+
+/*
+ * NFSv3 write with a non-zero stable flag: mark the thread as doing a
+ * sync write, then count the operation and sum its data length.
+ *
+ * The length is kept in a clause-local variable; a global scratch
+ * variable could be overwritten by a write probe firing on another CPU
+ * before it is aggregated.
+ */
+nfsv3:nfssrv::op-write-start
+/self->startts && args[2]->stable/
+{
+	self->issync = 1;
+	this->nbytes = args[2]->data.data_len;
+	@c_swrite_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_swrite_client[self->vers, "all"] = count() : 1;
+	@swrite_b[self->vers, args[0]->ci_remote] = sum(this->nbytes);
+	OPT_client == 0 ? @swrite_b[self->vers, "all"] = sum(this->nbytes) : 1;
+	total_swrite_b += this->nbytes;
+}
+
+/*
+ * NFSv4 write with a non-zero stable flag: same accounting as the NFSv3
+ * sync clause, but the length is read from args[2]->data_len.
+ *
+ * The length is clause-local so that a concurrent firing on another CPU
+ * cannot overwrite it before it is aggregated.
+ */
+nfsv4:nfssrv::op-write-start
+/self->startts && args[2]->stable/
+{
+	self->issync = 1;
+	this->nbytes = args[2]->data_len;
+	@c_swrite_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_swrite_client[self->vers, "all"] = count() : 1;
+	@swrite_b[self->vers, args[0]->ci_remote] = sum(this->nbytes);
+	OPT_client == 0 ? @swrite_b[self->vers, "all"] = sum(this->nbytes) : 1;
+	total_swrite_b += this->nbytes;
+}
+
+/*
+ * Sync write completion (self->issync set by the matching start clause):
+ * average the time since self->startts, then clear self->startts so the
+ * async completion clause does not also match this firing.
+ *
+ * The elapsed time is clause-local so that a concurrent firing on
+ * another CPU cannot overwrite it before it is aggregated.
+ */
+nfsv3:nfssrv::op-write-done,
+nfsv4:nfssrv::op-write-done
+/self->startts && self->issync/
+{
+	this->elapsed = timestamp - self->startts;
+	@avgtime_swrite[self->vers, args[0]->ci_remote] = avg(this->elapsed);
+	OPT_client == 0 ?
+	    @avgtime_swrite[self->vers, "all"] = avg(this->elapsed) : 1;
+	self->startts = 0;
+}
+
+/*
+ * write (async)
+ */
+/*
+ * NFSv3 write with a zero stable flag: mark the thread as doing an
+ * async write, then count the operation and sum its data length.
+ *
+ * The length is clause-local so that a concurrent firing on another CPU
+ * cannot overwrite it before it is aggregated.
+ */
+nfsv3:nfssrv::op-write-start
+/self->startts && !args[2]->stable/
+{
+	self->issync = 0;
+	this->nbytes = args[2]->data.data_len;
+	@c_awrite_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_awrite_client[self->vers, "all"] = count() : 1;
+	@awrite_b[self->vers, args[0]->ci_remote] = sum(this->nbytes);
+	OPT_client == 0 ? @awrite_b[self->vers, "all"] = sum(this->nbytes) : 1;
+	total_awrite_b += this->nbytes;
+}
+
+/*
+ * NFSv4 write with a zero stable flag: same accounting as the NFSv3
+ * async clause, but the length is read from args[2]->data_len.
+ *
+ * The length is clause-local so that a concurrent firing on another CPU
+ * cannot overwrite it before it is aggregated.
+ */
+nfsv4:nfssrv::op-write-start
+/self->startts && !args[2]->stable/
+{
+	self->issync = 0;
+	this->nbytes = args[2]->data_len;
+	@c_awrite_client[self->vers, args[0]->ci_remote] = count();
+	OPT_client == 0 ? @c_awrite_client[self->vers, "all"] = count() : 1;
+	@awrite_b[self->vers, args[0]->ci_remote] = sum(this->nbytes);
+	OPT_client == 0 ? @awrite_b[self->vers, "all"] = sum(this->nbytes) : 1;
+	total_awrite_b += this->nbytes;
+}
+
+/*
+ * Async write completion (self->issync is zero): average the time since
+ * self->startts per client (and for "all" when OPT_client is 0), then
+ * clear self->startts.
+ *
+ * The elapsed time is clause-local so that a concurrent firing on
+ * another CPU cannot overwrite it before it is aggregated.
+ */
+nfsv3:nfssrv::op-write-done,
+nfsv4:nfssrv::op-write-done
+/self->startts && !self->issync/
+{
+	this->elapsed = timestamp - self->startts;
+	@avgtime_awrite[self->vers, args[0]->ci_remote] = avg(this->elapsed);
+	OPT_client == 0 ?
+	    @avgtime_awrite[self->vers, "all"] = avg(this->elapsed) : 1;
+	self->startts = 0;
+}
+
+/*
+ * timer: count the interval down by one on every 1-second tick; the
+ * report clause fires when the countdown reaches zero.
+ */
+profile:::tick-1sec
+{
+	secs -= 1;
+}
+
+/*
+ * Print report
+ *
+ * Fires on the 1-second tick when the interval countdown (secs) has
+ * reached zero. It prints the summary line, the optional column
+ * headers and the per-client rows (plain text or JSON depending on
+ * OPT_json), then clears all aggregations and byte totals, reloads the
+ * countdown from INTERVAL and decrements the remaining report count.
+ *
+ * The load average parts are clause-local: thread-local variables set
+ * from a profile probe attach to whichever thread was interrupted and
+ * are never zeroed, so their storage would never be released.
+ */
+profile:::tick-1sec
+/secs == 0/
+{
+	/* fetch 1 min load average: integer part and two-digit fraction */
+	this->load1a = `hp_avenrun[0] / 65536;
+	this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536;
+
+	/* convert counters to Kbytes */
+	total_read_b /= 1024;
+	total_swrite_b /= 1024;
+	total_awrite_b /= 1024;
+
+	/* normalize to seconds giving a rate */
+	/* todo: this should be measured, not based on the INTERVAL */
+	normalize(@c_nfsops, INTERVAL);
+	normalize(@c_read_client, INTERVAL);
+	normalize(@c_swrite_client, INTERVAL);
+	normalize(@c_awrite_client, INTERVAL);
+	normalize(@c_commit_client, INTERVAL);
+
+	/* normalize to KB per second */
+	normalize(@read_b, 1024 * INTERVAL);
+	normalize(@awrite_b, 1024 * INTERVAL);
+	normalize(@swrite_b, 1024 * INTERVAL);
+
+	/* normalize average to microseconds */
+	normalize(@avgtime_read, 1000);
+	normalize(@avgtime_swrite, 1000);
+	normalize(@avgtime_awrite, 1000);
+	normalize(@avgtime_commit, 1000);
+
+	/* print status; the screen is never cleared in JSON mode */
+	OPT_clear && !OPT_json ? printf("%s", CLEAR) : 1;
+
+	OPT_json ?
+		printf("{ \"collector\": \"nfssvrtop\", \"time\": \"%Y\", \"timestamp\": %d, \"interval\": %d, \"load\": %d.%02d, \"read_KB_int\": %d, \"sync_write_KB_int\": %d, \"async_write_KB_int\": %d, \"clientdata\": [",
+			walltimestamp, walltimestamp, INTERVAL,
+			this->load1a, this->load1b,
+			total_read_b, total_swrite_b, total_awrite_b)
+	:
+		printf("%Y, load: %d.%02d, read: %-8d KB, swrite: %-8d KB, awrite: %-8d KB\n",
+			walltimestamp, this->load1a, this->load1b,
+			total_read_b, total_swrite_b, total_awrite_b);
+
+	/* print headers (text mode only) */
+	OPT_json ? 1 :
+		printf("%s\t%-15s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\t%7s\n",
+			"Ver", "Client", "NFSOPS",
+			"Reads", "SWrites", "AWrites", "Commits",
+			"Rd_bw", "SWr_bw", "AWr_bw",
+			"Rd_t", "SWr_t", "AWr_t", "Com_t", "Align%");
+
+	/* truncate to top lines if needed */
+	OPT_top ? trunc(@c_nfsops, TOP) : 1;
+
+	/* one row per (version, client) key across all aggregations */
+	OPT_json ?
+		printa("{\"version\": \"%s\", \"address\": \"%s\", \"NFSOPS\": %@d, \"reads\": %@d, \"sync_writes\": %@d, \"async_writes\": %@d, \"commits\": %@d, \"read_bw\": %@d, \"sync_write_bw\": %@d, \"async_write_bw\": %@d, \"avg_read_t\": %@d, \"avg_sync_write_t\": %@d, \"avg_async_write_t\": %@d, \"avg_commit_t\": %@d, \"aligned_pct\": %@d },",
+			@c_nfsops, @c_read_client, @c_swrite_client, @c_awrite_client,
+			@c_commit_client, @read_b, @swrite_b, @awrite_b,
+			@avgtime_read, @avgtime_swrite, @avgtime_awrite,
+			@avgtime_commit, @avg_aligned)
+	:
+		printa("%s\t%-15s\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\t%7@d\n",
+			@c_nfsops, @c_read_client, @c_swrite_client, @c_awrite_client,
+			@c_commit_client, @read_b, @swrite_b, @awrite_b,
+			@avgtime_read, @avgtime_swrite, @avgtime_awrite,
+			@avgtime_commit, @avg_aligned);
+
+	/* every JSON row ends with a comma, so close with an empty object */
+	OPT_json ? printf("{}]}\n") : 1;
+
+	/* clear data */
+	trunc(@c_nfsops); trunc(@c_read_client); trunc(@c_swrite_client);
+	trunc(@c_awrite_client); trunc(@c_commit_client);
+	trunc(@read_b); trunc(@awrite_b); trunc(@swrite_b);
+	trunc(@avgtime_read); trunc(@avgtime_swrite); trunc(@avgtime_awrite);
+	trunc(@avgtime_commit); trunc(@avg_aligned);
+	total_read_b = 0;
+	total_swrite_b = 0;
+	total_awrite_b = 0;
+	secs = INTERVAL;
+	counts--;
+}
+
+/*
+ * end of program: stop tracing once the remaining report count,
+ * decremented by the report clause, has reached zero.
+ */
+profile:::tick-1sec
+/counts == 0/
+{
+	exit(0);
+}
+
+/*
+ * clean up when interrupted: empty every aggregation when tracing ends.
+ */
+dtrace:::END
+{
+	/* operation counts */
+	trunc(@c_nfsops);
+	trunc(@c_read_client);
+	trunc(@c_swrite_client);
+	trunc(@c_awrite_client);
+	trunc(@c_commit_client);
+
+	/* byte sums */
+	trunc(@read_b);
+	trunc(@awrite_b);
+	trunc(@swrite_b);
+
+	/* response time and alignment averages */
+	trunc(@avgtime_read);
+	trunc(@avgtime_swrite);
+	trunc(@avgtime_awrite);
+	trunc(@avgtime_commit);
+	trunc(@avg_aligned);
+}
+'

File utils/tools/zilstat

+#! /usr/bin/ksh -p
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+# Portions Copyright 2009 Sun Microsystems, Inc.
+# Portions Copyright 2009 Richard Elling
+#
+# File:   zilstat.ksh
+# Author: Richard.Elling@RichardElling.com
+# Online information: 
+#   http://www.RichardElling.com/Home/scripts-and-programs-1/zilstat-intro
+#
+# This dtrace program will help identify the ZIL activity by sampling
+# writes sent to the ZIL.
+# output:
+#     [TIME]
+#     Bytes - total bytes written to ZIL over the interval
+#     Bytes/S - bytes/s written to ZIL over the interval
+#     Max-Rate - maximum rate during any 1-second sample
+# this output is listed for both the size of the data and the
+# size of the buffer containing the data.
+# In an attempt to reconcile writes > zfs_immediate_write_sz
+# or to otherwise make better decisions, the size of the buffer
+# is also represented in bins: <=4kBytes, 4-32kBytes, and >= 32kBytes.
+# This should help you determine if the workload contains a bunch of
+# itty-bitty synchronous writes or just a few, large writes. This may
+# be important because if the pool does not have a log device and the
+# writes are > 32kBytes (zfs_immediate_write_sz) then they are not written
+# to the ZIL, but are instead written directly to the pool. OTOH, if there
+# is a separate log, then the writes are always sent to the log with the
+# expectation that the log device is always faster (lower latency) than
+# the pool. However, until a per-pool view is generated, the stats collected
+# here are for all pools.
+#
+# TODO: add per-pool option which also knows about zfs_immediate_write_sz
+#       logic.
+#
+##############################
+# --- Process Arguments ---
+#
+
+### default variables
+# option flags (0 = off)
+opt_mega=0
+opt_pool=0
+opt_time=0
+opt_txg=0
+# pool filter: enabled later when -p is given
+filter=0
+pool=""
+# reporting cadence: -1 means "no limit" for lines and count
+interval=1
+count=-1
+lines=-1
+
+### process options
+# -l, -M, -p and -t set the matching variables; -h or any unrecognized
+# option prints the usage text to stderr and exits with status 1.
+while getopts hl:Mp:t name
+do
+	case $name in
+	l)  lines=$OPTARG ;;
+	M)  opt_mega=1 ;;
+	p)  opt_pool=1; pool=$OPTARG ;;
+	t)  opt_time=1 ;;
+	h|?)    ME=$(basename "$0")
+		cat <<-END >&2
+		Usage: $ME [-hMt] [-l linecount] [-p poolname] [interval [count]]
+    -h  # print this usage message
+    -M  # print numbers as megabytes (base 10)
+    -t  # print timestamp
+    -p poolname      # only look at poolname
+    -l linecount    # print header every linecount lines (default=only once)
+    interval in seconds or "txg" for transaction group commit intervals
+             note: "txg" only appropriate when -p poolname is used
+    count will limit the number of intervals reported
+
+    examples:
+        $ME # default output, 1 second samples
+        $ME 10  # 10 second samples
+        $ME 10 6    # print 6 x 10 second samples
+        $ME -p rpool    # show ZIL stats for rpool only
+
+    output:
+        [TIME]
+        N-Bytes    - data bytes written to ZIL over the interval
+        N-Bytes/s  - data bytes/s written to ZIL over the interval
+        N-Max-Rate - maximum data rate during any 1-second sample
+        B-Bytes    - buffer bytes written to ZIL over the interval
+        B-Bytes/s  - buffer bytes/s written to ZIL over the interval
+        B-Max-Rate - maximum buffer rate during any 1-second sample
+        ops        - number of synchronous iops per interval
+        <=4kB      - number of synchronous iops <= 4kBytes per interval
+        4-32kB     - number of synchronous iops 4-32kBytes per interval
+        >=32kB     - number of synchronous iops >= 32kBytes per interval
+    note: data bytes are actual data, total bytes counts buffer size
+		END
+		exit 1
+	esac
+done
+
+# drop the parsed options, leaving [interval [count]] in $1 and $2
+shift $(( $OPTIND - 1 ))
+
+### option logic
+# Inside [[ ]] the > operator is a string comparison, so any argument
+# that sorts after "0" is accepted; this is what lets the word "txg"
+# through as an interval.
+if [[ "$1" > 0 ]]; then
+	interval=$1; shift
+fi
+if [[ "$1" > 0 ]]; then
+	count=$1; shift
+fi
+if (( opt_pool )); then
+	filter=1
+fi
+
+# "txg" reports once per transaction group instead of on a timer; it is
+# passed to the D program as interval 0 with opt_txg set.
+if [[ "$interval" == "txg" ]]; then
+	if [[ $opt_pool != 1 ]]; then
+		# errors go to stderr, like the usage text
+		echo "error: -p poolname option must be used for txg intervals" >&2
+		exit 1
+	fi
+	opt_txg=1
+	interval=0
+fi
+
+##############################
+# --- Main Program, DTrace ---
+
+/usr/sbin/dtrace -n '
+#pragma D option quiet
+ /* shell option values spliced into the D program as constants */
+ inline int OPT_time = '$opt_time';
+ inline int OPT_txg = '$opt_txg';
+ inline int OPT_pool = '$opt_pool';
+ inline int OPT_mega = '$opt_mega';
+ inline int INTERVAL = '$interval';
+ inline int LINES = '$lines';
+ inline int COUNTER = '$count';
+ inline int FILTER = '$filter';
+ inline string POOL = "'$pool'";
+
+ /*
+  * Initialize the global counters before any sample is taken.
+  */
+ dtrace:::BEGIN
+ {
+    /* starting values */
+    MEGA = 1000000;
+    counts = COUNTER;
+    secs = INTERVAL;
+    /* divisor for per-second rates; forced to 1 when INTERVAL is 0 */
+    interval = INTERVAL;
+    interval == 0 ? interval++ : 1;
+    line = 0;
+    nused=0;
+    nused_max_per_sec=0;
+    nused_per_sec=0;
+    size=0;
+    size_max_per_sec=0;
+    size_per_sec=0;
+    syncops=0;
+    size_4k=0;
+    size_4k_32k=0;
+    size_32k=0;
+    OPT_txg ? printf("waiting for txg commit...\n") : 1;
+ }
+
+ /*
+  * On every zil_lwb_write_start call (for the selected pool, or for any
+  * pool when no pool filter is set) add the used and allocated sizes of
+  * the log write block to the interval and per-second totals, and count
+  * the operation in the size bin matching lwb_sz.
+  */
+fbt::zil_lwb_write_start:entry
+/OPT_pool == 0 || POOL == args[0]->zl_dmu_pool->dp_spa->spa_name/
+{
+	/* operation count and size bins */
+	syncops++;
+	args[1]->lwb_sz <= 4096 ? size_4k++ : 1;
+	args[1]->lwb_sz > 4096 && args[1]->lwb_sz < 32768 ? size_4k_32k++ : 1;
+	args[1]->lwb_sz >= 32768 ? size_32k++ : 1;
+
+	/* bytes used within the buffer */
+	nused_per_sec += args[1]->lwb_nused;
+	nused += args[1]->lwb_nused;
+
+	/* size of the buffer itself */
+	size_per_sec += args[1]->lwb_sz;
+	size += args[1]->lwb_sz;
+}
+
+/*
+ * Timer: once a second, advance the clock (counting up in txg mode,
+ * down otherwise), fold the per-second byte counts into the running
+ * maxima, and restart the per-second counters.
+ */
+profile:::tick-1sec
+{
+	OPT_txg ? secs++ : secs--;
+
+	/* buffer bytes */
+	size_per_sec > size_max_per_sec ? size_max_per_sec = size_per_sec : 1;
+	size_per_sec = 0;
+
+	/* data bytes */
+	nused_per_sec > nused_max_per_sec ? nused_max_per_sec = nused_per_sec : 1;
+	nused_per_sec = 0;
+}
+
+/*
+ * Print header (interval mode): emitted whenever the line countdown is
+ * zero, after which the countdown is reloaded from LINES.
+ */
+profile:::tick-1sec
+/OPT_txg == 0 && line == 0/
+{
+	/* optional timestamp column */
+	OPT_time ? printf("%-20s ", "TIME") : 1;
+
+	/* data and buffer columns, in megabytes or bytes */
+	OPT_mega ?
+	    printf("%10s %10s %10s %10s %10s %10s",
+		"N-MB", "N-MB/s", "N-Max-Rate",
+		"B-MB", "B-MB/s", "B-Max-Rate") :
+	    printf("%10s %10s %10s %10s %10s %10s",
+		"N-Bytes", "N-Bytes/s", "N-Max-Rate",
+		"B-Bytes", "B-Bytes/s", "B-Max-Rate");
+
+	/* operation count and size bins */
+	printf(" %6s %6s %6s %6s\n",
+	    "ops", "<=4kB", "4-32kB", ">=32kB");
+
+	line = LINES;
+}
+
+ /*
+  * Print header (txg mode): same as the interval header plus a leading
+  * txg column, emitted on txg_quiesce for the selected pool whenever the
+  * line countdown is zero.
+  */
+ fbt::txg_quiesce:entry
+ /OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name && line == 0/
+ {
+	/* optional timestamp column */
+	OPT_time ? printf("%-20s ", "TIME") : 1;
+
+	/* txg, data and buffer columns, in megabytes or bytes */
+	OPT_mega ?
+	    printf("%10s %10s %10s %10s %10s %10s %10s",
+		"txg", "N-MB", "N-MB/s", "N-Max-Rate",
+		"B-MB", "B-MB/s", "B-Max-Rate") :
+	    printf("%10s %10s %10s %10s %10s %10s %10s",
+		"txg", "N-Bytes", "N-Bytes/s", "N-Max-Rate",
+		"B-Bytes", "B-Bytes/s", "B-Max-Rate");
+
+	/* operation count and size bins */
+	printf(" %6s %6s %6s %6s\n",
+	    "ops", "<=4kB", "4-32kB", ">=32kB");
+
+	line = LINES;
+}
+
+ /*
+  * Print Output (interval mode): when the countdown reaches zero, print
+  * one row of totals, per-second rates and maxima, then reset every
+  * counter, reload the countdown and decrement the report and header
+  * line counts.
+  */
+ profile:::tick-1sec
+ /OPT_txg == 0 && secs == 0/
+ {
+	/* optional timestamp column */
+	OPT_time ? printf("%-20Y ", walltimestamp) : 1;
+
+	/* data and buffer columns, in megabytes or bytes */
+	OPT_mega ?
+	    printf("%10d %10d %10d %10d %10d %10d",
+		nused/MEGA, nused/(interval*MEGA), nused_max_per_sec/MEGA,
+		size/MEGA, size/(interval*MEGA), size_max_per_sec/MEGA) :
+	    printf("%10d %10d %10d %10d %10d %10d",
+		nused, nused/interval, nused_max_per_sec,
+		size, size/interval, size_max_per_sec);
+
+	/* operation count and size bins */
+	printf(" %6d %6d %6d %6d\n",
+	    syncops, size_4k, size_4k_32k, size_32k);
+
+	/* reset data byte counters */
+	nused = 0;
+	nused_per_sec = 0;
+	nused_max_per_sec = 0;
+
+	/* reset buffer byte counters */
+	size = 0;
+	size_per_sec = 0;
+	size_max_per_sec = 0;
+
+	/* reset operation counters */
+	syncops = 0;
+	size_4k = 0;
+	size_4k_32k = 0;
+	size_32k = 0;
+
+	/* start the next interval */
+	secs = INTERVAL;
+	counts--;
+	line--;
+ }
+
+/*
+ * Print Output (txg mode): on txg_quiesce for the selected pool, print
+ * one row led by args[1], using the seconds counted since the previous
+ * row as the rate divisor, then reset every counter.
+ */
+fbt::txg_quiesce:entry
+/OPT_txg == 1 && POOL == args[0]->dp_spa->spa_name/
+{
+	/* never divide by zero when less than one tick has elapsed */
+	secs <= 0 ? secs=1 : 1;
+
+	/* optional timestamp column */
+	OPT_time ? printf("%-20Y ", walltimestamp) : 1;
+
+	/* txg, data and buffer columns, in megabytes or bytes */
+	OPT_mega ?
+	    printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
+		nused/MEGA, nused/(secs*MEGA), nused_max_per_sec/MEGA,
+		size/MEGA, size/(secs*MEGA), size_max_per_sec/MEGA) :
+	    printf("%10d %10d %10d %10d %10d %10d %10d", args[1],
+		nused, nused/secs, nused_max_per_sec,
+		size, size/secs, size_max_per_sec);
+
+	/* operation count and size bins */
+	printf(" %6d %6d %6d %6d\n",
+	    syncops, size_4k, size_4k_32k, size_32k);
+
+	/* reset data byte counters */
+	nused = 0;
+	nused_per_sec = 0;
+	nused_max_per_sec = 0;
+
+	/* reset buffer byte counters */
+	size = 0;
+	size_per_sec = 0;
+	size_max_per_sec = 0;
+
+	/* reset operation counters */
+	syncops = 0;
+	size_4k = 0;
+	size_4k_32k = 0;
+	size_32k = 0;
+
+	/* restart the elapsed-seconds clock */
+	secs = 0;
+	counts--;
+	line--;
+}
+
+ /*
+  * End of program (interval mode): stop once the remaining report count
+  * has been decremented to zero.
+  */
+ profile:::tick-1sec
+ /OPT_txg == 0 && counts == 0/
+ {
+	exit(0);
+ }
+ /*
+  * End of program (txg mode): stop on the first txg_quiesce call after
+  * the remaining report count has been decremented to zero.
+  */
+ fbt::txg_quiesce:entry
+ /OPT_txg == 1 && counts == 0/
+ {
+	exit(0);
+ }
+'