changeset 13990:b04b9a80047a

3618 ::zio dcmd does not show timestamp data Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: George Wilson <gwilson@zfsmail.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Garrett D'Amore <garrett@damore.org> Approved by: Dan McDonald <danmcd@nexenta.com>
author Matthew Ahrens <mahrens@delphix.com>
date Thu, 21 Mar 2013 14:47:36 -0800
parents c97356b78587
children 53e4f9da98a1
files usr/src/cmd/mdb/common/modules/zfs/zfs.c usr/src/uts/common/fs/zfs/sys/vdev_impl.h usr/src/uts/common/fs/zfs/sys/zio.h usr/src/uts/common/fs/zfs/vdev.c usr/src/uts/common/fs/zfs/vdev_queue.c
diffstat 5 files changed, 54 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Sat Feb 09 17:21:16 2013 -0500
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Thu Mar 21 14:47:36 2013 -0800
@@ -46,6 +46,7 @@
 
 #ifdef _KERNEL
 #define	ZFS_OBJ_NAME	"zfs"
+extern int64_t mdb_gethrtime(void);
 #else
 #define	ZFS_OBJ_NAME	"libzpool.so.1"
 #endif
@@ -1712,13 +1713,13 @@
  * the main summary.  More detailed information can always be found by doing a
  * '::print zio' on the underlying zio_t.  The columns we display are:
  *
- *	ADDRESS		TYPE	STAGE		WAITER
+ *	ADDRESS  TYPE  STAGE  WAITER  TIME_ELAPSED
  *
  * The 'address' column is indented by one space for each depth level as we
  * descend down the tree.
  */
 
-#define	ZIO_MAXINDENT	24
+#define	ZIO_MAXINDENT	7
 #define	ZIO_MAXWIDTH	(sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
 #define	ZIO_WALK_SELF	0
 #define	ZIO_WALK_CHILD	1
@@ -1736,7 +1737,6 @@
 	enum zio_type io_type;
 	enum zio_stage io_stage;
 	void *io_waiter;
-	uint64_t io_timestamp;
 	void *io_spa;
 	struct {
 		struct {
@@ -1746,15 +1746,26 @@
 	int io_error;
 } mdb_zio_t;
 
+typedef struct mdb_zio_timestamp {
+	hrtime_t io_timestamp;
+} mdb_zio_timestamp_t;
+
 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
 
 static int
-zio_print_cb(uintptr_t addr, const mdb_zio_t *zio, zio_print_args_t *zpa)
+zio_print_cb(uintptr_t addr, zio_print_args_t *zpa)
 {
 	mdb_ctf_id_t type_enum, stage_enum;
 	int indent = zpa->zpa_current_depth;
 	const char *type, *stage;
 	uintptr_t laddr;
+	mdb_zio_t zio;
+	mdb_zio_timestamp_t zio_timestamp = { 0 };
+
+	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
+		return (WALK_ERR);
+	(void) mdb_ctf_vread(&zio_timestamp, ZFS_STRUCT "zio",
+	    "mdb_zio_timestamp_t", addr, MDB_CTF_VREAD_QUIET);
 
 	if (indent > ZIO_MAXINDENT)
 		indent = ZIO_MAXINDENT;
@@ -1765,13 +1776,13 @@
 		return (WALK_ERR);
 	}
 
-	if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
+	if ((type = mdb_ctf_enum_name(type_enum, zio.io_type)) != NULL)
 		type += sizeof ("ZIO_TYPE_") - 1;
 	else
 		type = "?";
 
-	if (zio->io_error == 0) {
-		stage = mdb_ctf_enum_name(stage_enum, zio->io_stage);
+	if (zio.io_error == 0) {
+		stage = mdb_ctf_enum_name(stage_enum, zio.io_stage);
 		if (stage != NULL)
 			stage += sizeof ("ZIO_STAGE_") - 1;
 		else
@@ -1786,10 +1797,22 @@
 		} else {
 			mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
 			    ZIO_MAXWIDTH - indent, addr, type, stage);
-			if (zio->io_waiter)
-				mdb_printf("%?p\n", zio->io_waiter);
+			if (zio.io_waiter)
+				mdb_printf("%-16p ", zio.io_waiter);
 			else
-				mdb_printf("-\n");
+				mdb_printf("%-16s ", "-");
+#ifdef _KERNEL
+			if (zio_timestamp.io_timestamp != 0) {
+				mdb_printf("%llums", (mdb_gethrtime() -
+				    zio_timestamp.io_timestamp) /
+				    1000000);
+			} else {
+				mdb_printf("%-12s ", "-");
+			}
+#else
+			mdb_printf("%-12s ", "-");
+#endif
+			mdb_printf("\n");
 		}
 	}
 
@@ -1818,7 +1841,6 @@
 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
 {
 	zio_link_t zl;
-	mdb_zio_t zio;
 	uintptr_t ziop;
 	zio_print_args_t *zpa = arg;
 
@@ -1832,17 +1854,13 @@
 	else
 		ziop = (uintptr_t)zl.zl_child;
 
-	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", ziop, 0) == -1)
-		return (WALK_ERR);
-
-	return (zio_print_cb(ziop, &zio, arg));
+	return (zio_print_cb(ziop, arg));
 }
 
 /* ARGSUSED */
 static int
 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
-	mdb_zio_t zio;
 	zio_print_args_t zpa = { 0 };
 
 	if (!(flags & DCMD_ADDRSPEC))
@@ -1864,14 +1882,13 @@
 		zpa.zpa_max_depth = 1;
 	}
 
-	if (mdb_ctf_vread(&zio, ZFS_STRUCT "zio", "mdb_zio_t", addr, 0) == -1)
-		return (DCMD_ERR);
-
-	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags))
-		mdb_printf("%<u>%-*s %-5s %-16s %-?s%</u>\n", ZIO_MAXWIDTH,
-		    "ADDRESS", "TYPE", "STAGE", "WAITER");
-
-	if (zio_print_cb(addr, &zio, &zpa) != WALK_NEXT)
+	if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags)) {
+		mdb_printf("%<u>%-*s %-5s %-16s %-16s %-12s%</u>\n",
+		    ZIO_MAXWIDTH, "ADDRESS", "TYPE", "STAGE", "WAITER",
+		    "TIME_ELAPSED");
+	}
+
+	if (zio_print_cb(addr, &zpa) != WALK_NEXT)
 		return (DCMD_ERR);
 
 	return (DCMD_OK);
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Sat Feb 09 17:21:16 2013 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Thu Mar 21 14:47:36 2013 -0800
@@ -104,8 +104,7 @@
 	avl_tree_t	vq_read_tree;
 	avl_tree_t	vq_write_tree;
 	avl_tree_t	vq_pending_tree;
-	uint64_t	vq_io_complete_ts;
-	uint64_t	vq_io_delta_ts;
+	hrtime_t	vq_io_complete_ts;
 	kmutex_t	vq_lock;
 };
 
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Sat Feb 09 17:21:16 2013 -0500
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Thu Mar 21 14:47:36 2013 -0800
@@ -408,7 +408,7 @@
 
 	uint64_t	io_offset;
 	uint64_t	io_deadline;
-	uint64_t	io_timestamp;
+	hrtime_t	io_timestamp;
 	avl_node_t	io_offset_node;
 	avl_node_t	io_deadline_node;
 	avl_tree_t	*io_vdev_tree;
--- a/usr/src/uts/common/fs/zfs/vdev.c	Sat Feb 09 17:21:16 2013 -0500
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Thu Mar 21 14:47:36 2013 -0800
@@ -3180,10 +3180,10 @@
 			 * the spa_deadman_synctime we panic the system.
 			 */
 			fio = avl_first(&vq->vq_pending_tree);
-			delta = ddi_get_lbolt64() - fio->io_timestamp;
-			if (delta > NSEC_TO_TICK(spa_deadman_synctime(spa))) {
-				zfs_dbgmsg("SLOW IO: zio timestamp %llu, "
-				    "delta %llu, last io %llu",
+			delta = gethrtime() - fio->io_timestamp;
+			if (delta > spa_deadman_synctime(spa)) {
+				zfs_dbgmsg("SLOW IO: zio timestamp %lluns, "
+				    "delta %lluns, last io %lluns",
 				    fio->io_timestamp, delta,
 				    vq->vq_io_complete_ts);
 				fm_panic("I/O to pool '%s' appears to be "
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c	Sat Feb 09 17:21:16 2013 -0500
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c	Thu Mar 21 14:47:36 2013 -0800
@@ -45,8 +45,11 @@
 int zfs_vdev_max_pending = 10;
 int zfs_vdev_min_pending = 4;
 
-/* deadline = pri + ddi_get_lbolt64() >> time_shift) */
-int zfs_vdev_time_shift = 6;
+/*
+ * The deadlines are grouped into buckets based on zfs_vdev_time_shift:
+ * deadline = pri + gethrtime() >> time_shift)
+ */
+int zfs_vdev_time_shift = 29; /* each bucket is 0.537 seconds */
 
 /* exponential I/O issue ramp-up rate */
 int zfs_vdev_ramp_rate = 2;
@@ -412,7 +415,7 @@
 
 	mutex_enter(&vq->vq_lock);
 
-	zio->io_timestamp = ddi_get_lbolt64();
+	zio->io_timestamp = gethrtime();
 	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
 	    zio->io_priority;
 
@@ -445,8 +448,7 @@
 
 	vdev_queue_pending_remove(vq, zio);
 
-	vq->vq_io_complete_ts = ddi_get_lbolt64();
-	vq->vq_io_delta_ts = vq->vq_io_complete_ts - zio->io_timestamp;
+	vq->vq_io_complete_ts = gethrtime();
 
 	for (int i = 0; i < zfs_vdev_ramp_rate; i++) {
 		zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);