changeset 13790:ac6eff781c67

3112 ztest does not honor ZFS_DEBUG 3113 ztest should use watchpoints to protect frozen arc bufs 3114 some leaked nvlists in zfsdev_ioctl 3115 poll(2) returns prematurely in presence of spurious wakeups Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Matt Amdur <Matt.Amdur@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <chris.siden@delphix.com> Approved by: Eric Schrock <eric.schrock@delphix.com>
author Matthew Ahrens <mahrens@delphix.com>
date Thu, 30 Aug 2012 05:13:49 -0700
parents f0c17d471b7a
children 40cea5d62fa3
files usr/src/cmd/ztest/ztest.c usr/src/lib/libzpool/common/sys/zfs_context.h usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/dsl_synctask.c usr/src/uts/common/fs/zfs/spa_history.c usr/src/uts/common/fs/zfs/spa_misc.c usr/src/uts/common/fs/zfs/sys/arc.h usr/src/uts/common/fs/zfs/sys/zfs_debug.h usr/src/uts/common/fs/zfs/zio.c usr/src/uts/common/io/devpoll.c usr/src/uts/common/os/condvar.c usr/src/uts/common/sys/condvar.h usr/src/uts/common/syscall/poll.c
diffstat 14 files changed, 190 insertions(+), 111 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/ztest/ztest.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/cmd/ztest/ztest.c	Thu Aug 30 05:13:49 2012 -0700
@@ -5835,6 +5835,8 @@
 
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
+	dprintf_setup(&argc, argv);
+
 	if (!ischild) {
 		process_options(argc, argv);
 
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h	Thu Aug 30 05:13:49 2012 -0700
@@ -61,6 +61,7 @@
 #include <atomic.h>
 #include <dirent.h>
 #include <time.h>
+#include <procfs.h>
 #include <libsysevent.h>
 #include <sys/note.h>
 #include <sys/types.h>
--- a/usr/src/uts/common/fs/zfs/arc.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Thu Aug 30 05:13:49 2012 -0700
@@ -135,6 +135,12 @@
 #include <sys/kstat.h>
 #include <zfs_fletcher.h>
 
+#ifndef _KERNEL
+/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
+boolean_t arc_watch = B_FALSE;
+int arc_procfd;
+#endif
+
 static kmutex_t		arc_reclaim_thr_lock;
 static kcondvar_t	arc_reclaim_thr_cv;	/* used to signal reclaim thr */
 static uint8_t		arc_thread_exit;
@@ -474,6 +480,7 @@
 static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
 static int arc_evict_needed(arc_buf_contents_t type);
 static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes);
+static void arc_buf_watch(arc_buf_t *buf);
 
 static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
 
@@ -949,6 +956,50 @@
 	fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
 	    buf->b_hdr->b_freeze_cksum);
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
+	arc_buf_watch(buf);
+}
+
+#ifndef _KERNEL
+typedef struct procctl {
+	long cmd;
+	prwatch_t prwatch;
+} procctl_t;
+#endif
+
+/* ARGSUSED */
+static void
+arc_buf_unwatch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+	if (arc_watch) {
+		int result;
+		procctl_t ctl;
+		ctl.cmd = PCWATCH;
+		ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+		ctl.prwatch.pr_size = 0;
+		ctl.prwatch.pr_wflags = 0;
+		result = write(arc_procfd, &ctl, sizeof (ctl));
+		ASSERT3U(result, ==, sizeof (ctl));
+	}
+#endif
+}
+
+/* ARGSUSED */
+static void
+arc_buf_watch(arc_buf_t *buf)
+{
+#ifndef _KERNEL
+	if (arc_watch) {
+		int result;
+		procctl_t ctl;
+		ctl.cmd = PCWATCH;
+		ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data;
+		ctl.prwatch.pr_size = buf->b_hdr->b_size;
+		ctl.prwatch.pr_wflags = WA_WRITE;
+		result = write(arc_procfd, &ctl, sizeof (ctl));
+		ASSERT3U(result, ==, sizeof (ctl));
+	}
+#endif
 }
 
 void
@@ -975,6 +1026,8 @@
 	}
 
 	mutex_exit(&buf->b_hdr->b_freeze_lock);
+
+	arc_buf_unwatch(buf);
 }
 
 void
@@ -992,6 +1045,7 @@
 	    buf->b_hdr->b_state == arc_anon);
 	arc_cksum_compute(buf, B_FALSE);
 	mutex_exit(hash_lock);
+
 }
 
 static void
@@ -1348,21 +1402,22 @@
  * the buffer is placed on l2arc_free_on_write to be freed later.
  */
 static void
-arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
-    void *data, size_t size)
+arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
 {
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+
 	if (HDR_L2_WRITING(hdr)) {
 		l2arc_data_free_t *df;
 		df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
-		df->l2df_data = data;
-		df->l2df_size = size;
+		df->l2df_data = buf->b_data;
+		df->l2df_size = hdr->b_size;
 		df->l2df_func = free_func;
 		mutex_enter(&l2arc_free_on_write_mtx);
 		list_insert_head(l2arc_free_on_write, df);
 		mutex_exit(&l2arc_free_on_write_mtx);
 		ARCSTAT_BUMP(arcstat_l2_free_on_write);
 	} else {
-		free_func(data, size);
+		free_func(buf->b_data, hdr->b_size);
 	}
 }
 
@@ -1378,16 +1433,15 @@
 		arc_buf_contents_t type = buf->b_hdr->b_type;
 
 		arc_cksum_verify(buf);
+		arc_buf_unwatch(buf);
 
 		if (!recycle) {
 			if (type == ARC_BUFC_METADATA) {
-				arc_buf_data_free(buf->b_hdr, zio_buf_free,
-				    buf->b_data, size);
+				arc_buf_data_free(buf, zio_buf_free);
 				arc_space_return(size, ARC_SPACE_DATA);
 			} else {
 				ASSERT(type == ARC_BUFC_DATA);
-				arc_buf_data_free(buf->b_hdr,
-				    zio_data_buf_free, buf->b_data, size);
+				arc_buf_data_free(buf, zio_data_buf_free);
 				ARCSTAT_INCR(arcstat_data_size, -size);
 				atomic_add_64(&arc_size, -size);
 			}
@@ -2556,6 +2610,7 @@
 	}
 
 	arc_cksum_compute(buf, B_FALSE);
+	arc_buf_watch(buf);
 
 	if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
 		/*
@@ -3113,6 +3168,7 @@
 		}
 		hdr->b_datacnt -= 1;
 		arc_cksum_verify(buf);
+		arc_buf_unwatch(buf);
 
 		mutex_exit(hash_lock);
 
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c	Thu Aug 30 05:13:49 2012 -0700
@@ -2302,7 +2302,6 @@
 			}
 		}
 	}
-
 }
 
 void
--- a/usr/src/uts/common/fs/zfs/dsl_synctask.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c	Thu Aug 30 05:13:49 2012 -0700
@@ -230,12 +230,7 @@
     dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
     void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
 {
-	dsl_sync_task_group_t *dstg;
-
-	if (!spa_writeable(dp->dp_spa))
-		return;
-
-	dstg = dsl_sync_task_group_create(dp);
+	dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
 	dsl_sync_task_create(dstg, checkfunc, syncfunc,
 	    arg1, arg2, blocks_modified);
 	dsl_sync_task_group_nowait(dstg, tx);
--- a/usr/src/uts/common/fs/zfs/spa_history.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_history.c	Thu Aug 30 05:13:49 2012 -0700
@@ -303,7 +303,7 @@
 	dmu_tx_t *tx;
 	nvlist_t *nvarg;
 
-	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY)
+	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
 		return (EINVAL);
 
 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
@@ -439,8 +439,9 @@
 	/*
 	 * If this is part of creating a pool, not everything is
 	 * initialized yet, so don't bother logging the internal events.
+	 * Likewise if the pool is not writeable.
 	 */
-	if (tx->tx_txg == TXG_INITIAL) {
+	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
 		fnvlist_free(nvl);
 		return;
 	}
--- a/usr/src/uts/common/fs/zfs/spa_misc.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c	Thu Aug 30 05:13:49 2012 -0700
@@ -1600,6 +1600,18 @@
 
 	spa_mode_global = mode;
 
+#ifndef _KERNEL
+	if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
+		arc_procfd = open("/proc/self/ctl", O_WRONLY);
+		if (arc_procfd == -1) {
+			perror("could not enable watchpoints: "
+			    "opening /proc/self/ctl failed: ");
+		} else {
+			arc_watch = B_TRUE;
+		}
+	}
+#endif
+
 	refcount_init();
 	unique_init();
 	zio_init();
--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Thu Aug 30 05:13:49 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ARC_H
@@ -135,6 +136,11 @@
 void l2arc_start(void);
 void l2arc_stop(void);
 
+#ifndef _KERNEL
+extern boolean_t arc_watch;
+extern int arc_procfd;
+#endif
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h	Thu Aug 30 05:13:49 2012 -0700
@@ -20,6 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_DEBUG_H
@@ -75,6 +76,10 @@
 extern void zfs_dbgmsg_fini(void);
 extern void zfs_dbgmsg(const char *fmt, ...);
 
+#ifndef _KERNEL
+extern int dprintf_find_string(const char *string);
+#endif
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/fs/zfs/zio.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Thu Aug 30 05:13:49 2012 -0700
@@ -125,11 +125,21 @@
 		while (p2 & (p2 - 1))
 			p2 &= p2 - 1;
 
+#ifndef _KERNEL
+		/*
+		 * If we are using watchpoints, put each buffer on its own page,
+		 * to eliminate the performance overhead of trapping to the
+		 * kernel when modifying a non-watched buffer that shares the
+		 * page with a watched buffer.
+		 */
+		if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
+			continue;
+#endif
 		if (size <= 4 * SPA_MINBLOCKSIZE) {
 			align = SPA_MINBLOCKSIZE;
-		} else if (P2PHASE(size, PAGESIZE) == 0) {
+		} else if (IS_P2ALIGNED(size, PAGESIZE)) {
 			align = PAGESIZE;
-		} else if (P2PHASE(size, p2 >> 2) == 0) {
+		} else if (IS_P2ALIGNED(size, p2 >> 2)) {
 			align = p2 >> 2;
 		}
 
--- a/usr/src/uts/common/io/devpoll.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/io/devpoll.c	Thu Aug 30 05:13:49 2012 -0700
@@ -23,7 +23,9 @@
  * Use is subject to license terms.
  */
 
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #include <sys/types.h>
 #include <sys/devops.h>
@@ -695,9 +697,15 @@
 	minor_t		minor;
 	dp_entry_t	*dpep;
 	pollcache_t	*pcp;
+	hrtime_t	now;
 	int		error = 0;
 	STRUCT_DECL(dvpoll, dvpoll);
 
+	if (cmd == DP_POLL) {
+		/* read the clock now, before we sleep on DP_WRITER_PRESENT */
+		now = gethrtime();
+	}
+
 	minor = getminor(dev);
 	mutex_enter(&devpoll_lock);
 	ASSERT(minor < dptblsize);
@@ -725,9 +733,7 @@
 		pollstate_t	*ps;
 		nfds_t		nfds;
 		int		fdcnt = 0;
-		int		time_out;
-		clock_t		*deltap = NULL;
-		clock_t		delta;
+		hrtime_t	deadline = 0;
 
 		STRUCT_INIT(dvpoll, mode);
 		error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll),
@@ -737,18 +743,16 @@
 			return (EFAULT);
 		}
 
-		time_out = STRUCT_FGET(dvpoll, dp_timeout);
-		if (time_out > 0) {
+		deadline = STRUCT_FGET(dvpoll, dp_timeout);
+		if (deadline > 0) {
 			/*
-			 * cv_relwaituntil_sig operates at the tick
-			 * granularity, which by default is 10 ms.
-			 * This results in rounding user specified
-			 * timeouts up but prevents the system
-			 * from being flooded with small high
-			 * resolution timers.
+			 * Convert the deadline from relative milliseconds
+			 * to absolute nanoseconds.  The caller must wait for
+			 * at least a tick.
 			 */
-			delta = MSEC_TO_TICK_ROUNDUP(time_out);
-			deltap = &delta;
+			deadline = deadline * NANOSEC / MILLISEC;
+			deadline = MAX(deadline, nsec_per_tick);
+			deadline += now;
 		}
 
 		if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) {
@@ -758,16 +762,15 @@
 			 * Do not check for signals if we have a zero timeout.
 			 */
 			DP_REFRELE(dpep);
-			if (time_out == 0)
+			if (deadline == 0)
 				return (0);
 			mutex_enter(&curthread->t_delay_lock);
-			while ((delta = cv_relwaituntil_sig(
-			    &curthread->t_delay_cv, &curthread->t_delay_lock,
-			    deltap, TR_MILLISEC)) > 0) {
+			while ((error =
+			    cv_timedwait_sig_hrtime(&curthread->t_delay_cv,
+			    &curthread->t_delay_lock, deadline)) > 0)
 				continue;
-			}
 			mutex_exit(&curthread->t_delay_lock);
-			return (delta == 0 ? EINTR : 0);
+			return (error == 0 ? EINTR : 0);
 		}
 
 		/*
@@ -814,21 +817,22 @@
 
 			/*
 			 * Sleep until we are notified, signaled, or timed out.
-			 * Do not check for signals if we have a zero timeout.
 			 */
-			if (time_out == 0)	/* immediate timeout */
+			if (deadline == 0) {
+				/* immediate timeout; do not check signals */
 				break;
-
-			delta = cv_relwaituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
-			    deltap, TR_MILLISEC);
+			}
+			error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+			    &pcp->pc_lock, deadline);
 			/*
 			 * If we were awakened by a signal or timeout
 			 * then break the loop, else poll again.
 			 */
-			if (delta <= 0) {
-				if (delta == 0)	/* signal */
-					error = EINTR;
+			if (error <= 0) {
+				error = (error == 0) ? EINTR : 0;
 				break;
+			} else {
+				error = 0;
 			}
 		}
 		mutex_exit(&pcp->pc_lock);
--- a/usr/src/uts/common/os/condvar.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/os/condvar.c	Thu Aug 30 05:13:49 2012 -0700
@@ -24,7 +24,9 @@
  * Use is subject to license terms.
  */
 
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #include <sys/thread.h>
 #include <sys/proc.h>
@@ -481,6 +483,21 @@
 }
 
 /*
+ * Wait until the specified absolute time (an hrtime_t, as from gethrtime()).
+ * If tim == -1, waits without timeout using cv_wait_sig_swap().
+ */
+int
+cv_timedwait_sig_hrtime(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim)
+{
+	if (tim == -1) {
+		return (cv_wait_sig_swap(cvp, mp));
+	} else {
+		return (cv_timedwait_sig_hires(cvp, mp, tim, 1,
+		    CALLOUT_FLAG_ABSOLUTE | CALLOUT_FLAG_ROUNDUP));
+	}
+}
+
+/*
  * Same as cv_timedwait_sig() except that the third argument is a relative
  * timeout value, as opposed to an absolute one. There is also a fourth
  * argument that specifies how accurately the timeout must be implemented.
@@ -502,30 +519,6 @@
 }
 
 /*
- * Same as cv_reltimedwait_sig() except that the timeout is optional. If
- * there is no timeout then the function will block until woken up
- * or interrupted.
- */
-clock_t
-cv_relwaituntil_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t *delta,
-    time_res_t res)
-{
-	/*
-	 * If there is no timeout specified wait indefinitely for a
-	 * signal or a wakeup.
-	 */
-	if (delta == NULL) {
-		return (cv_wait_sig_swap(cvp, mp));
-	}
-
-	/*
-	 * cv_reltimedwait_sig will wait for the relative timeout
-	 * specified by delta.
-	 */
-	return (cv_reltimedwait_sig(cvp, mp, *delta, res));
-}
-
-/*
  * Like cv_wait_sig_swap but allows the caller to indicate (with a
  * non-NULL sigret) that they will take care of signalling the cv
  * after wakeup, if necessary.  This is a vile hack that should only
@@ -766,6 +759,10 @@
  * so the caller can return a premature timeout to the calling process
  * so it can reevaluate the situation in light of the new system time.
  * (The system clock has been reset if timecheck != timechanged.)
+ *
+ * Generally, cv_timedwait_sig_hrtime() should be used instead of this
+ * routine.  It waits based on hrtime rather than wall-clock time and therefore
+ * does not need to deal with the time changing.
  */
 int
 cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp,
--- a/usr/src/uts/common/sys/condvar.h	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/sys/condvar.h	Thu Aug 30 05:13:49 2012 -0700
@@ -23,7 +23,9 @@
  * Use is subject to license terms.
  */
 
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 /*
  * condvar.h:
@@ -95,10 +97,9 @@
 extern	clock_t	cv_reltimedwait(kcondvar_t *, kmutex_t *, clock_t, time_res_t);
 extern	int	cv_wait_sig(kcondvar_t *, kmutex_t *);
 extern	clock_t	cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
+extern	int	cv_timedwait_sig_hrtime(kcondvar_t *, kmutex_t *, hrtime_t);
 extern	clock_t	cv_reltimedwait_sig(kcondvar_t *, kmutex_t *, clock_t,
     time_res_t);
-extern	clock_t	cv_relwaituntil_sig(kcondvar_t *, kmutex_t *, clock_t *,
-    time_res_t);
 extern	int	cv_wait_sig_swap(kcondvar_t *, kmutex_t *);
 extern	int	cv_wait_sig_swap_core(kcondvar_t *, kmutex_t *, int *);
 extern	void	cv_signal(kcondvar_t *);
--- a/usr/src/uts/common/syscall/poll.c	Thu Aug 30 03:32:10 2012 -0700
+++ b/usr/src/uts/common/syscall/poll.c	Thu Aug 30 05:13:49 2012 -0700
@@ -24,11 +24,14 @@
  * Use is subject to license terms.
  */
 
-/* Copyright (c) 2011 by Delphix. All rights reserved. */
 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
 /*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
  * Portions of this source code were derived from Berkeley 4.3 BSD
  * under license from the Regents of the University of California.
  */
@@ -288,9 +291,7 @@
 	proc_t *p = ttoproc(t);
 	int fdcnt = 0;
 	int i;
-	int imm_timeout = 0;
-	clock_t *deltap = NULL;
-	clock_t delta;
+	hrtime_t deadline; /* hrtime value when we want to return */
 	pollfd_t *pollfdp;
 	pollstate_t *ps;
 	pollcache_t *pcp;
@@ -301,24 +302,15 @@
 	/*
 	 * Determine the precise future time of the requested timeout, if any.
 	 */
-	if (tsp != NULL) {
-		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
-			imm_timeout = 1;
-		} else {
-			/*
-			 * cv_relwaituntil_sig operates at
-			 * the tick granularity, which by default is 10 ms.
-			 * Convert the specified timespec to ticks, rounding
-			 * up to at least 1 tick to avoid flooding the
-			 * system with small high resolution timers.
-			 */
-			delta = SEC_TO_TICK(tsp->tv_sec) +
-			    NSEC_TO_TICK(tsp->tv_nsec);
-			if (delta < 1) {
-				delta = 1;
-			}
-			deltap = &delta;
-		}
+	if (tsp == NULL) {
+		deadline = -1;
+	} else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
+		deadline = 0;
+	} else {
+		/* The caller must wait at least a tick. */
+		deadline = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
+		deadline = MAX(deadline, nsec_per_tick);
+		deadline += gethrtime();
 	}
 
 	/*
@@ -351,16 +343,15 @@
 		/*
 		 * Sleep until we have passed the requested future
 		 * time or until interrupted by a signal.
-		 * Do not check for signals if we have a zero timeout.
+		 * Do not check for signals if we do not want to wait.
 		 */
-		if (!imm_timeout) {
+		if (deadline != 0) {
 			mutex_enter(&t->t_delay_lock);
-			while ((delta = cv_relwaituntil_sig(&t->t_delay_cv,
-			    &t->t_delay_lock, deltap, TR_MILLISEC)) > 0)
+			while ((error = cv_timedwait_sig_hrtime(&t->t_delay_cv,
+			    &t->t_delay_lock, deadline)) > 0)
 				continue;
 			mutex_exit(&t->t_delay_lock);
-			if (delta == 0)
-				error = EINTR;
+			error = (error == 0) ? EINTR : 0;
 		}
 		goto pollout;
 	}
@@ -550,20 +541,19 @@
 		 * Do not check for signals if we have a zero timeout.
 		 */
 		mutex_exit(&ps->ps_lock);
-		if (imm_timeout) {
-			delta = -1;
+		if (deadline == 0) {
+			error = -1;
 		} else {
-			delta = cv_relwaituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
-			    deltap, TR_MILLISEC);
+			error = cv_timedwait_sig_hrtime(&pcp->pc_cv,
+			    &pcp->pc_lock, deadline);
 		}
 		mutex_exit(&pcp->pc_lock);
 		/*
 		 * If we have received a signal or timed out
 		 * then break out and return.
 		 */
-		if (delta <= 0) {
-			if (delta == 0)
-				error = EINTR;
+		if (error <= 0) {
+			error = (error == 0) ? EINTR : 0;
 			break;
 		}
 		/*