changeset 3642:9280dc401622

6265036 cv_waituntil_sig() often returns early
author qiao
date Tue, 13 Feb 2007 14:18:11 -0800
parents 4488c321dec8
children d5ffc0a2d216
files usr/src/uts/common/brand/lx/syscall/lx_futex.c usr/src/uts/common/fs/portfs/port.c usr/src/uts/common/fs/proc/prcontrol.c usr/src/uts/common/fs/proc/prdata.h usr/src/uts/common/io/devpoll.c usr/src/uts/common/os/aio.c usr/src/uts/common/os/callout.c usr/src/uts/common/os/clock.c usr/src/uts/common/os/condvar.c usr/src/uts/common/os/logsubr.c usr/src/uts/common/os/timers.c usr/src/uts/common/sys/callo.h usr/src/uts/common/sys/condvar.h usr/src/uts/common/sys/lwp_timer_impl.h usr/src/uts/common/sys/port_impl.h usr/src/uts/common/sys/time.h usr/src/uts/common/sys/timer.h usr/src/uts/common/syscall/lwp_timer.c usr/src/uts/common/syscall/lwpsys.c usr/src/uts/common/syscall/poll.c usr/src/uts/common/syscall/sem.c usr/src/uts/common/syscall/sigtimedwait.c
diffstat 22 files changed, 230 insertions(+), 174 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/brand/lx/syscall/lx_futex.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c	Tue Feb 13 14:18:11 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -212,7 +212,7 @@
 	err = 0;
 	while ((fw.fw_woken == 0) && (err == 0)) {
 		ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index],
-			timeout, timechanged);
+			timeout);
 		if (ret < 0)
 			err = set_errno(ETIMEDOUT);
 		else if (ret == 0)
--- a/usr/src/uts/common/fs/portfs/port.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/fs/portfs/port.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1140,7 +1140,6 @@
 	uint_t		tnent;
 	int		rval;
 	int		blocking = -1;
-	int		timecheck;
 	int		flag;
 	timespec_t	rqtime;
 	timespec_t	*rqtp = NULL;
@@ -1227,7 +1226,6 @@
 			goto portnowait;
 		}
 		rqtp = pgt->pgt_rqtp;
-		timecheck = pgt->pgt_timecheck;
 		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
 	} else {
 		/* check if enough events are available ... */
@@ -1251,7 +1249,6 @@
 
 		if (rqtp != NULL) {
 			timespec_t	now;
-			timecheck = timechanged;
 			gethrestime(&now);
 			timespecadd(rqtp, &now);
 		}
@@ -1293,7 +1290,7 @@
 		}
 
 		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
-		    rqtp, timecheck);
+		    rqtp);
 
 		if (rval <= 0) {
 			error = (rval == 0) ? EINTR : ETIME;
@@ -1508,7 +1505,6 @@
 			}
 			if (rqtp != NULL) {
 				timespec_t	now;
-				pgt->pgt_timecheck = timechanged;
 				gethrestime(&now);
 				timespecadd(&pgt->pgt_rqtime, &now);
 			}
@@ -1517,7 +1513,6 @@
 			/* timeout already checked -> remember values */
 			pgt->pgt_rqtp = rqtp;
 			if (rqtp != NULL) {
-				pgt->pgt_timecheck = timecheck;
 				pgt->pgt_rqtime = *rqtp;
 			}
 		}
--- a/usr/src/uts/common/fs/proc/prcontrol.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/fs/proc/prcontrol.c	Tue Feb 13 14:18:11 2007 -0800
@@ -21,7 +21,7 @@
  */
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1081,7 +1081,6 @@
 	proc_t *p = pcp->prc_proc;
 	timestruc_t rqtime;
 	timestruc_t *rqtp = NULL;
-	int timecheck = 0;
 	kthread_t *t;
 	int error;
 
@@ -1091,7 +1090,6 @@
 		 */
 		timestruc_t now;
 
-		timecheck = timechanged;
 		gethrestime(&now);
 		rqtp = &rqtime;
 		rqtp->tv_sec = timeo / MILLISEC;
@@ -1107,7 +1105,7 @@
 			thread_unlock(t);
 			mutex_enter(&pcp->prc_mutex);
 			prunlock(pnp);
-			error = pr_wait(pcp, rqtp, timecheck);
+			error = pr_wait(pcp, rqtp);
 			if (error)	/* -1 is timeout */
 				return (error);
 			if ((error = prlock(pnp, ZNO)) != 0)
@@ -1126,7 +1124,7 @@
 			thread_unlock(t);
 			mutex_enter(&pcp->prc_mutex);
 			prunlock(pnp);
-			error = pr_wait(pcp, rqtp, timecheck);
+			error = pr_wait(pcp, rqtp);
 			if (error)	/* -1 is timeout */
 				return (error);
 			if ((error = prlock(pnp, ZNO)) != 0)
@@ -1284,13 +1282,12 @@
  */
 int
 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
-	timestruc_t *ts,	/* absolute time of timeout, if any */
-	int timecheck)
+	timestruc_t *ts)	/* absolute time of timeout, if any */
 {
 	int rval;
 
 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
-	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
+	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts);
 	mutex_exit(&pcp->prc_mutex);
 	switch (rval) {
 	case 0:
@@ -2045,7 +2042,7 @@
 		 * Wait for the agent to stop and notify us.
 		 * If we've been interrupted, return that information.
 		 */
-		error = pr_wait(pcp, NULL, 0);
+		error = pr_wait(pcp, NULL);
 		if (error == EINTR) {
 			error = 0;
 			break;
--- a/usr/src/uts/common/fs/proc/prdata.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/fs/proc/prdata.h	Tue Feb 13 14:18:11 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -332,7 +332,7 @@
 extern	void	pr_stop(prnode_t *);
 extern	int	pr_wait_stop(prnode_t *, time_t);
 extern	int	pr_setrun(prnode_t *, ulong_t);
-extern	int	pr_wait(prcommon_t *, timestruc_t *, int);
+extern	int	pr_wait(prcommon_t *, timestruc_t *);
 extern	void	pr_wait_die(prnode_t *);
 extern	int	pr_setsig(prnode_t *, siginfo_t *);
 extern	int	pr_kill(prnode_t *, int, cred_t *);
--- a/usr/src/uts/common/io/devpoll.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/io/devpoll.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -689,7 +689,6 @@
 	timestruc_t	now;
 	timestruc_t	rqtime;
 	timestruc_t	*rqtp = NULL;
-	int		timecheck = 0;
 	minor_t 	minor;
 	dp_entry_t	*dpep;
 	pollcache_t	*pcp;
@@ -698,7 +697,6 @@
 
 	if (cmd == DP_POLL) {
 		/* do this now, before we sleep on DP_WRITER_PRESENT below */
-		timecheck = timechanged;
 		gethrestime(&now);
 	}
 	minor = getminor(dev);
@@ -761,7 +759,7 @@
 				return (0);
 			mutex_enter(&curthread->t_delay_lock);
 			while ((rval = cv_waituntil_sig(&curthread->t_delay_cv,
-			    &curthread->t_delay_lock, rqtp, timecheck)) > 0)
+			    &curthread->t_delay_lock, rqtp)) > 0)
 				continue;
 			mutex_exit(&curthread->t_delay_lock);
 			return ((rval == 0)? EINTR : 0);
@@ -816,7 +814,7 @@
 			if (time_out == 0)	/* immediate timeout */
 				break;
 			rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
-				rqtp, timecheck);
+				rqtp);
 			/*
 			 * If we were awakened by a signal or timeout
 			 * then break the loop, else poll again.
--- a/usr/src/uts/common/os/aio.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/aio.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -529,7 +529,6 @@
 	aio_req_t	*reqp;
 	clock_t		status;
 	int		blocking;
-	int		timecheck;
 	timestruc_t	rqtime;
 	timestruc_t	*rqtp;
 
@@ -545,7 +544,6 @@
 		return (error);
 	if (rqtp) {
 		timestruc_t now;
-		timecheck = timechanged;
 		gethrestime(&now);
 		timespecadd(rqtp, &now);
 	}
@@ -575,7 +573,7 @@
 		}
 		if (blocking) {
 			status = cv_waituntil_sig(&aiop->aio_waitcv,
-			    &aiop->aio_mutex, rqtp, timecheck);
+			    &aiop->aio_mutex, rqtp);
 
 			if (status > 0)		/* check done queue again */
 				continue;
@@ -619,7 +617,6 @@
 	int		iocb_index = 0;
 	model_t		model = get_udatamodel();
 	int		blocking = 1;
-	int		timecheck;
 	timestruc_t	rqtime;
 	timestruc_t	*rqtp;
 
@@ -690,7 +687,6 @@
 	 */
 	if (rqtp) {
 		timestruc_t now;
-		timecheck = timechanged;
 		gethrestime(&now);
 		timespecadd(rqtp, &now);
 	}
@@ -752,7 +748,7 @@
 
 		if ((cnt < waitcnt) && blocking) {
 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
-				&aiop->aio_mutex, rqtp, timecheck);
+				&aiop->aio_mutex, rqtp);
 			if (rval > 0)
 				continue;
 			if (rval < 0) {
@@ -920,7 +916,6 @@
 	size_t		ssize;
 	model_t		model = get_udatamodel();
 	int		blocking;
-	int		timecheck;
 	timestruc_t	rqtime;
 	timestruc_t	*rqtp;
 
@@ -936,7 +931,6 @@
 		return (error);
 	if (rqtp) {
 		timestruc_t now;
-		timecheck = timechanged;
 		gethrestime(&now);
 		timespecadd(rqtp, &now);
 	}
@@ -1056,7 +1050,7 @@
 			 */
 			mutex_exit(&aiop->aio_cleanupq_mutex);
 			rv = cv_waituntil_sig(&aiop->aio_waitcv,
-				&aiop->aio_mutex, rqtp, timecheck);
+				&aiop->aio_mutex, rqtp);
 			/*
 			 * we have to drop aio_mutex and
 			 * grab it in the right order.
--- a/usr/src/uts/common/os/callout.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/callout.c	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -79,6 +78,49 @@
 	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
 	cp, c_lbnext, c_lbprev)
 
+#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
+{									\
+	callout_t *nextp = ct->ct_hresq;				\
+	callout_t *prevp;						\
+									\
+	if (nextp == NULL || hresms <= nextp->c_hresms) {		\
+		cp->cnext = ct->ct_hresq;				\
+		ct->ct_hresq = cp;					\
+		cp->cprev = NULL;					\
+		if (cp->cnext != NULL)					\
+			cp->cnext->cprev = cp;				\
+	} else {							\
+		do {							\
+			prevp = nextp;					\
+			nextp = nextp->cnext;				\
+		} while (nextp != NULL && hresms > nextp->c_hresms);	\
+		prevp->cnext = cp;					\
+		cp->cprev = prevp;					\
+		cp->cnext = nextp;					\
+		if (nextp != NULL) 					\
+			nextp->cprev = cp;				\
+	}								\
+}
+
+#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
+{								\
+	if (cp == ct->ct_hresq) {				\
+		ct->ct_hresq = cp->cnext;			\
+		if (cp->cnext != NULL)				\
+			cp->cnext->cprev = NULL;		\
+	} else {						\
+		cp->cprev->cnext = cp->cnext;			\
+		if (cp->cnext != NULL)				\
+			cp->cnext->cprev = cp->cprev;		\
+	}							\
+}
+
+#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
+	ASSERT(MUTEX_HELD(&ct->ct_lock));			\
+	ASSERT(cp->c_xid == id);				\
+	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
+	c_hrprev, hresms)
+
 /*
  * Allocate a callout structure.  We try quite hard because we
  * can't sleep, and if we can't do the allocation, we're toast.
@@ -106,9 +148,13 @@
 timeout_common(void (*func)(void *), void *arg, clock_t delta,
     callout_table_t *ct)
 {
-	callout_t *cp;
-	callout_id_t id;
-	clock_t runtime;
+	callout_t	*cp;
+	callout_id_t	id;
+	clock_t		runtime;
+	timestruc_t	now;
+	int64_t		hresms;
+
+	gethrestime(&now);
 
 	mutex_enter(&ct->ct_lock);
 
@@ -127,6 +173,11 @@
 		delta = 1;
 	cp->c_runtime = runtime = lbolt + delta;
 
+	/* Calculate the absolute expiry time in milliseconds */
+	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
+	    TICK_TO_MSEC(delta);
+	cp->c_hresms = hresms;
+
 	/*
 	 * Assign an ID to this callout
 	 */
@@ -140,6 +191,7 @@
 	cp->c_xid = id;
 
 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
+	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);
 
 	mutex_exit(&ct->ct_lock);
 
@@ -184,6 +236,7 @@
 			clock_t time_left = runtime - lbolt;
 
 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
+			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
 			cp->c_idnext = ct->ct_freelist;
 			ct->ct_freelist = cp;
 			mutex_exit(&ct->ct_lock);
@@ -245,9 +298,11 @@
 static void
 callout_execute(callout_table_t *ct)
 {
-	callout_t *cp;
-	callout_id_t xid;
-	clock_t runtime;
+	callout_t	*cp;
+	callout_id_t	xid;
+	clock_t		runtime;
+	timestruc_t	now;
+	int64_t		hresms;
 
 	mutex_enter(&ct->ct_lock);
 
@@ -267,14 +322,16 @@
 			mutex_enter(&ct->ct_lock);
 
 			/*
-			 * Delete callout from hash tables, return to freelist,
-			 * and tell anyone who cares that we're done.
+			 * Delete callout from both the hash tables and the
+			 * hres queue, return it to freelist, and tell anyone
+			 * who cares that we're done.
 			 * Even though we dropped and reacquired ct->ct_lock,
 			 * it's OK to pick up where we left off because only
 			 * newly-created timeouts can precede cp on ct_lbhash,
 			 * and those timeouts cannot be due on this tick.
 			 */
 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
+			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
 			cp->c_idnext = ct->ct_freelist;
 			ct->ct_freelist = cp;
 			cp->c_xid = 0;	/* Indicate completion for c_done */
@@ -289,6 +346,44 @@
 		if (ct->ct_runtime == runtime)
 			ct->ct_runtime = runtime + 1;
 	}
+
+	gethrestime(&now);
+
+	/* Calculate the current time in milliseconds */
+	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
+
+	cp = ct->ct_hresq;
+	while (cp != NULL && hresms >= cp->c_hresms) {
+		xid = cp->c_xid;
+		if (xid & CALLOUT_EXECUTING) {
+			cp = cp->c_hrnext;
+			continue;
+		}
+		cp->c_executor = curthread;
+		cp->c_xid = xid |= CALLOUT_EXECUTING;
+		runtime = cp->c_runtime;
+		mutex_exit(&ct->ct_lock);
+		DTRACE_PROBE1(callout__start, callout_t *, cp);
+		(*cp->c_func)(cp->c_arg);
+		DTRACE_PROBE1(callout__end, callout_t *, cp);
+		mutex_enter(&ct->ct_lock);
+
+		/*
+		 * See comments above.
+		 */
+		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
+		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
+		cp->c_idnext = ct->ct_freelist;
+		ct->ct_freelist = cp;
+		cp->c_xid = 0;	/* Indicate completion for c_done */
+		cv_broadcast(&cp->c_done);
+
+		/*
+		 * ct_lock was dropped above, so restart from the head of
+		 * the list in case an earlier hres timeout was queued.
+		 */
+		cp = ct->ct_hresq;
+	}
 	mutex_exit(&ct->ct_lock);
 }
 
@@ -298,8 +393,10 @@
 static void
 callout_schedule_1(callout_table_t *ct)
 {
-	callout_t *cp;
-	clock_t curtime, runtime;
+	callout_t	*cp;
+	clock_t		curtime, runtime;
+	timestruc_t	now;
+	int64_t		hresms;
 
 	mutex_enter(&ct->ct_lock);
 	ct->ct_curtime = curtime = lbolt;
@@ -320,6 +417,26 @@
 		}
 		ct->ct_runtime++;
 	}
+
+	gethrestime(&now);
+
+	/* Calculate the current time in milliseconds */
+	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
+
+	cp = ct->ct_hresq;
+	while (cp != NULL && hresms >= cp->c_hresms) {
+		if (cp->c_xid & CALLOUT_EXECUTING) {
+			cp = cp->c_hrnext;
+			continue;
+		}
+		mutex_exit(&ct->ct_lock);
+		if (ct->ct_taskq == NULL)
+			softcall((void (*)(void *))callout_execute, ct);
+		else
+			(void) taskq_dispatch(ct->ct_taskq,
+			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
+		return;
+	}
 	mutex_exit(&ct->ct_lock);
 }
 
--- a/usr/src/uts/common/os/clock.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/clock.c	Tue Feb 13 14:18:11 2007 -0800
@@ -856,7 +856,7 @@
 					hrestime = tod;
 					membar_enter();	/* hrestime visible */
 					timedelta = 0;
-					timechanged++;
+					hrestime_isvalid = 1;
 					tod_needsync = 0;
 					hr_clock_unlock(s);
 				}
@@ -1787,16 +1787,20 @@
 	mutex_exit(&tod_lock);
 }
 
-int	timechanged;	/* for testing if the system time has been reset */
+int	hrestime_isvalid = 0;
 
 void
 set_hrestime(timestruc_t *ts)
 {
 	int spl = hr_clock_lock();
 	hrestime = *ts;
-	membar_enter();	/* hrestime must be visible before timechanged++ */
+	/*
+	 * hrestime must be visible before hrestime_isvalid
+	 * is set to 1
+	 */
+	membar_enter();
 	timedelta = 0;
-	timechanged++;
+	hrestime_isvalid = 1;
 	hr_clock_unlock(spl);
 }
 
--- a/usr/src/uts/common/os/condvar.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/condvar.c	Tue Feb 13 14:18:11 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -619,19 +619,13 @@
  *	        >0 if awakened via cv_signal() or cv_broadcast()
  *		   or by a spurious wakeup.
  *		   (might return time remaining)
- * As a special test, if someone abruptly resets the system time
- * (but not through adjtime(2); drifting of the clock is allowed and
- * expected [see timespectohz_adj()]), then we force a return of -1
- * so the caller can return a premature timeout to the calling process
- * so it can reevaluate the situation in light of the new system time.
- * (The system clock has been reset if timecheck != timechanged.)
  */
 int
-cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp,
-	timestruc_t *when, int timecheck)
+cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp, timestruc_t *when)
 {
 	timestruc_t now;
 	timestruc_t delta;
+	clock_t ticks;
 	int rval;
 
 	if (when == NULL)
@@ -648,29 +642,20 @@
 		 */
 		rval = cv_timedwait_sig(cvp, mp, lbolt);
 	} else {
-		if (timecheck == timechanged) {
-			rval = cv_timedwait_sig(cvp, mp,
-				lbolt + timespectohz_adj(when, now));
-		} else {
-			/*
-			 * Someone reset the system time;
-			 * just force an immediate timeout.
-			 */
-			rval = -1;
-		}
-		if (rval == -1 && timecheck == timechanged) {
-			/*
-			 * Even though cv_timedwait_sig() returned showing a
-			 * timeout, the future time may not have passed yet.
-			 * If not, change rval to indicate a normal wakeup.
-			 */
-			gethrestime(&now);
-			delta = *when;
-			timespecsub(&delta, &now);
-			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
-			    delta.tv_nsec > 0))
+		ticks = lbolt + timespectohz(when, now);
+		rval = cv_timedwait_sig(cvp, mp, ticks);
+
+		gethrestime(&now);
+		delta = *when;
+		timespecsub(&delta, &now);
+
+		/*
+		 * timeout is premature iff
+		 *	ticks >= lbolt  and  when > now
+		 */
+		if (rval == -1 && ticks >= lbolt && (delta.tv_sec > 0 ||
+		    (delta.tv_sec == 0 && delta.tv_nsec > 0)))
 				rval = 1;
-		}
 	}
 	return (rval);
 }
--- a/usr/src/uts/common/os/logsubr.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/logsubr.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -327,7 +327,7 @@
 		lc->flags |= SL_LOGONLY;
 
 		/*
-		 * The ttime is written with 0 in log_sensmsg() only when
+		 * The ttime is written with 0 in log_sendmsg() only when
 		 * good gethrestime_sec() data is not available to store in
 		 * the log_ctl_t in the early boot phase.
 		 */
@@ -605,16 +605,12 @@
 	log_enter();
 
 	/*
-	 * In the early boot phase hrestime is invalid, then timechanged is 0.
-	 * If hrestime is not valid, the ttime is set to 0 here and the correct
-	 * ttime is calculated in log_conswitch() later. The log_conswitch()
-	 * calculation to determine the correct ttime does not use ttime data
-	 * from these log_ctl_t structures; it only uses ttime from log_ctl_t's
-	 * that contain good data.
-	 *
+	 * If we are still in the early boot phase and the hrestime is invalid,
+	 * we set ttime to 0 so that log_conswitch() can determine the correct
+	 * ttime with a log_ctl_t structure which contains a valid ttime stamp.
 	 */
 	lc->ltime = lbolt;
-	if (timechanged) {
+	if (hrestime_isvalid) {
 		lc->ttime = gethrestime_sec();
 	} else {
 		lc->ttime = 0;
--- a/usr/src/uts/common/os/timers.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/os/timers.c	Tue Feb 13 14:18:11 2007 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -849,28 +849,6 @@
 }
 
 /*
- * Same as timespectohz() except that we adjust the clock ticks down a bit.
- * If we will be waiting for a long time, we may encounter skewing problems
- * due to adjtime() system calls.  Since we can skew up to 1/16 lbolt rate
- * if adjtime is going crazy, we reduce the time delta since timeout() takes
- * clock ticks rather than wallclock elapsed time.  This may cause the caller
- * (who calls timeout()) to return with a timeout prematurely and callers
- * must accommodate this.  See lwp_timeout(), queue_lwptimer() and
- * cv_waituntil_sig(), currently the only callers of this function.
- */
-clock_t
-timespectohz_adj(timespec_t *tv, timespec_t now)
-{
-	timespec_t wait_time = *tv;
-
-	timespecsub(&wait_time, &now);
-	wait_time.tv_sec -= wait_time.tv_sec >> 4;
-	wait_time.tv_nsec -= wait_time.tv_nsec >> 4;
-	timespecadd(&wait_time, &now);
-	return (timespectohz(&wait_time, now));
-}
-
-/*
  * hrt2ts(): convert from hrtime_t to timestruc_t.
  *
  * All this routine really does is:
@@ -1183,7 +1161,6 @@
 	timespec_t rqtime;
 	timespec_t rmtime;
 	timespec_t now;
-	int timecheck;
 	int ret = 1;
 	model_t datamodel = get_udatamodel();
 
@@ -1203,12 +1180,11 @@
 		return (set_errno(EINVAL));
 
 	if (timerspecisset(&rqtime)) {
-		timecheck = timechanged;
 		gethrestime(&now);
 		timespecadd(&rqtime, &now);
 		mutex_enter(&curthread->t_delay_lock);
 		while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
-		    &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
+		    &curthread->t_delay_lock, &rqtime)) > 0)
 			continue;
 		mutex_exit(&curthread->t_delay_lock);
 	}
--- a/usr/src/uts/common/sys/callo.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/callo.h	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,8 @@
 
 
 /*
- * Copyright (c) 1997-1998 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
  */
 
 #ifndef _SYS_CALLO_H
@@ -51,8 +50,11 @@
 	struct callout	*c_idprev;	/* prev in ID hash */
 	struct callout	*c_lbnext;	/* next in lbolt hash */
 	struct callout	*c_lbprev;	/* prev in lbolt hash */
+	struct callout	*c_hrnext;	/* next in hres queue */
+	struct callout	*c_hrprev;	/* prev in hres queue */
 	callout_id_t	c_xid;		/* extended callout ID; see below */
 	clock_t		c_runtime;	/* absolute run time */
+	int64_t		c_hresms;	/* hres expiry time in milliseconds */
 	void		(*c_func)(void *); /* function to call */
 	void		*c_arg;		/* argument to function */
 	kthread_id_t	c_executor;	/* thread executing callout */
@@ -125,6 +127,7 @@
 	callout_id_t	ct_long_id;	/* most recently issued long-term ID */
 	callout_t 	*ct_idhash[CALLOUT_BUCKETS];	/* ID hash chains */
 	callout_t 	*ct_lbhash[CALLOUT_BUCKETS];	/* lbolt hash chains */
+	callout_t	*ct_hresq;	/* hres sorted queue */
 } callout_table_t;
 
 #ifdef	_KERNEL
--- a/usr/src/uts/common/sys/condvar.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/condvar.h	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -82,7 +81,7 @@
 extern	int	cv_wait_sig_swap_core(kcondvar_t *, kmutex_t *, int *);
 extern	void	cv_signal(kcondvar_t *);
 extern	void	cv_broadcast(kcondvar_t *);
-extern	int	cv_waituntil_sig(kcondvar_t *, kmutex_t *, timestruc_t *, int);
+extern	int	cv_waituntil_sig(kcondvar_t *, kmutex_t *, timestruc_t *);
 
 #endif	/* defined(_KERNEL) */
 
--- a/usr/src/uts/common/sys/lwp_timer_impl.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/lwp_timer_impl.h	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -42,7 +41,7 @@
 	kthread_t	*lwpt_thread;
 	timespec_t	*lwpt_tsp;
 	timespec_t	lwpt_rqtime;
-	int		lwpt_timecheck;
+	clock_t		lwpt_lbolt;
 	int		lwpt_imm_timeout;
 	int		lwpt_time_error;
 	timeout_id_t	lwpt_id;
--- a/usr/src/uts/common/sys/port_impl.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/port_impl.h	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -182,7 +182,6 @@
 typedef struct	port_gettimer {
 	ushort_t	pgt_flags;
 	ushort_t	pgt_loop;
-	int		pgt_timecheck;
 	timespec_t	pgt_rqtime;
 	timespec_t	*pgt_rqtp;
 	struct timespec	*pgt_timeout;
--- a/usr/src/uts/common/sys/time.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/time.h	Tue Feb 13 14:18:11 2007 -0800
@@ -272,7 +272,7 @@
 } todinfo_t;
 
 extern	int64_t		timedelta;
-extern	int		timechanged;
+extern	int		hrestime_isvalid;
 extern	int		tod_needsync;
 extern	kmutex_t	tod_lock;
 extern	timestruc_t	hrestime;
--- a/usr/src/uts/common/sys/timer.h	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/sys/timer.h	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -99,7 +99,6 @@
 extern	void	timer_lwpexit(void);
 extern	clock_t	hzto(struct timeval *);
 extern	clock_t	timespectohz(timespec_t *, timespec_t);
-extern	clock_t	timespectohz_adj(timespec_t *, timespec_t);
 extern	int	itimerspecfix(timespec_t *);
 extern	void	timespecadd(timespec_t *, timespec_t *);
 extern	void	timespecsub(timespec_t *, timespec_t *);
--- a/usr/src/uts/common/syscall/lwp_timer.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/syscall/lwp_timer.c	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -59,16 +58,17 @@
 
 	mutex_enter(&t->t_delay_lock);
 	gethrestime(&now);
+
 	/*
-	 * Requeue the timeout if no one has reset the system time
-	 * and if the absolute future time has not been reached.
+	 * timeout is premature iff
+	 *	lwpt_lbolt >= lbolt  and  when > now
 	 */
-	if (lwptp->lwpt_timecheck == timechanged &&
+	if (lwptp->lwpt_lbolt >= lbolt &&
 	    (lwptp->lwpt_rqtime.tv_sec > now.tv_sec ||
 	    (lwptp->lwpt_rqtime.tv_sec == now.tv_sec &&
 	    lwptp->lwpt_rqtime.tv_nsec > now.tv_nsec))) {
 		lwptp->lwpt_id = realtime_timeout(lwp_timer_timeout, lwptp,
-			timespectohz_adj(&lwptp->lwpt_rqtime, now));
+			timespectohz(&lwptp->lwpt_rqtime, now));
 	} else {
 		/*
 		 * Set the thread running only if it is asleep on
@@ -93,7 +93,6 @@
 
 	if (tsp == NULL)	/* not really an error, just need to bzero() */
 		goto err;
-	lwptp->lwpt_timecheck = timechanged; /* do this before gethrestime() */
 	gethrestime(&now);		/* do this before copyin() */
 	if (curproc->p_model == DATAMODEL_NATIVE) {
 		if (copyin(tsp, &lwptp->lwpt_rqtime, sizeof (timespec_t))) {
@@ -128,6 +127,8 @@
 		lwptp->lwpt_id = 0;
 		lwptp->lwpt_imm_timeout = 0;
 		timespecadd(&lwptp->lwpt_rqtime, &now);
+		lwptp->lwpt_lbolt = lbolt +
+		    timespectohz(&lwptp->lwpt_rqtime, now);
 	}
 	return (0);
 err:
@@ -144,7 +145,12 @@
 	ASSERT(lwptp->lwpt_thread == curthread);
 	ASSERT(MUTEX_HELD(&curthread->t_delay_lock));
 	gethrestime(&now);
-	if (lwptp->lwpt_timecheck == timechanged &&
+
+	/*
+	 * timeout is premature iff
+	 *	lwpt_lbolt >= lbolt  and  when > now
+	 */
+	if (lwptp->lwpt_lbolt >= lbolt &&
 	    (lwptp->lwpt_rqtime.tv_sec > now.tv_sec ||
 	    (lwptp->lwpt_rqtime.tv_sec == now.tv_sec &&
 	    lwptp->lwpt_rqtime.tv_nsec > now.tv_nsec))) {
@@ -152,7 +158,7 @@
 		 * Queue the timeout.
 		 */
 		lwptp->lwpt_id = realtime_timeout(lwp_timer_timeout, lwptp,
-			timespectohz_adj(&lwptp->lwpt_rqtime, now));
+			timespectohz(&lwptp->lwpt_rqtime, now));
 		return (0);
 	}
 
--- a/usr/src/uts/common/syscall/lwpsys.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/syscall/lwpsys.c	Tue Feb 13 14:18:11 2007 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -403,7 +402,6 @@
 	timespec_t now;
 	timespec_t *rqtp = NULL;
 	kthread_t *t = curthread;
-	int timecheck = 0;
 	int error = 0;
 	model_t datamodel = ttoproc(t)->p_model;
 
@@ -411,7 +409,6 @@
 		(void) lwp_unpark(lwpid);
 
 	if (timeoutp) {
-		timecheck = timechanged;
 		gethrestime(&now);
 		if (datamodel == DATAMODEL_NATIVE) {
 			if (copyin(timeoutp, &rqtime, sizeof (timespec_t))) {
@@ -446,7 +443,7 @@
 		error = EINTR;
 	while (error == 0 && t->t_unpark == 0) {
 		switch (cv_waituntil_sig(&t->t_delay_cv,
-		    &t->t_delay_lock, rqtp, timecheck)) {
+		    &t->t_delay_lock, rqtp)) {
 		case 0:
 			error = EINTR;
 			break;
--- a/usr/src/uts/common/syscall/poll.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/syscall/poll.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -290,7 +290,6 @@
 	int rval;
 	int i;
 	timespec_t *rqtp = NULL;
-	int timecheck = 0;
 	int imm_timeout = 0;
 	pollfd_t *pollfdp;
 	pollstate_t *ps;
@@ -307,7 +306,6 @@
 			imm_timeout = 1;
 		else {
 			timespec_t now;
-			timecheck = timechanged;
 			gethrestime(&now);
 			rqtp = tsp;
 			timespecadd(rqtp, &now);
@@ -348,7 +346,7 @@
 		if (!imm_timeout) {
 			mutex_enter(&t->t_delay_lock);
 			while ((rval = cv_waituntil_sig(&t->t_delay_cv,
-			    &t->t_delay_lock, rqtp, timecheck)) > 0)
+			    &t->t_delay_lock, rqtp)) > 0)
 				continue;
 			mutex_exit(&t->t_delay_lock);
 			if (rval == 0)
@@ -546,7 +544,7 @@
 			rval = -1;
 		else
 			rval = cv_waituntil_sig(&pcp->pc_cv, &pcp->pc_lock,
-				rqtp, timecheck);
+				rqtp);
 		mutex_exit(&pcp->pc_lock);
 		/*
 		 * If we have received a signal or timed out
--- a/usr/src/uts/common/syscall/sem.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/syscall/sem.c	Tue Feb 13 14:18:11 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -874,7 +874,6 @@
 	struct sembuf	*uops;	/* ptr to copy of user ops */
 	struct sembuf 	x_sem;	/* avoid kmem_alloc's */
 	timespec_t	now, ts, *tsp = NULL;
-	int		timecheck = 0;
 	int		cvres, needundo, mode;
 	struct sem_undo	*undo;
 	proc_t		*pp = curproc;
@@ -898,7 +897,6 @@
 	 * we can legally not validate 'timeout' if it is unused.
 	 */
 	if (timeout != NULL) {
-		timecheck = timechanged;
 		gethrestime(&now);
 		if (error = compute_timeout(&tsp, &ts, &now, timeout))
 			return (set_errno(error));
@@ -1089,8 +1087,7 @@
 				ipc_hold(sem_svc, (kipc_perm_t *)sp);
 			}
 			semp->semncnt++;
-			cvres = cv_waituntil_sig(&semp->semncnt_cv, lock,
-				tsp, timecheck);
+			cvres = cv_waituntil_sig(&semp->semncnt_cv, lock, tsp);
 			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
 
 			if (!IPC_FREE(&sp->sem_perm)) {
@@ -1126,8 +1123,7 @@
 				ipc_hold(sem_svc, (kipc_perm_t *)sp);
 			}
 			semp->semzcnt++;
-			cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock,
-				tsp, timecheck);
+			cvres = cv_waituntil_sig(&semp->semzcnt_cv, lock, tsp);
 			lock = ipc_relock(sem_svc, sp->sem_perm.ipc_id, lock);
 
 			/*
--- a/usr/src/uts/common/syscall/sigtimedwait.c	Tue Feb 13 10:15:31 2007 -0800
+++ b/usr/src/uts/common/syscall/sigtimedwait.c	Tue Feb 13 14:18:11 2007 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -102,7 +102,6 @@
 	proc_t *p = ttoproc(t);
 	timespec_t sig_timeout;
 	timespec_t *rqtp = NULL;
-	int timecheck = 0;
 	int ret;
 	int error = 0;
 	k_siginfo_t info, *infop;
@@ -111,7 +110,6 @@
 	if (timeoutp) {
 		timespec_t now;
 
-		timecheck = timechanged;
 		gethrestime(&now);
 		if (datamodel == DATAMODEL_NATIVE) {
 			if (copyin(timeoutp, &sig_timeout,
@@ -153,7 +151,7 @@
 	 * the absolute future time is passed.
 	 */
 	while ((ret = cv_waituntil_sig(&t->t_delay_cv, &p->p_lock,
-	    rqtp, timecheck)) > 0)
+	    rqtp)) > 0)
 		continue;
 	if (ret == -1)
 		error = EAGAIN;