changeset 6057:275ef2021819

6664275 robust locks are not so robust after all
author raf
date Thu, 21 Feb 2008 13:40:27 -0800
parents 37f30782c577
children 8472859e569a
files usr/src/common/atomic/atomic.c usr/src/lib/libc/inc/thr_uberdata.h usr/src/lib/libc/port/threads/synch.c usr/src/uts/common/syscall/lwp_sobj.c usr/src/uts/intel/sys/synch32.h usr/src/uts/sparc/sys/synch32.h
diffstat 6 files changed, 153 insertions(+), 59 deletions(-)
--- a/usr/src/common/atomic/atomic.c	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/common/atomic/atomic.c	Thu Feb 21 13:40:27 2008 -0800
@@ -18,8 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -421,7 +422,7 @@
 }
 
 uint64_t
-atomic_cas_uint64(volatile uint64_t *target, ulong_t cmp, uint64_t new)
+atomic_cas_64(volatile uint64_t *target, uint64_t cmp, uint64_t new)
 {
 	uint64_t old = *target;
 	if (old == cmp)
--- a/usr/src/lib/libc/inc/thr_uberdata.h	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/lib/libc/inc/thr_uberdata.h	Thu Feb 21 13:40:27 2008 -0800
@@ -102,6 +102,7 @@
  * and why they are different for sparc and intel.
  */
 #if defined(__sparc)
+
 /* lock.lock64.pad[x]	   4 5 6 7 */
 #define	LOCKMASK	0xff000000
 #define	WAITERMASK	0x000000ff
@@ -110,7 +111,15 @@
 #define	WAITER		0x00000001
 #define	LOCKSET		0xff
 #define	LOCKCLEAR	0
+
+#define	PIDSHIFT	32
+#define	LOCKMASK64	0xffffffffff000000ULL
+#define	LOCKBYTE64	0x00000000ff000000ULL
+#define	WAITERMASK64	0x00000000000000ffULL
+#define	SPINNERMASK64	0x0000000000ff0000ULL
+
 #elif defined(__x86)
+
 /* lock.lock64.pad[x]	   7 6 5 4 */
 #define	LOCKMASK	0xff000000
 #define	WAITERMASK	0x00ff0000
@@ -119,6 +128,13 @@
 #define	WAITER		0x00010000
 #define	LOCKSET		0x01
 #define	LOCKCLEAR	0
+
+#define	PIDSHIFT	0
+#define	LOCKMASK64	0xff000000ffffffffULL
+#define	LOCKBYTE64	0x0100000000000000ULL
+#define	WAITERMASK64	0x00ff000000000000ULL
+#define	SPINNERMASK64	0x0000ff0000000000ULL
+
 #else
 #error "neither __sparc nor __x86 is defined"
 #endif
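
The new 64-bit constants select the lock, spinner, and waiter bytes out of a
single 64-bit view that spans both the 32-bit lockword and the 32-bit
mutex_ownerpid; the sparc and x86 sets differ only because sparc is big-endian
and x86 little-endian. A minimal standalone sketch (assuming a little-endian
x86 build and a layout that mirrors, but is not taken from, the lock union in
<sys/synch.h>) shows how the x86 constants line up with the byte positions:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	#define	PIDSHIFT	0			/* x86 value */
	#define	LOCKBYTE64	0x0100000000000000ULL
	#define	WAITERMASK64	0x00ff000000000000ULL

	int
	main(void)
	{
		union {				/* mirrors the mutex lock union */
			uint64_t owner64;	/* mutex_lockword64 */
			struct {
				uint32_t ownerpid;	/* mutex_ownerpid */
				uint32_t lockword;	/* mutex_lockword */
			} lock32;
			uint8_t pad[8];		/* mutex_lockw == pad[7] on x86 */
		} m = { 0 };

		/* the value set_lock_byte64() publishes with one cas64 */
		m.owner64 =
		    ((uint64_t)(uint32_t)getpid() << PIDSHIFT) | LOCKBYTE64;
		assert(m.lock32.ownerpid == (uint32_t)getpid());
		assert(m.pad[7] == 0x01);	/* LOCKSET */
		assert((m.owner64 & WAITERMASK64) == 0);	/* waiters untouched */
		(void) printf("lock byte and owner pid set together\n");
		return (0);
	}
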
--- a/usr/src/lib/libc/port/threads/synch.c	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/lib/libc/port/threads/synch.c	Thu Feb 21 13:40:27 2008 -0800
@@ -26,10 +26,12 @@
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
-#include <sys/sdt.h>
+#define	atomic_cas_64	_atomic_cas_64
 
 #include "lint.h"
 #include "thr_uberdata.h"
+#include <sys/sdt.h>
+#include <atomic.h>
 
 /*
  * This mutex is initialized to be held by lwp#1.
@@ -39,7 +41,6 @@
 mutex_t	stall_mutex = DEFAULTMUTEX;
 
 static int shared_mutex_held(mutex_t *);
-static int mutex_unlock_internal(mutex_t *, int);
 static int mutex_queuelock_adaptive(mutex_t *);
 static void mutex_wakeup_all(mutex_t *);
 
@@ -292,6 +293,43 @@
 }
 
 /*
+ * Same as clear_lockbyte(), but operates on mutex_lockword64.
+ * The mutex_ownerpid field is cleared along with the lock byte.
+ */
+static uint64_t
+clear_lockbyte64(volatile uint64_t *lockword64)
+{
+	uint64_t old;
+	uint64_t new;
+
+	do {
+		old = *lockword64;
+		new = old & ~LOCKMASK64;
+	} while (atomic_cas_64(lockword64, old, new) != old);
+
+	return (old);
+}
+
+/*
+ * Similar to set_lock_byte(), which only tries to set the lock byte.
+ * Here, we attempt to set the lock byte AND the mutex_ownerpid,
+ * keeping the remaining bytes constant.
+ */
+static int
+set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
+{
+	uint64_t old;
+	uint64_t new;
+
+	old = *lockword64 & ~LOCKMASK64;
+	new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
+	if (atomic_cas_64(lockword64, old, new) == old)
+		return (LOCKCLEAR);
+
+	return (LOCKSET);
+}
+
+/*
  * Increment the spinners count in the mutex lock word.
  * Return 0 on success.  Return -1 if the count would overflow.
  */
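
clear_lockbyte64() above is the classic compare-and-swap retry loop: snapshot
the word, compute a new value with the lock byte and owner pid cleared, and
retry if another thread changed the word in between. As a rough standalone
equivalent (a sketch only, substituting C11 atomics for libc's private
atomic_cas_64):

	#include <stdatomic.h>
	#include <stdint.h>

	#define	LOCKMASK64	0xff000000ffffffffULL	/* x86 value */

	/* clear lock byte and owner pid together; return the old word */
	static uint64_t
	clear_lockbyte64_sketch(_Atomic uint64_t *lockword64)
	{
		uint64_t old = atomic_load(lockword64);

		/*
		 * On failure, atomic_compare_exchange_weak() reloads
		 * 'old' with the current value, so each retry works
		 * against a fresh snapshot, just like the cas loop in
		 * the diff.
		 */
		while (!atomic_compare_exchange_weak(lockword64, &old,
		    old & ~LOCKMASK64))
			continue;

		return (old);
	}
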
@@ -1171,11 +1209,11 @@
 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
 		ASSERT(mp->mutex_type & LOCK_ROBUST);
 		/*
-		 * We shouldn't own the mutex; clear the lock.
+		 * We shouldn't own the mutex.
+		 * Just clear the lock; everyone has already been woken up.
 		 */
 		mp->mutex_owner = 0;
-		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK)
-			mutex_wakeup_all(mp);
+		(void) clear_lockbyte(&mp->mutex_lockword);
 		error = ENOTRECOVERABLE;
 	}
 
@@ -1246,7 +1284,7 @@
 	ulwp_t *self = curthread;
 	uberdata_t *udp = self->ul_uberdata;
 	int error = EBUSY;
-	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
+	volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64;
 	uint32_t new_lockword;
 	int count = 0;
 	int max_count;
@@ -1269,9 +1307,9 @@
 	 * incurring the overhead of the spin loop.
 	 */
 	enter_critical(self);
-	if (set_lock_byte(lockp) == 0) {
+	if (set_lock_byte64(lockp, udp->pid) == 0) {
 		mp->mutex_owner = (uintptr_t)self;
-		mp->mutex_ownerpid = udp->pid;
+		/* mp->mutex_ownerpid was set by set_lock_byte64() */
 		exit_critical(self);
 		error = 0;
 		goto done;
@@ -1299,9 +1337,10 @@
 	}
 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
 	for (count = 1; ; count++) {
-		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
+		if ((*lockp & LOCKMASK64) == 0 &&
+		    set_lock_byte64(lockp, udp->pid) == 0) {
 			mp->mutex_owner = (uintptr_t)self;
-			mp->mutex_ownerpid = udp->pid;
+			/* mp->mutex_ownerpid was set by set_lock_byte64() */
 			error = 0;
 			break;
 		}
@@ -1326,9 +1365,9 @@
 		 * necessary for correctness, to avoid ending up with an
 		 * unheld mutex with waiters but no one to wake them up.
 		 */
-		if (set_lock_byte(lockp) == 0) {
+		if (set_lock_byte64(lockp, udp->pid) == 0) {
 			mp->mutex_owner = (uintptr_t)self;
-			mp->mutex_ownerpid = udp->pid;
+			/* mp->mutex_ownerpid was set by set_lock_byte64() */
 			error = 0;
 		}
 		count++;
@@ -1339,15 +1378,12 @@
 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
 		ASSERT(mp->mutex_type & LOCK_ROBUST);
 		/*
-		 * We shouldn't own the mutex; clear the lock.
+		 * We shouldn't own the mutex.
+		 * Just clear the lock; everyone has already been woken up.
 		 */
 		mp->mutex_owner = 0;
-		mp->mutex_ownerpid = 0;
-		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK) {
-			no_preempt(self);
-			(void) ___lwp_mutex_wakeup(mp, 1);
-			preempt(self);
-		}
+		/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
+		(void) clear_lockbyte64(&mp->mutex_lockword64);
 		error = ENOTRECOVERABLE;
 	}
 
@@ -1481,8 +1517,8 @@
 	lwpid_t lwpid = 0;
 	uint32_t old_lockword;
 
+	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
 	mp->mutex_owner = 0;
-	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
 	if ((old_lockword & WAITERMASK) &&
 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
@@ -1504,14 +1540,14 @@
 static void
 mutex_unlock_process(mutex_t *mp, int release_all)
 {
-	uint32_t old_lockword;
-
+	uint64_t old_lockword64;
+
+	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
 	mp->mutex_owner = 0;
-	mp->mutex_ownerpid = 0;
-	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
-	old_lockword = clear_lockbyte(&mp->mutex_lockword);
-	if ((old_lockword & WAITERMASK) &&
-	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
+	/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
+	old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64);
+	if ((old_lockword64 & WAITERMASK64) &&
+	    (release_all || (old_lockword64 & SPINNERMASK64) == 0)) {
 		ulwp_t *self = curthread;
 		no_preempt(self);	/* ensure a prompt wakeup */
 		(void) ___lwp_mutex_wakeup(mp, release_all);
@@ -1635,11 +1671,11 @@
 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
 		ASSERT(mp->mutex_type & LOCK_ROBUST);
 		/*
-		 * We shouldn't own the mutex; clear the lock.
+		 * We shouldn't own the mutex.
+		 * Just clear the lock; everyone has already been woken up.
 		 */
 		mp->mutex_owner = 0;
-		if (clear_lockbyte(&mp->mutex_lockword) & WAITERMASK)
-			mutex_wakeup_all(mp);
+		(void) clear_lockbyte(&mp->mutex_lockword);
 		error = ENOTRECOVERABLE;
 	}
 
@@ -1902,9 +1938,9 @@
 	 */
 	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
 	enter_critical(self);
-	if (set_lock_byte(&mp->mutex_lockw) == 0) {
+	if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) {
 		mp->mutex_owner = (uintptr_t)self;
-		mp->mutex_ownerpid = udp->pid;
+		/* mp->mutex_ownerpid was set by set_lock_byte64() */
 		exit_critical(self);
 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
 		return (0);
@@ -2155,7 +2191,7 @@
 	if (mtype & LOCK_PRIO_INHERIT) {
 		no_preempt(self);
 		mp->mutex_owner = 0;
-		mp->mutex_ownerpid = 0;
+		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
 		mp->mutex_lockw = LOCKCLEAR;
 		error = ___lwp_mutex_unlock(mp);
@@ -2997,7 +3033,7 @@
 	self->ul_sp = stkptr();
 	self->ul_wchan = cvp;
 	mp->mutex_owner = 0;
-	mp->mutex_ownerpid = 0;
+	/* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */
 	if (mtype & LOCK_PRIO_INHERIT)
 		mp->mutex_lockw = LOCKCLEAR;
 	/*
--- a/usr/src/uts/common/syscall/lwp_sobj.c	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/uts/common/syscall/lwp_sobj.c	Thu Feb 21 13:40:27 2008 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -178,8 +178,14 @@
 			addr = ent->lwpchan_addr;
 			if (start <= addr && addr < end) {
 				*prev = ent->lwpchan_next;
+				/*
+				 * We do this only for the obsolete type
+				 * USYNC_PROCESS_ROBUST.  Otherwise robust
+				 * locks do not draw ELOCKUNMAPPED or
+				 * EOWNERDEAD due to being unmapped.
+				 */
 				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
-				    (ent->lwpchan_type & LOCK_ROBUST))
+				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
 					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
 				kmem_free(ent, sizeof (*ent));
 				atomic_add_32(&lcp->lwpchan_entries, -1);
@@ -234,7 +240,7 @@
 	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
 	lcp->lwpchan_entries = 0;
 	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
-		sizeof (lwpchan_hashbucket_t), KM_SLEEP);
+	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
 	lcp->lwpchan_next_data = NULL;
 
 	mutex_enter(&p->p_lcp_lock);
@@ -243,7 +249,7 @@
 			/* someone beat us to it */
 			mutex_exit(&p->p_lcp_lock);
 			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
-				sizeof (lwpchan_hashbucket_t));
+			    sizeof (lwpchan_hashbucket_t));
 			kmem_free(lcp, sizeof (lwpchan_data_t));
 			return;
 		}
@@ -266,7 +272,7 @@
 			while (ent != NULL) {
 				next = ent->lwpchan_next;
 				newbucket = lwpchan_bucket(lcp,
-					(uintptr_t)ent->lwpchan_addr);
+				    (uintptr_t)ent->lwpchan_addr);
 				ent->lwpchan_next = newbucket->lwpchan_chain;
 				newbucket->lwpchan_chain = ent;
 				ent = next;
@@ -345,7 +351,7 @@
 	while (lcp != NULL) {
 		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
 		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
-			sizeof (lwpchan_hashbucket_t));
+		    sizeof (lwpchan_hashbucket_t));
 		kmem_free(lcp, sizeof (lwpchan_data_t));
 		lcp = next_lcp;
 	}
@@ -1037,7 +1043,8 @@
 	uint16_t flag;
 
 	fuword16_noerr(&lp->mutex_flag, &flag);
-	if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) {
+	if ((flag &
+	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
 		flag |= lockflg;
 		suword16_noerr(&lp->mutex_flag, flag);
 	}
@@ -1531,7 +1538,7 @@
 	 */
 	if (release_all)
 		lwp_release_all(&lwpchan);
-	else if (lwp_release(&lwpchan, &waiters, 0) == 1)
+	else if (lwp_release(&lwpchan, &waiters, 0))
 		suword8_noerr(&lp->mutex_waiters, waiters);
 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
 out:
@@ -1669,6 +1676,8 @@
 		 * unlock the condition variable's mutex. (pagefaults are
 		 * possible here.)
 		 */
+		if (mtype & USYNC_PROCESS)
+			suword32_noerr(&mp->mutex_ownerpid, 0);
 		ulock_clear(&mp->mutex_lockw);
 		fuword8_noerr(&mp->mutex_waiters, &waiters);
 		if (waiters != 0) {
@@ -1684,7 +1693,7 @@
 			 * requestor will update the waiter bit correctly by
 			 * re-evaluating it.
 			 */
-			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
+			if (lwp_release(&m_lwpchan, &waiters, 0))
 				suword8_noerr(&mp->mutex_waiters, waiters);
 		}
 		m_locked = 0;
@@ -1788,6 +1797,8 @@
 	if (UPIMUTEX(mtype) == 0) {
 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
 		m_locked = 1;
+		if (mtype & USYNC_PROCESS)
+			suword32_noerr(&mp->mutex_ownerpid, 0);
 		ulock_clear(&mp->mutex_lockw);
 		fuword8_noerr(&mp->mutex_waiters, &waiters);
 		if (waiters != 0) {
@@ -1795,7 +1806,7 @@
 			 * See comment above on lock clearing and lwp_release()
 			 * success/failure.
 			 */
-			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
+			if (lwp_release(&m_lwpchan, &waiters, 0))
 				suword8_noerr(&mp->mutex_waiters, waiters);
 		}
 		m_locked = 0;
@@ -2563,7 +2574,7 @@
 		 * lwp_release() occurs, and the lock requestor will
 		 * update the waiter bit correctly by re-evaluating it.
 		 */
-		if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
+		if (lwp_release(&mlwpchan, &mwaiters, 0))
 			suword8_noerr(&mp->mutex_waiters, mwaiters);
 	}
 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
@@ -2661,7 +2672,7 @@
 		 * See comment above on lock clearing and lwp_release()
 		 * success/failure.
 		 */
-		if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
+		if (lwp_release(&mlwpchan, &mwaiters, 0))
 			suword8_noerr(&mp->mutex_waiters, mwaiters);
 	}
 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
@@ -2873,7 +2884,8 @@
 	}
 	if (ent->lwpchan_type & USYNC_PROCESS) {
 		fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
-		if (owner_pid != curproc->p_pid)
+		if ((UPIMUTEX(ent->lwpchan_type) || owner_pid != 0) &&
+		    owner_pid != curproc->p_pid)
 			goto out;
 	}
 	if (UPIMUTEX(ent->lwpchan_type)) {
@@ -2894,11 +2906,34 @@
 	} else {
 		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
 		locked = 1;
-		(void) lwp_clear_mutex(lp, lockflg);
-		ulock_clear(&lp->mutex_lockw);
-		fuword8_noerr(&lp->mutex_waiters, &waiters);
-		if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
-			suword8_noerr(&lp->mutex_waiters, waiters);
+		if ((ent->lwpchan_type & USYNC_PROCESS) && owner_pid == 0) {
+			/*
+			 * There is no owner.  If there are waiters,
+			 * we should wake up one or all of them.
+			 * It doesn't hurt to wake them up in error
+			 * since they will just retry the lock and
+			 * go to sleep again if necessary.
+			 */
+			fuword8_noerr(&lp->mutex_waiters, &waiters);
+			if (waiters != 0) {	/* there are waiters */
+				fuword16_noerr(&lp->mutex_flag, &flag);
+				if (flag & LOCK_NOTRECOVERABLE) {
+					lwp_release_all(&ent->lwpchan_lwpchan);
+					suword8_noerr(&lp->mutex_waiters, 0);
+				} else if (lwp_release(&ent->lwpchan_lwpchan,
+				    &waiters, 0)) {
+					suword8_noerr(&lp->mutex_waiters,
+					    waiters);
+				}
+			}
+		} else {
+			(void) lwp_clear_mutex(lp, lockflg);
+			ulock_clear(&lp->mutex_lockw);
+			fuword8_noerr(&lp->mutex_waiters, &waiters);
+			if (waiters &&
+			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
+				suword8_noerr(&lp->mutex_waiters, waiters);
+		}
 		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
 	}
 out:
@@ -3128,7 +3163,7 @@
 		    (flag & LOCK_NOTRECOVERABLE)) {
 			lwp_release_all(&lwpchan);
 			suword8_noerr(&lp->mutex_waiters, 0);
-		} else if (lwp_release(&lwpchan, &waiters, 0) == 1) {
+		} else if (lwp_release(&lwpchan, &waiters, 0)) {
 			suword8_noerr(&lp->mutex_waiters, waiters);
 		}
 	}
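
The kernel-side hunks above observe the same invariant from the other
direction: once userland can claim a USYNC_PROCESS mutex with a single cas64
that writes both the lock byte and its pid, the kernel must clear
mutex_ownerpid before dropping the lock byte. Otherwise a new owner could take
the lock the instant the byte reads zero, only to have the kernel's late pid
store clobber its ownership record. The ordering added in the condition-wait
paths reduces to this pattern (functions as in the diff):

	if (mtype & USYNC_PROCESS)
		suword32_noerr(&mp->mutex_ownerpid, 0);	/* pid first ... */
	ulock_clear(&mp->mutex_lockw);	/* ... then release the lock byte */
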
--- a/usr/src/uts/intel/sys/synch32.h	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/uts/intel/sys/synch32.h	Thu Feb 21 13:40:27 2008 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -41,7 +41,10 @@
 #define	mutex_magic		flags.magic
 #define	mutex_owner		data
 /* used to atomically operate on whole word via cas or swap instruction */
-#define	mutex_lockword		lock.lock32.lockword /* address of */
+#define	mutex_lockword		lock.lock32.lockword
+/* this requires cas64 */
+#define	mutex_lockword64	lock.owner64
+/* these are bytes */
 #define	mutex_lockw		lock.lock64.pad[7]
 #define	mutex_waiters		lock.lock64.pad[6]
 #define	mutex_spinners		lock.lock64.pad[5]
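
mutex_lockword64 names the whole 8-byte lock union (lock.owner64), which spans
both the 32-bit lockword and the 32-bit owner pid; no 32-bit atomic can cover
the two fields at once, which is why the userland paths above moved to
atomic_cas_64. A compile-time sketch of the aliasing (hypothetical types
mirroring, not taken from, <sys/synch.h>):

	#include <stdint.h>

	typedef union {
		struct {
			uint8_t	pad[8];	/* mutex_lockw == pad[7] on x86 */
		} lock64;
		struct {
			uint32_t ownerpid;	/* mutex_ownerpid */
			uint32_t lockword;	/* mutex_lockword */
		} lock32;
		uint64_t owner64;		/* mutex_lockword64 */
	} lock_union_sketch_t;

	/* one cas64 on owner64 covers lockword and ownerpid together */
	_Static_assert(sizeof (lock_union_sketch_t) == sizeof (uint64_t),
	    "lockword64 must alias the entire 8-byte lock union");
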
--- a/usr/src/uts/sparc/sys/synch32.h	Thu Feb 21 13:21:27 2008 -0800
+++ b/usr/src/uts/sparc/sys/synch32.h	Thu Feb 21 13:40:27 2008 -0800
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -41,7 +41,10 @@
 #define	mutex_magic		flags.magic
 #define	mutex_owner		data
 /* used to atomically operate on whole word via cas or swap instruction */
-#define	mutex_lockword		lock.lock32.lockword /* address of */
+#define	mutex_lockword		lock.lock32.lockword
+/* this requires cas64 */
+#define	mutex_lockword64	lock.owner64
+/* these are bytes */
 #define	mutex_lockw		lock.lock64.pad[4]
 #define	mutex_waiters		lock.lock64.pad[7]
 #define	mutex_spinners		lock.lock64.pad[5]