changeset 11130:ce5c27fd996f

6885655 Oban master panicked in RESYNC_NEXT during cluster shutdown
author James Hall <James.Hall@Sun.COM>
date Fri, 20 Nov 2009 10:46:15 +0000
parents 54c132bf34c7
children 3feab67d3ed4
files usr/src/uts/common/io/lvm/md/md_mddb.c usr/src/uts/common/io/lvm/md/md_subr.c usr/src/uts/common/io/lvm/mirror/mirror.c usr/src/uts/common/io/lvm/mirror/mirror_resync.c usr/src/uts/common/io/lvm/softpart/sp.c
diffstat 5 files changed, 87 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/lvm/md/md_mddb.c	Fri Nov 20 11:21:11 2009 +0100
+++ b/usr/src/uts/common/io/lvm/md/md_mddb.c	Fri Nov 20 10:46:15 2009 +0000
@@ -6936,7 +6936,7 @@
 			mddb_parse_msg->msg_lb_flags[i] =
 			    lbp->lb_locators[i].l_flags;
 		}
-		kresult = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+		kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 		while (rval != 0) {
 			rval = mdmn_ksend_message(s->s_setno,
 			    MD_MN_MSG_MDDB_PARSE, 0, 0,
@@ -9739,7 +9739,7 @@
 			 * the master node.  Once devids are turned on
 			 * for MN disksets, can send devid.
 			 */
-			kres = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+			kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 			msg_recerr = kmem_zalloc(
 			    sizeof (md_mn_msg_mddb_optrecerr_t), KM_SLEEP);
 			while (!(md_get_setstatus(s->s_setno) &
--- a/usr/src/uts/common/io/lvm/md/md_subr.c	Fri Nov 20 11:21:11 2009 +0100
+++ b/usr/src/uts/common/io/lvm/md/md_subr.c	Fri Nov 20 10:46:15 2009 +0000
@@ -716,7 +716,7 @@
 				mddb_parse_msg->msg_lb_flags[i] =
 				    lbp->lb_locators[i].l_flags;
 			}
-			kresult = kmem_zalloc(sizeof (md_mn_kresult_t),
+			kresult = kmem_alloc(sizeof (md_mn_kresult_t),
 			    KM_SLEEP);
 			while (rval != 0) {
 				flag = 0;
@@ -3921,9 +3921,11 @@
  * mirror owner, and MD_MSGF_DIRECTED will be set in the flags.  Non-owner
  * nodes will not receive these messages.
  *
- * For the case where md_mn_is_commd_present() is false, we rely on the
- * "result" having been kmem_zalloc()ed which, in effect, sets MDMNE_NULL for
- * kmmr_comm_state making MDMN_KSEND_MSG_OK() result in 0.
+ * To cover the case where md_mn_is_commd_present() is false, we pre-set
+ * result->kmmr_comm_state to MDMNE_RPC_FAIL (and kmmr_exitval to ~0) on entry.
+ * If the mdcommd service has been killed, no 'new' result structure is copied
+ * back; we return with the supplied result structure instead, and its
+ * pre-set values flag the failure to the caller.
  */
 int
 mdmn_ksend_message(
@@ -3942,6 +3944,15 @@
 	int		rval;
 	k_sigset_t	oldmask, newmask;
 
+	/*
+	 * Ensure that we default to a recoverable failure state if the
+	 * door upcall cannot pass the request on to rpc.mdcommd.
+	 * This may occur when shutting the node down while there is still
+	 * a mirror resync or metadevice state update occurring.
+	 */
+	result->kmmr_comm_state = MDMNE_RPC_FAIL;
+	result->kmmr_exitval = ~0;
+
 	if (size > MDMN_MAX_KMSG_DATA)
 		return (ENOMEM);
 	kmsg = kmem_zalloc(sizeof (md_mn_kmsg_t), KM_SLEEP);
@@ -4096,7 +4107,7 @@
 
 	if (lockp)
 		IOLOCK_RETURN_RELEASE(0, lockp);
-	kres = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
 	/*
 	 * Mask signals for the mdmd_ksend_message call.  This keeps the door
@@ -4143,7 +4154,7 @@
 	 * The check open message doesn't have to be logged, nor should the
 	 * result be stored in the MCT. We want an up-to-date state.
 	 */
-	kresult = kmem_zalloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
 	/*
 	 * Mask signals for the mdmd_ksend_message call.  This keeps the door
--- a/usr/src/uts/common/io/lvm/mirror/mirror.c	Fri Nov 20 11:21:11 2009 +0100
+++ b/usr/src/uts/common/io/lvm/mirror/mirror.c	Fri Nov 20 10:46:15 2009 +0000
@@ -216,6 +216,7 @@
 send_poke_hotspares_msg(daemon_request_t *drq)
 {
 	int			rval;
+	int			nretries = 0;
 	md_mn_msg_pokehsp_t	pokehsp;
 	md_mn_kresult_t		*kresult;
 	set_t			setno = (set_t)drq->dq.qlen;
@@ -223,6 +224,8 @@
 	pokehsp.pokehsp_setno = setno;
 
 	kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+
+retry_sphmsg:
 	rval = mdmn_ksend_message(setno, MD_MN_MSG_POKE_HOTSPARES,
 	    MD_MSGF_NO_LOG | MD_MSGF_NO_BCAST, 0, (char *)&pokehsp,
 	    sizeof (pokehsp), kresult);
@@ -234,6 +237,13 @@
 			while (!md_mn_is_commd_present()) {
 				delay(md_hz);
 			}
+			/*
+			 * commd has become reachable again, so retry once.
+			 * If this fails we'll panic as the system is in an
+			 * unexpected state.
+			 */
+			if (nretries++ == 0)
+				goto retry_sphmsg;
 		}
 		cmn_err(CE_PANIC,
 		    "ksend_message failure: POKE_HOTSPARES");
@@ -419,6 +429,7 @@
 	md_mn_kresult_t		*kresult;
 	mm_unit_t		*new_un;
 	int			rval;
+	int			nretries = 0;
 
 	mnum = MD_SID(un);
 	setno = MD_UN2SET(un);
@@ -475,6 +486,8 @@
 		}
 
 		kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+
+cc4hs_msg:
 		rval = mdmn_ksend_message(setno, msgtype, msgflags, 0,
 		    (char *)&allochspmsg, sizeof (allochspmsg),
 		    kresult);
@@ -503,6 +516,13 @@
 				while (!md_mn_is_commd_present()) {
 					delay(md_hz);
 				}
+				/*
+				 * commd has become reachable again, so retry
+				 * once. If this fails we'll panic as the
+				 * system is in an unexpected state.
+				 */
+				if (nretries++ == 0)
+					goto cc4hs_msg;
 			}
 			cmn_err(CE_PANIC,
 			    "ksend_message failure: ALLOCATE_HOTSPARE");
@@ -2411,6 +2431,7 @@
 	md_mn_msgtype_t		msgtype;
 	int			save_lock = 0;
 	mdi_unit_t		*ui_sm;
+	int			nretries = 0;
 
 	sm = &un->un_sm[smi];
 	smic = &un->un_smic[smi];
@@ -2569,6 +2590,7 @@
 		}
 
 		kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
+sscs_msg:
 		rval = mdmn_ksend_message(setno, msgtype, msgflags, 0,
 		    (char *)&stchmsg, sizeof (stchmsg), kresult);
 
@@ -2579,6 +2601,13 @@
 				while (!md_mn_is_commd_present()) {
 					delay(md_hz);
 				}
+				/*
+				 * commd is now available; retry the message
+				 * one time. If that fails we fall through and
+				 * panic as the system is in an unexpected state
+				 */
+				if (nretries++ == 0)
+					goto sscs_msg;
 			}
 			cmn_err(CE_PANIC,
 			    "ksend_message failure: STATE_UPDATE");
--- a/usr/src/uts/common/io/lvm/mirror/mirror_resync.c	Fri Nov 20 11:21:11 2009 +0100
+++ b/usr/src/uts/common/io/lvm/mirror/mirror_resync.c	Fri Nov 20 10:46:15 2009 +0000
@@ -1018,6 +1018,7 @@
 	md_mn_kresult_t		*kres;
 	int			dont_send = 0;
 	int			rval;
+	int			nretries = 0;
 
 	rmsg = (md_mn_msg_resync_t *)un->un_rs_msg;
 
@@ -1073,6 +1074,7 @@
 	md_unit_writerexit(ui);
 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
+smrd_msg:
 	mutex_enter(&un->un_rs_cpr_mx);
 	CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo);
 
@@ -1091,6 +1093,13 @@
 			while (!md_mn_is_commd_present()) {
 				delay(md_hz);
 			}
+			/*
+			 * commd is now available again. Retry the message once.
+			 * If this fails we panic as the system is in an
+			 * unexpected state.
+			 */
+			if (nretries++ == 0)
+				goto smrd_msg;
 		}
 		cmn_err(CE_PANIC, "ksend_message failure: RESYNC_PHASE_DONE");
 	}
@@ -1122,6 +1131,7 @@
 	md_mps_t		*ps;
 	mm_submirror_t		*sm;
 	int			smi;
+	int			nretries = 0;
 
 	ASSERT(rmsg != NULL);
 #ifdef DEBUG
@@ -1159,6 +1169,7 @@
 	md_unit_readerexit(ui);
 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
+smrn_msg:
 	mutex_enter(&un->un_rs_cpr_mx);
 	CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo);
 
@@ -1175,6 +1186,13 @@
 			while (!md_mn_is_commd_present()) {
 				delay(md_hz);
 			}
+			/*
+			 * commd is now available again. Retry the message once.
+			 * If this fails we panic as the system is in an
+			 * unexpected state.
+			 */
+			if (nretries++ == 0)
+				goto smrn_msg;
 		}
 		cmn_err(CE_PANIC, "ksend_message failure: RESYNC_NEXT");
 	}
@@ -2330,6 +2348,7 @@
 	char		cpr_name[23];	/* Unique CPR name */
 	int		rs_copysize;
 	char		*rs_buffer;
+	int		nretries = 0;
 
 resync_restart:
 #ifdef DEBUG
@@ -2652,6 +2671,7 @@
 
 			kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
+smrf_msg:
 			mutex_enter(&un->un_rs_cpr_mx);
 			CALLB_CPR_SAFE_BEGIN(&un->un_rs_cprinfo);
 
@@ -2671,6 +2691,14 @@
 					while (!md_mn_is_commd_present()) {
 						delay(md_hz);
 					}
+					/*
+					 * commd is now available again. Retry
+					 * the message once. If this fails we
+					 * panic as the system is in an
+					 * unexpected state.
+					 */
+					if (nretries++ == 0)
+						goto smrf_msg;
 				}
 				cmn_err(CE_PANIC,
 				    "ksend_message failure: RESYNC_FINISH");
@@ -3122,7 +3150,7 @@
 	 * TODO: make this a kmem_cache pool to improve
 	 * alloc/free performance ???
 	 */
-	kres = (md_mn_kresult_t *)kmem_zalloc(sizeof (md_mn_kresult_t),
+	kres = (md_mn_kresult_t *)kmem_alloc(sizeof (md_mn_kresult_t),
 	    KM_SLEEP);
 	rr = (md_mn_msg_rr_dirty_t *)kmem_alloc(sizeof (md_mn_msg_rr_dirty_t),
 	    KM_SLEEP);
--- a/usr/src/uts/common/io/lvm/softpart/sp.c	Fri Nov 20 11:21:11 2009 +0100
+++ b/usr/src/uts/common/io/lvm/softpart/sp.c	Fri Nov 20 10:46:15 2009 +0000
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -335,12 +335,14 @@
 	set_t		setno = MD_UN2SET(un);
 	int		rval;
 	const char	*str = (status == MD_SP_ERR) ? "MD_SP_ERR" : "MD_SP_OK";
+	int		nretries = 0;
 
 	sp_msg.sp_setstat_mnum = MD_SID(un);
 	sp_msg.sp_setstat_status = status;
 
 	kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
 
+spss_msg:
 	rval = mdmn_ksend_message(setno, MD_MN_MSG_SP_SETSTAT2, MD_MSGF_NO_LOG,
 	    0, (char *)&sp_msg, sizeof (sp_msg), kres);
 
@@ -351,6 +353,13 @@
 			while (!md_mn_is_commd_present()) {
 				delay(md_hz);
 			}
+			/*
+			 * commd is available again. Retry the message once.
+			 * If it fails we panic as the system is in an
+			 * unexpected state.
+			 */
+			if (nretries++ == 0)
+				goto spss_msg;
 		}
 		/*
 		 * Panic as we are now in an inconsistent state.