changeset 10311:539b18426dae

6821158 srn_notify() waits forever for pollers to respond
author Randy Fishel <Randy.Fishel@Sun.COM>
date Fri, 14 Aug 2009 11:48:37 -0700
parents ba87b3315737
children 5076f8129626
files usr/src/uts/common/io/srn.c
diffstat 1 files changed, 40 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/srn.c	Fri Aug 14 11:18:12 2009 -0600
+++ b/usr/src/uts/common/io/srn.c	Fri Aug 14 11:48:37 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -78,6 +78,7 @@
 	int		srn_type[SRN_MAX_CLONE]; /* type of handshake */
 	int		srn_delivered[SRN_MAX_CLONE];
 	srn_event_info_t srn_pending[SRN_MAX_CLONE];
+	int		srn_fault[SRN_MAX_CLONE];
 } srn = { NULL, -1};
 typedef struct srnstate *srn_state_t;
 
@@ -85,6 +86,9 @@
 uint_t		srn_poll_cnt[SRN_MAX_CLONE];	/* count of events for poll */
 int		srn_apm_count;
 int		srn_autosx_count;
+/* Number of seconds to wait for clients to ack a poll */
+int		srn_timeout = 10;
+
 struct pollhead	srn_pollhead[SRN_MAX_CLONE];
 
 static int	srn_open(dev_t *, int, int, cred_t *);
@@ -251,7 +255,7 @@
 srn_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
 	struct pollhead **phpp)
 {
-	extern struct pollhead srn_pollhead[];	/* common/os/sunpm.c */
+	extern struct pollhead srn_pollhead[];
 	int	clone;
 
 	clone = SRN_MINOR_TO_CLONE(getminor(dev));
@@ -342,6 +346,7 @@
 	crfree(srn.srn_cred[clone]);
 	srn.srn_cred[clone] = 0;
 	srn_poll_cnt[clone] = 0;
+	srn.srn_fault[clone] = 0;
 	if (srn.srn_pending[clone].ae_type || srn.srn_delivered[clone]) {
 		srn.srn_pending[clone].ae_type = 0;
 		srn.srn_delivered[clone] = 0;
@@ -407,6 +412,7 @@
 		}
 		ASSERT(srn.srn_type[clone] == SRN_TYPE_APM);
 		srn.srn_type[clone] = SRN_TYPE_AUTOSX;
+		srn.srn_fault[clone] = 0;
 		srn_apm_count--;
 		ASSERT(srn_apm_count >= 0);
 		ASSERT(srn_autosx_count >= 0);
@@ -423,6 +429,11 @@
 		 * then wake up the kernel thread sleeping for the delivery
 		 */
 		PMD(PMD_SX, ("SRN_IOC_NEXTEVENT entered\n"))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_NEXTEVENT clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn_poll_cnt[clone] == 0) {
 			mutex_exit(&srn_clone_lock);
@@ -450,6 +461,11 @@
 	case SRN_IOC_SUSPEND:
 		/* ack suspend */
 		PMD(PMD_SX, ("SRN_IOC_SUSPEND entered clone %d\n", clone))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_SUSPEND clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn.srn_delivered[clone] != SRN_SUSPEND_REQ) {
 			mutex_exit(&srn_clone_lock);
@@ -467,6 +483,11 @@
 	case SRN_IOC_RESUME:
 		/* ack resume */
 		PMD(PMD_SX, ("SRN_IOC_RESUME entered clone %d\n", clone))
+		if (srn.srn_fault[clone]) {
+			PMD(PMD_SX, ("SRN_IOC_RESUME clone %d fault "
+			    "cleared\n", clone))
+			srn.srn_fault[clone] = 0;
+		}
 		mutex_enter(&srn_clone_lock);
 		if (srn.srn_delivered[clone] != SRN_NORMAL_RESUME) {
 			mutex_exit(&srn_clone_lock);
@@ -522,11 +543,13 @@
 	PMD(PMD_SX, ("count %d\n", count))
 	for (clone = 0; clone < SRN_MAX_CLONE; clone++) {
 		if (srn.srn_type[clone] == type) {
+#ifdef DEBUG
 			if (type == SRN_TYPE_APM) {
 				ASSERT(srn.srn_pending[clone].ae_type == 0);
 				ASSERT(srn_poll_cnt[clone] == 0);
 				ASSERT(srn.srn_delivered[clone] == 0);
 			}
+#endif
 			srn.srn_pending[clone].ae_type = event;
 			srn_poll_cnt[clone] = 1;
 			PMD(PMD_SX, ("pollwake %d\n", clone))
@@ -544,7 +567,7 @@
 	/* otherwise wait for acks */
 restart:
 	/*
-	 * We wait untill all of the pending events are cleared.
+	 * We wait until all of the pending events are cleared.
 	 * We have to start over every time we do a cv_wait because
 	 * we give up the mutex and can be re-entered
 	 */
@@ -552,10 +575,22 @@
 		if (srn.srn_clones[clone] == 0 ||
 		    srn.srn_type[clone] != SRN_TYPE_APM)
 			continue;
-		if (srn.srn_pending[clone].ae_type) {
+		if (srn.srn_pending[clone].ae_type && !srn.srn_fault[clone]) {
 			PMD(PMD_SX, ("srn_notify waiting for ack for clone %d, "
 			    "event %x\n", clone, event))
-			cv_wait(&srn_clones_cv[clone], &srn_clone_lock);
+			if (cv_timedwait(&srn_clones_cv[clone],
+			    &srn_clone_lock, ddi_get_lbolt() +
+			    drv_usectohz(srn_timeout * 1000000)) == -1) {
+				/*
+				 * Client didn't respond in time; mark it as
+				 * faulted and proceed as for a normal wakeup.
+				 */
+				PMD(PMD_SX, ("srn_notify: clone %d did not "
+				    "ack event %x\n", clone, event))
+				cmn_err(CE_WARN, "srn_notify: clone %d did "
+				    "not ack event %x\n", clone, event);
+				srn.srn_fault[clone] = 1;
+			}
 			goto restart;
 		}
 	}