changeset 3151:9a7f99c63ca3

6482166 vntsd dumps core and all console connections get killed.
6490897 vntsd fails on default control domain
6492688 ldc panic under load
author sg70180
date Tue, 21 Nov 2006 13:13:17 -0800
parents 2486170b8294
children 978eba4e5246
files usr/src/cmd/vntsd/console.c usr/src/cmd/vntsd/listen.c usr/src/cmd/vntsd/svc-vntsd usr/src/cmd/vntsd/vntsd.c usr/src/cmd/vntsd/vntsd.h usr/src/cmd/vntsd/vntsdvcc.c usr/src/uts/sun4v/io/ldc.c usr/src/uts/sun4v/io/vcc.c usr/src/uts/sun4v/io/vldc.c usr/src/uts/sun4v/sys/ldc_impl.h usr/src/uts/sun4v/sys/vldc_impl.h
diffstat 11 files changed, 390 insertions(+), 133 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/vntsd/console.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/console.c	Tue Nov 21 13:13:17 2006 -0800
@@ -587,8 +587,11 @@
 
 	if ((groupp->no_cons_clientpq == NULL) &&
 	    (groupp->status & VNTSD_GROUP_SIG_WAIT)) {
-		/* group is waiting to be deleted */
-		groupp->status &= ~VNTSD_GROUP_SIG_WAIT;
+		/*
+		 * The group is waiting to be deleted: signal the group's
+		 * listen thread. The VNTSD_GROUP_SIG_WAIT state will
+		 * be cleared when the listen thread exits.
+		 */
 		(void) cond_signal(&groupp->cvp);
 	}
 	(void) mutex_unlock(&groupp->lock);
@@ -612,9 +615,17 @@
 	(void) snprintf(err_msg, VNTSD_LINE_LEN, "console_chk_status client%d"
 	    " num_cos=%d", clientp->sockfd, groupp->num_cons);
 
+	/*
+	 * obtain group lock to protect groupp->num_cons.
+	 * When groupp->num_cons == 0, close client and exit the thread.
+	 */
+	(void) mutex_lock(&groupp->lock);
+
 	if (groupp->num_cons == 0) {
 		/* no more console in the group */
+		(void) mutex_unlock(&groupp->lock);
 		client_fini(groupp, clientp);
+		return;
 	}
 
 	if (status == VNTSD_STATUS_INTR) {
@@ -625,33 +636,53 @@
 	switch (status) {
 
 	case VNTSD_STATUS_CLIENT_QUIT:
+		(void) mutex_unlock(&groupp->lock);
 		client_fini(groupp, clientp);
 		return;
 
 	case VNTSD_STATUS_RESELECT_CONS:
-		assert(clientp->cons);
+
+		if (clientp->cons == NULL) {
+			/*
+			 * domain was deleted before client connects to it
+			 * connect to other console in the same group
+			 */
+			(void) mutex_unlock(&groupp->lock);
+			client_init(clientp);
+			return;
+		}
+
 		if ((groupp->num_cons == 1) &&
-		    (groupp->conspq->handle == clientp->cons)) {
+		    ((clientp->status & VNTSD_CLIENT_CONS_DELETED) ||
+		    (groupp->conspq->handle == clientp->cons))) {
 			/* no other selection available */
+			(void) mutex_unlock(&groupp->lock);
 			client_fini(groupp, clientp);
 		} else {
+			(void) mutex_unlock(&groupp->lock);
 			client_init(clientp);
 		}
+
 		return;
 
 	case VNTSD_STATUS_VCC_IO_ERR:
 		if ((clientp->status & VNTSD_CLIENT_CONS_DELETED) == 0) {
 			/* check if console was deleted  */
+			(void) mutex_unlock(&groupp->lock);
 			status = vntsd_vcc_err(clientp->cons);
+			(void) mutex_lock(&groupp->lock);
 		}
 
 		if (status != VNTSD_STATUS_CONTINUE) {
 			/* console was deleted */
-			if (groupp->num_cons == 1) {
+			if (groupp->num_cons <= 1) {
+				(void) mutex_unlock(&groupp->lock);
 				client_fini(groupp, clientp);
+				return;
 			}
 		}
 
+		(void) mutex_unlock(&groupp->lock);
 		/* console is ok */
 		client_init(clientp);
 		return;
@@ -660,28 +691,31 @@
 	case VNTSD_STATUS_MOV_CONS_BACKWARD:
 		if (groupp->num_cons == 1) {
 			/* same console */
+			(void) mutex_unlock(&groupp->lock);
 			return;
 		}
 
 		/* get selected console */
-		(void) mutex_lock(&(clientp->cons->group->lock));
-		clientp->cons = vntsd_que_pos(clientp->cons->group->conspq,
+		clientp->cons = vntsd_que_pos(groupp->conspq,
 		    clientp->cons,
 		    (status == VNTSD_STATUS_MOV_CONS_FORWARD)?(1):(-1));
-		(void) mutex_unlock(&(clientp->cons->group->lock));
+		(void) mutex_unlock(&groupp->lock);
 		return;
 
 	case VNTSD_SUCCESS:
 	case VNTSD_STATUS_CONTINUE:
 	case VNTSD_STATUS_NO_CONS:
+		(void) mutex_unlock(&groupp->lock);
 		client_init(clientp);
 		return;
 
 	case VNTSD_ERR_INVALID_INPUT:
+		(void) mutex_unlock(&groupp->lock);
 		return;
 
 	default:
 		/* fatal error */
+		(void) mutex_unlock(&groupp->lock);
 		vntsd_log(status, err_msg);
 		client_fini(groupp, clientp);
 		return;
@@ -743,6 +777,12 @@
 			rv = read_cmd(clientp, prompt, &cmd);
 			/* check error and may exit */
 			console_chk_status(groupp, clientp, rv);
+
+			/* has any console been removed from the group? */
+			num_cons = vntsd_chk_group_total_cons(groupp);
+			if (num_cons <= 1) {
+				cmd = ' ';
+			}
 		}
 
 		switch (cmd) {
@@ -761,6 +801,10 @@
 
 		case ' ':
 
+			if (num_cons == 0)
+				/* no console in the group */
+				break;
+
 			if (clientp->cons == NULL) {
 				if (num_cons == 1) {
 					/* by pass selecting console */
--- a/usr/src/cmd/vntsd/listen.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/listen.c	Tue Nov 21 13:13:17 2006 -0800
@@ -44,9 +44,11 @@
 #include <syslog.h>
 #include "vntsd.h"
 
+#define	    MAX_BIND_RETRIES		6
 /*
  * check the state of listen thread. exit if there is an fatal error
- * or the group is removed.
+ * or the group is removed. Main thread will call free_group
+ * to close group socket and free group structure.
  */
 static void
 listen_chk_status(vntsd_group_t *groupp, int status)
@@ -69,18 +71,15 @@
 		return;
 
 	case VNTSD_STATUS_INTR:
+		/* signal for deleting group */
 		assert(groupp->status & VNTSD_GROUP_SIG_WAIT);
-		/* close listen socket */
+
+		/* let main thread know  */
 		(void) mutex_lock(&groupp->lock);
-		(void) close(groupp->sockfd);
-		groupp->sockfd = -1;
-
-		/* let group know */
 		groupp->status &= ~VNTSD_GROUP_SIG_WAIT;
 		(void) cond_signal(&groupp->cvp);
+		(void) mutex_unlock(&groupp->lock);
 
-		(void) mutex_unlock(&groupp->lock);
-		/* exit thread */
 		thr_exit(0);
 		break;
 
@@ -89,16 +88,33 @@
 
 	case VNTSD_STATUS_NO_CONS:
 	default:
-		/* fatal, exit thread */
+		/* fatal error or no console in the group, remove the group. */
 
 		(void) mutex_lock(&groupp->lock);
-		(void) close(groupp->sockfd);
-		groupp->sockfd = -1;
+
+		if (groupp->status & VNTSD_GROUP_SIG_WAIT) {
+			/* group is already in deletion */
+			(void) mutex_unlock(&groupp->lock);
+			return;
+		}
+
+		/*
+		 * If there are still console(s) in the group, they can
+		 * no longer be connected to because of a fatal error.
+		 * Therefore, mark the console(s) as deleted and notify the
+		 * main thread to delete the console(s) and the group.
+		 */
+		(void) vntsd_que_walk(groupp->conspq,
+		    (el_func_t)vntsd_mark_deleted_cons);
+		groupp->status |= VNTSD_GROUP_CLEAN_CONS;
+
+		/* signal main thread to delete the group */
+		(void) thr_kill(groupp->vntsd->tid, SIGUSR1);
 		(void) mutex_unlock(&groupp->lock);
-		vntsd_log(status, err_msg);
-		vntsd_clean_group(groupp);
 
-		thr_exit(0);
+		/* log error */
+		if (status != VNTSD_STATUS_NO_CONS)
+			vntsd_log(status, err_msg);
 		break;
 	}
 }
@@ -110,6 +126,7 @@
 
 	struct	    sockaddr_in addr;
 	int	    on;
+	int	    retries = 0;
 
 
 	/* allocate a socket */
@@ -134,11 +151,34 @@
 	addr.sin_port = htons(port_no);
 
 	/* bind socket */
-	if (bind(*sockfd, (struct sockaddr *)&addr, sizeof (addr)) < 0) {
-		if (errno == EINTR) {
-			return (VNTSD_STATUS_INTR);
+
+	for (; ; ) {
+
+		/*
+		 * After a socket is closed, the port
+		 * is transitioned to TIME_WAIT state.
+		 * It may take a few retries to bind
+		 * a just released port.
+		 */
+		if (bind(*sockfd, (struct sockaddr *)&addr,
+			    sizeof (addr)) < 0) {
+
+			if (errno == EINTR) {
+				return (VNTSD_STATUS_INTR);
+			}
+
+			if (errno == EADDRINUSE && retries < MAX_BIND_RETRIES) {
+				/* port may be in TIME_WAIT state, retry */
+				(void) sleep(5);
+				retries++;
+				continue;
+			}
+
+			return (VNTSD_ERR_LISTEN_BIND);
+
 		}
-		return (VNTSD_ERR_LISTEN_BIND);
+
+		break;
 
 	}
 
@@ -265,6 +305,7 @@
 		if (num_cons == 0) {
 			(void) close(newsockfd);
 			listen_chk_status(groupp, VNTSD_STATUS_NO_CONS);
+			continue;
 		}
 
 		/* a connection is established */
--- a/usr/src/cmd/vntsd/svc-vntsd	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/svc-vntsd	Tue Nov 21 13:13:17 2006 -0800
@@ -55,7 +55,17 @@
 fi
 
 if [ -x /usr/lib/ldoms/vntsd ]; then
-    /usr/lib/ldoms/vntsd $args || exit $SMF_EXIT_ERR_CONFIG
+    /usr/lib/ldoms/vntsd $args
+    rc=$?
+    if [ $rc -ne 0 ]; then
+	# if vntsd exited in error with status 1, let SMF restart it
+	# otherwise we want it to go into maintenance.
+	if [ $rc -eq 1 ]; then
+	    exit $SMF_ERR_OTHER
+	else
+	    exit $SMF_ERR_FATAL
+	fi
+    fi
 else
     echo "WARNING: /usr/lib/ldoms/vntsd is missing or not executable" >& 2
     exit $SMF_EXIT_ERR_CONFIG
--- a/usr/src/cmd/vntsd/vntsd.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/vntsd.c	Tue Nov 21 13:13:17 2006 -0800
@@ -82,7 +82,12 @@
 
 	D1(stderr, "t@%d exit_sig_handler%d \n", thr_self(), sig);
 
-	exit(0);
+	if (thr_self() != vntsdp->tid) {
+		/* not main thread, pass to main thread */
+		(void) thr_kill(vntsdp->tid, sig);
+	} else {
+		exit(0);
+	}
 }
 
 /*
@@ -358,20 +363,18 @@
 	(void) snprintf(path, sz-1, VCC_DEVICE_CTL_PATH, vntsdp->devinst,
 	    sizeof (vntsdp->devinst));
 	vntsdp->ctrl_fd = open(path, O_RDWR);
-	free(path);
 
 	if (vntsdp->ctrl_fd == -1) {
-		/*
-		 * do not print error if device is not present
-		 * the daemon is probably being started incorrectly
-		 */
-		if (errno != ENOENT) {
-			syslog(LOG_ERR,
-			    "Error opening VCC device control port: %s",
-			    strerror(errno));
-		}
-		exit(1);
+		/* log the failure, even if the device is not present */
+		syslog(LOG_ERR,
+		    "Error opening VCC device control port: %s",
+		    path);
+		/* tell SMF no retry */
+		exit(2);
 	}
+
+	free(path);
+
 	if ((vntsdp->options & VNTSD_OPT_DAEMON_OFF) == 0) {
 		/* daemonize it */
 		pid = fork();
@@ -483,6 +486,11 @@
 		    poll_drv[0].revents);
 
 		vntsd_daemon_wakeup(vntsdp);
+		/*
+		 * Main thread may miss a console-delete signal when it is
+		 * not polling vcc. check if any console is deleted.
+		 */
+		vntsd_delete_cons(vntsdp);
 
 	}
 
@@ -529,9 +537,8 @@
 }
 
 /*
- * check if a vcc i/o error is caused by removal of a console. If so notify
- * all clients connected to the console and wake up main thread to cleanup
- * the console.
+ * check if a vcc i/o error is caused by removal of a console. If so
+ * wake up main thread to cleanup the console.
  */
 int
 vntsd_vcc_err(vntsd_cons_t *consp)
@@ -556,9 +563,10 @@
 	(void) mutex_lock(&consp->lock);
 	consp->status |= VNTSD_CONS_DELETED;
 
-	/* signal all clients to disconnect from console */
-	(void) vntsd_que_walk(consp->clientpq,
-	    (el_func_t)vntsd_notify_client_cons_del);
+	/*
+	 * main thread will close all clients after receiving console
+	 * delete signal.
+	 */
 	(void) mutex_unlock(&consp->lock);
 
 	/* mark the group */
--- a/usr/src/cmd/vntsd/vntsd.h	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/vntsd.h	Tue Nov 21 13:13:17 2006 -0800
@@ -206,20 +206,41 @@
 /* vntsd options */
 #define	    VNTSD_OPT_DAEMON_OFF	0x1
 
-/* group states */
+/*
+ * group states
+ * When a console is removed or vntsd is exiting, main thread
+ * notifies listen, read and write thread to exit.
+ * After those threads exit, main thread cleans up the group structure.
+ *
+ * VNTSD_GROUP_SIG_WAIT
+ * The main thread is waiting for listen thread to exit.
+ * VNTSD_GROUP_CLEAN_CONS
+ * There are console(s) in the group that are being removed.
+ * This is a transition state where the corresponding vcc port has been
+ * removed, but vntsd has not done its clean up yet.
+ * VNTSD_GROUP_IN_CLEANUP
+ * vntsd main thread has started cleaning up the group.
+ */
 
-#define	    VNTSD_GROUP_SIG_WAIT	0x1	/*  waiting for signal */
-#define	    VNTSD_GROUP_CLEAN_CONS	0x2	/*  cons needs to be clean */
-#define	    VNTSD_GROUP_CLEANUP		0x4	/*  waiting for signal */
+#define	    VNTSD_GROUP_SIG_WAIT	0x1
+#define	    VNTSD_GROUP_CLEAN_CONS	0x2
+#define	    VNTSD_GROUP_IN_CLEANUP	0x4
 
 
 
 
 
-/* console status */
+/*
+ * console states
+ * There are two states when a console is removed
+ * VNTSD_CONS_DELETED
+ * the console is being deleted
+ * VNTSD_CONS_SIG_WAIT
+ * console is waiting for all clients to exit.
+ */
 
 #define	    VNTSD_CONS_DELETED		0x1	/* deleted */
-#define	    VNTSD_CONS_SIG_WAIT		0x2	/* waiting fro signal */
+#define	    VNTSD_CONS_SIG_WAIT		0x2	/* waiting for signal */
 
 
 #define	    VNTSD_CLIENT_IO_ERR		    0x1	    /* reader */
@@ -451,6 +472,7 @@
 boolean_t	vntsd_vcc_cons_alive(vntsd_cons_t *consp);
 boolean_t	vntsd_notify_client_cons_del(vntsd_client_t *clientp);
 int		vntsd_chk_group_total_cons(vntsd_group_t *groupp);
+boolean_t	vntsd_mark_deleted_cons(vntsd_cons_t *consp);
 
 
 #ifdef	DEBUG
--- a/usr/src/cmd/vntsd/vntsdvcc.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/cmd/vntsd/vntsdvcc.c	Tue Nov 21 13:13:17 2006 -0800
@@ -69,9 +69,23 @@
 	assert(consp);
 	(void) mutex_destroy(&consp->lock);
 	(void) cond_destroy(&consp->cvp);
+	if (consp->vcc_fd != -1)
+		(void) close(consp->vcc_fd);
 	free(consp);
 }
 
+/* free group structure */
+static void
+free_group(vntsd_group_t *groupp)
+{
+	assert(groupp);
+	(void) mutex_destroy(&groupp->lock);
+	(void) cond_destroy(&groupp->cvp);
+	if (groupp->sockfd != -1)
+		(void) close(groupp->sockfd);
+	free(groupp);
+}
+
 /*
  *  all clients connected to a console must disconnect before
  *  removing a console.
@@ -166,25 +180,28 @@
 		for (; ; ) {
 			/* get the console to be deleted */
 			(void) mutex_lock(&groupp->lock);
-			assert(groupp->conspq);
-			consp = vntsd_que_walk(groupp->conspq,
-			    (el_func_t)find_clean_cons);
-			if (consp == NULL) {
-				/* no more cons to delete */
+
+			/* clean up any deleted console in the group */
+			if (groupp->conspq != NULL) {
+				consp = vntsd_que_walk(groupp->conspq,
+				    (el_func_t)find_clean_cons);
+				if (consp == NULL) {
+					/* no more cons to delete */
+					(void) mutex_unlock(&groupp->lock);
+					break;
+				}
+
+				/* remove console from the group */
+				(void) vntsd_que_rm(&groupp->conspq, consp);
 				(void) mutex_unlock(&groupp->lock);
-				break;
+
+				/* clean up the console */
+				cleanup_cons(consp);
 			}
 
-			/* remove console from the group */
-			(void) vntsd_que_rm(&groupp->conspq, consp);
-			(void) mutex_unlock(&groupp->lock);
-
-			/* clean up the console */
-			cleanup_cons(consp);
-
 			/* delete group? */
-			if (groupp->num_cons == 0) {
-				/* no more console delete it */
+			if (groupp->conspq == NULL) {
+				/* no more console in the group delete group */
 				assert(groupp->vntsd);
 
 				(void) mutex_lock(&groupp->vntsd->lock);
@@ -213,23 +230,21 @@
 	(void) mutex_lock(&groupp->lock);
 
 	/* prevent from reentry */
-	if (groupp->status & VNTSD_GROUP_CLEANUP) {
-		if (groupp->listen_tid == thr_self()) {
-			/* signal that the listen thread is exiting */
-			groupp->status &= ~VNTSD_GROUP_SIG_WAIT;
-			(void) cond_signal(&groupp->cvp);
-		}
+	if (groupp->status & VNTSD_GROUP_IN_CLEANUP) {
 		(void) mutex_unlock(&groupp->lock);
 		return;
 	}
-	groupp->status |= VNTSD_GROUP_CLEANUP;
+	groupp->status |= VNTSD_GROUP_IN_CLEANUP;
+
+	/* mark group as waiting for the listen thread to exit */
+	groupp->status |= VNTSD_GROUP_SIG_WAIT;
 	(void) mutex_unlock(&groupp->lock);
 
 	vntsd_free_que(&groupp->conspq, (clean_func_t)cleanup_cons);
 
+	(void) mutex_lock(&groupp->lock);
 	/* walk through no cons client queue */
 	while (groupp->no_cons_clientpq != NULL) {
-		groupp->status |= VNTSD_GROUP_SIG_WAIT;
 		(void) vntsd_que_walk(groupp->no_cons_clientpq,
 		    (el_func_t)vntsd_notify_client_cons_del);
 		to.tv_sec = VNTSD_CV_WAIT_DELTIME;
@@ -237,23 +252,9 @@
 		(void) cond_reltimedwait(&groupp->cvp, &groupp->lock, &to);
 	}
 
-	if (groupp->listen_tid == thr_self()) {
-		/* listen thread is exiting */
-		(void) mutex_lock(&(groupp->vntsd->lock));
-		(void) vntsd_que_rm(&groupp->vntsd->grouppq, groupp);
-		(void) mutex_unlock(&groupp->vntsd->lock);
-
-		(void) cond_destroy(&groupp->cvp);
-		(void) mutex_unlock(&groupp->lock);
-		(void) mutex_destroy(&groupp->lock);
-		free(groupp);
-		return;
-	}
-
-	/* signal listen thread to exit  */
-	groupp->status |= VNTSD_GROUP_SIG_WAIT;
-
+	/* waiting for listen thread to exit */
 	while (groupp->status & VNTSD_GROUP_SIG_WAIT) {
+		/* signal listen thread to exit  */
 		(void) thr_kill(groupp->listen_tid, SIGUSR1);
 		to.tv_sec = VNTSD_CV_WAIT_DELTIME;
 		to.tv_nsec = 0;
@@ -264,9 +265,7 @@
 	(void) mutex_unlock(&groupp->lock);
 	(void) thr_join(groupp->listen_tid, NULL, NULL);
 	/* free group */
-	(void) cond_destroy(&groupp->cvp);
-	(void) mutex_destroy(&groupp->lock);
-	free(groupp);
+	free_group(groupp);
 }
 
 /* allocate and initialize console structure */
@@ -293,7 +292,7 @@
 	(void) strlcpy(consp->domain_name, consolep->domain_name, MAXPATHLEN);
 	(void) strlcpy(consp->dev_name, consolep->dev_name, MAXPATHLEN);
 	consp->wr_tid = (thread_t)-1;
-	consp->vcc_fd = (thread_t)-1;
+	consp->vcc_fd = -1;
 
 	/* join the group */
 	(void) mutex_lock(&groupp->lock);
@@ -350,7 +349,7 @@
 
 	groupp->tcp_port = tcp_port;
 	groupp->listen_tid = (thread_t)-1;
-	groupp->sockfd = (thread_t)-1;
+	groupp->sockfd = -1;
 	groupp->vntsd = vntsdp;
 
 	D1(stderr, "t@%d alloc_group@%lld:%s\n", thr_self(), groupp->tcp_port,
@@ -359,6 +358,16 @@
 	return (groupp);
 }
 
+/* mark a deleted console */
+boolean_t
+vntsd_mark_deleted_cons(vntsd_cons_t *consp)
+{
+	(void) mutex_lock(&consp->lock);
+	consp->status |= VNTSD_CONS_DELETED;
+	(void) mutex_unlock(&consp->lock);
+	return (B_FALSE);
+}
+
 /*
  * Initialize a console, if console is associated with with a
  * new group, intialize the group.
@@ -378,19 +387,55 @@
 	(void) mutex_lock(&vntsdp->lock);
 	groupp = vntsd_que_find(vntsdp->grouppq,
 	    (compare_func_t)grp_by_tcp, (void *)&(consp->tcp_port));
+	if (groupp != NULL)
+		(void) mutex_lock(&groupp->lock);
+
 	(void) mutex_unlock(&vntsdp->lock);
 
 	if (groupp != NULL) {
-		/* group with same tcp port found */
+		/*
+		 *  A group with the same tcp port was found.
+		 *  If there is no console in the group, the
+		 *  group should be removed and the tcp port can
+		 *  be used for the new group.
+		 *  This is possible when there is a tight loop of
+		 *  creating/deleting domains. When a vcc port is
+		 *  removed, a read thread will get an I/O error because
+		 *  vcc has closed the port. The read thread then marks
+		 *  the console as removed and notifies the main thread to
+		 *  remove the console.
+		 *  Meanwhile, the same port and its group (with the same
+		 *  tcp port and group name) are created. Vcc notifies
+		 *  vntsd that a new console has been added.
+		 *  The main thread now has two events. If it polls
+		 *  out the vcc notification first, it will find that there
+		 *  is a group that has no console.
+		 */
 
-		if (strcmp(groupp->group_name, consp->group_name)) {
+		if (vntsd_chk_group_total_cons(groupp) == 0) {
+
+			/* all consoles in the group have been removed */
+			(void) vntsd_que_walk(groupp->conspq,
+			    (el_func_t)vntsd_mark_deleted_cons);
+			groupp->status |= VNTSD_GROUP_CLEAN_CONS;
+			(void) mutex_unlock(&groupp->lock);
+			groupp = NULL;
+
+		} else if (strcmp(groupp->group_name, consp->group_name)) {
 			/* conflict group name */
 			vntsd_log(VNTSD_ERR_VCC_GRP_NAME,
 			    "group name is different from existing group");
+			(void) mutex_unlock(&groupp->lock);
 			return (VNTSD_ERR_VCC_CTRL_DATA);
+
+		} else {
+			/* group already existed */
+			(void) mutex_unlock(&groupp->lock);
 		}
 
-	} else {
+	}
+
+	if (groupp == NULL) {
 		/* new group */
 		groupp = alloc_group(vntsdp, consp->group_name,
 		    consp->tcp_port);
@@ -416,9 +461,7 @@
 		/* no memory */
 		if (new_groupp != NULL) {
 			/* clean up new group */
-			(void) cond_destroy(&groupp->cvp);
-			(void) mutex_destroy(&groupp->lock);
-			free(groupp);
+			free_group(groupp);
 		}
 
 		return (VNTSD_ERR_NO_MEM);
@@ -463,17 +506,59 @@
 	return (rv != 0);
 }
 
+/* find deleted console by console no */
+static boolean_t
+deleted_cons_by_consno(vntsd_cons_t *consp, int *cons_no)
+{
+	vntsd_client_t *clientp;
+
+	assert(consp);
+
+	if (consp->cons_no != *cons_no)
+		return (B_FALSE);
+
+	/* has the console been marked as deleted? */
+	if ((consp->status & VNTSD_CONS_DELETED) == 0)
+		return (B_TRUE);
+
+	/* notify clients of console ? */
+	clientp = (vntsd_client_t *)consp->clientpq->handle;
+
+	if (clientp == NULL)
+		/* there is no client for this console */
+		return (B_TRUE);
+
+	if (clientp->status & VNTSD_CLIENT_CONS_DELETED)
+		/* clients of console have notified */
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/* find group structure from console no */
+static boolean_t
+find_cons_group_by_cons_no(vntsd_group_t *groupp, uint_t *cons_no)
+{
+	vntsd_cons_t *consp;
+
+	consp = vntsd_que_find(groupp->conspq,
+	    (compare_func_t)deleted_cons_by_consno, cons_no);
+	return (consp != NULL);
+
+}
+
 /* delete a console if the console exists in the vntsd */
 static void
-delete_cons_before_add(vntsd_t *vntsdp, uint64_t tcp_port, uint_t cons_no)
+delete_cons_before_add(vntsd_t *vntsdp, uint_t cons_no)
 {
 	vntsd_group_t	    *groupp;
 	vntsd_cons_t	    *consp;
 
 	/* group exists? */
 	(void) mutex_lock(&vntsdp->lock);
-	groupp = vntsd_que_find(vntsdp->grouppq, (compare_func_t)grp_by_tcp,
-	    (void *)&(tcp_port));
+	groupp = vntsd_que_find(vntsdp->grouppq,
+	    (compare_func_t)find_cons_group_by_cons_no,
+	    &cons_no);
 	(void) mutex_unlock(&vntsdp->lock);
 
 	if (groupp == NULL) {
@@ -484,7 +569,7 @@
 	/* group exists, if console exists? */
 	(void) mutex_lock(&groupp->lock);
 	consp = vntsd_que_find(groupp->conspq,
-	    (compare_func_t)vntsd_cons_by_consno, &cons_no);
+	    (compare_func_t)deleted_cons_by_consno, &cons_no);
 
 	if (consp == NULL) {
 		/* no such console */
@@ -527,7 +612,7 @@
 	}
 
 	/* clean up the console if console was deleted and added again */
-	delete_cons_before_add(vntsdp, console.tcp_port, console.cons_no);
+	delete_cons_before_add(vntsdp, console.cons_no);
 
 	/* initialize console */
 
@@ -543,9 +628,7 @@
 		/* create listen thread for this console */
 		if (create_listen_thread(groupp)) {
 			vntsd_log(VNTSD_ERR_CREATE_LISTEN_THR, err_msg);
-			(void) cond_destroy(&groupp->cvp);
-			(void) mutex_destroy(&groupp->lock);
-			free(groupp);
+			free_group(groupp);
 		}
 
 	}
--- a/usr/src/uts/sun4v/io/ldc.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/uts/sun4v/io/ldc.c	Tue Nov 21 13:13:17 2006 -0800
@@ -185,6 +185,12 @@
 int ldc_max_retries = LDC_MAX_RETRIES;
 clock_t ldc_delay = LDC_DELAY;
 
+/*
+ * delay between each retry of channel unregistration in
+ * ldc_close(), to wait for pending interrupts to complete.
+ */
+clock_t ldc_close_delay = LDC_CLOSE_DELAY;
+
 #ifdef DEBUG
 
 /*
@@ -1832,7 +1838,7 @@
 	if (!ldcp->cb_enabled)
 		notify_client = B_FALSE;
 
-	/* Unlock channel */
+	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
 
 	if (notify_client) {
 		ldcp->cb_inprogress = B_TRUE;
@@ -1847,7 +1853,6 @@
 		ldcp->cb_inprogress = B_FALSE;
 	}
 
-	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
 	mutex_exit(&ldcp->lock);
 
 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
@@ -2108,16 +2113,21 @@
 	} else
 		ldcp->rx_intr_state = LDC_INTR_PEND;
 
-	mutex_exit(&ldcp->lock);
 
 	if (notify_client) {
+		ldcp->cb_inprogress = B_TRUE;
+		mutex_exit(&ldcp->lock);
 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
 		if (rv) {
 			DWARN(ldcp->id,
 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
 			    ldcp->id);
 		}
-	}
+		mutex_enter(&ldcp->lock);
+		ldcp->cb_inprogress = B_FALSE;
+	}
+
+	mutex_exit(&ldcp->lock);
 
 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
 	return (DDI_INTR_CLAIMED);
@@ -2656,6 +2666,13 @@
 		return (EBUSY);
 	}
 
+	if (ldcp->cb_inprogress) {
+		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		return (EWOULDBLOCK);
+	}
+
 	/* Obtain Tx lock */
 	mutex_enter(&ldcp->tx_lock);
 
@@ -2723,7 +2740,7 @@
 		 * As there could be pending interrupts we need
 		 * to wait and try again
 		 */
-		drv_usecwait(ldc_delay);
+		drv_usecwait(ldc_close_delay);
 		mutex_enter(&ldcp->lock);
 		mutex_enter(&ldcp->tx_lock);
 		retries++;
--- a/usr/src/uts/sun4v/io/vcc.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/uts/sun4v/io/vcc.c	Tue Nov 21 13:13:17 2006 -0800
@@ -415,8 +415,8 @@
 	attr.mode = LDC_MODE_RAW;
 
 	if ((rv = ldc_init(vport->ldc_id, &attr, &(vport->ldc_handle))) != 0) {
-		cmn_err(CE_CONT, "i_vcc_ldc_init: port %d inv channel 0x%lx\n",
-		    vport->number, vport->ldc_id);
+		cmn_err(CE_CONT, "i_vcc_ldc_init: port %d ldc channel %ld"
+		    " failed ldc_init %d \n", vport->number, vport->ldc_id, rv);
 		vport->ldc_id = VCC_INVALID_CHANNEL;
 		return (rv);
 	}
@@ -1133,6 +1133,12 @@
 
 	mutex_enter(&vport->lock);
 
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		/* port may be removed */
+		mutex_exit(&vport->lock);
+		return (ENXIO);
+	}
+
 	if (vport->status & VCC_PORT_OPEN) {
 		/* only one open per port */
 		cmn_err(CE_CONT, "vcc_open: virtual-console-concentrator@%d:%d "
@@ -1482,7 +1488,7 @@
 		return (EINVAL);
 	}
 
-		/* an added port */
+	/* an added port */
 
 	D1("i_vcc_inquiry\n");
 
@@ -1629,6 +1635,8 @@
 		console.cons_no = -1;
 	} else if (console.tcp_port != vport->tcp_port) {
 		console.cons_no = -1;
+	} else if (vport->ldc_id == VCC_INVALID_CHANNEL) {
+		console.cons_no = -1;
 	}
 
 	D1("i_vcc_cons_status@%d: %s %s %llx\n", console.cons_no,
@@ -2360,6 +2368,7 @@
 		if (rv !=  MDEG_SUCCESS) {
 			return (rv);
 		}
+
 	}
 
 	/*
@@ -2368,7 +2377,6 @@
 	 * for now.
 	 */
 
-
 	return (MDEG_SUCCESS);
 }
 
--- a/usr/src/uts/sun4v/io/vldc.c	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/uts/sun4v/io/vldc.c	Tue Nov 21 13:13:17 2006 -0800
@@ -161,12 +161,10 @@
 uint64_t vldc_max_cookie = VLDC_MAX_COOKIE;
 
 /*
- * when calls to LDC return EWOULDBLOCK or EAGAIN the operation is retried
- * up to 'vldc_retries' times with a wait of 'vldc_delay' microseconds
- * between each retry.
+ * when ldc_close() returns EAGAIN, it is retried with a wait
+ * of 'vldc_close_delay' between each retry.
  */
-static clock_t	vldc_delay = 100;
-static int	vldc_retries = 3;
+static clock_t	vldc_close_delay = VLDC_CLOSE_DELAY;
 
 #ifdef DEBUG
 
@@ -270,6 +268,10 @@
 	/* ensure the port can't be destroyed while we are handling the cb */
 	mutex_enter(&vport->minorp->lock);
 
+	if (vport->status == VLDC_PORT_CLOSED) {
+		return (LDC_SUCCESS);
+	}
+
 	old_status = vport->ldc_status;
 	rv = ldc_status(vport->ldc_handle, &vport->ldc_status);
 	if (rv != 0) {
@@ -290,7 +292,7 @@
 		 * implies that the port cannot be used until it has
 		 * been closed and reopened.
 		 */
-		if (vport->status != VLDC_PORT_CLOSED && old_status == LDC_UP) {
+		if (old_status == LDC_UP) {
 			vport->status = VLDC_PORT_RESET;
 			vport->hanged_up = B_TRUE;
 			pollevents = POLLHUP;
@@ -759,16 +761,10 @@
 static int
 i_vldc_ldc_close(vldc_port_t *vport)
 {
-	int retries = 0;	/* count of number of retries attempted */
 	int err = 0;
 
 	ASSERT(MUTEX_HELD(&vport->minorp->lock));
 
-	while ((err = ldc_close(vport->ldc_handle)) == EAGAIN) {
-		drv_usecwait(vldc_delay);
-		if (++retries > vldc_retries)
-			break;
-	}
 	/*
 	 * If ldc_close() succeeded or if the channel was already closed[*]
 	 * (possibly by a previously unsuccessful call to this function)
@@ -777,6 +773,7 @@
 	 *
 	 * [*] indicated by ldc_close() returning a value of EFAULT
 	 */
+	err = ldc_close(vport->ldc_handle);
 	if ((err != 0) && (err != EFAULT))
 		return (err);
 
@@ -798,6 +795,7 @@
 i_vldc_close_port(vldc_t *vldcp, uint_t portno)
 {
 	vldc_port_t	*vport;
+	vldc_minor_t	*vminor;
 	int		rv = DDI_SUCCESS;
 
 	vport = &(vldcp->port[portno]);
@@ -807,6 +805,8 @@
 	D1("i_vldc_close_port: vldc@%d:%d: closing port\n",
 	    vport->inst, vport->minorp->portno);
 
+	vminor = vport->minorp;
+
 	switch (vport->status) {
 	case VLDC_PORT_CLOSED:
 		/* nothing to do */
@@ -816,9 +816,27 @@
 
 	case VLDC_PORT_READY:
 	case VLDC_PORT_RESET:
-		rv = i_vldc_ldc_close(vport);
-		if (rv != 0)
+		do {
+			rv = i_vldc_ldc_close(vport);
+			if (rv != EAGAIN)
+				break;
+
+			/*
+			 * EAGAIN indicates that ldc_close() failed because
+			 * ldc callback thread is active for the channel.
+			 * cv_timedwait() is used to release vminor->lock and
+			 * allow ldc callback thread to complete.
+			 * after waking up, check if the port has been closed
+			 * by another thread in the meantime.
+			 */
+			(void) cv_timedwait(&vminor->cv, &vminor->lock,
+			    ddi_get_lbolt() + drv_usectohz(vldc_close_delay));
+			rv = 0;
+		} while (vport->status != VLDC_PORT_CLOSED);
+
+		if ((rv != 0) || (vport->status == VLDC_PORT_CLOSED))
 			return (rv);
+
 		break;
 
 	case VLDC_PORT_OPEN:
@@ -837,7 +855,7 @@
 	kmem_free(vport->send_buf, vport->mtu);
 	kmem_free(vport->recv_buf, vport->mtu);
 
-	if (strcmp(vport->minorp->sname, VLDC_HVCTL_SVCNAME) == 0)
+	if (strcmp(vminor->sname, VLDC_HVCTL_SVCNAME) == 0)
 		kmem_free(vport->cookie_buf, vldc_max_cookie);
 
 	vport->status = VLDC_PORT_CLOSED;
--- a/usr/src/uts/sun4v/sys/ldc_impl.h	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/uts/sun4v/sys/ldc_impl.h	Tue Nov 21 13:13:17 2006 -0800
@@ -121,6 +121,9 @@
 #define	LDC_MAX_RETRIES	1000
 #define	LDC_DELAY	1
 
+/* delay(usec) between channel unregister retries in ldc_close() */
+#define	LDC_CLOSE_DELAY	1
+
 /*
  * LDC Version information
  */
--- a/usr/src/uts/sun4v/sys/vldc_impl.h	Tue Nov 21 07:12:45 2006 -0800
+++ b/usr/src/uts/sun4v/sys/vldc_impl.h	Tue Nov 21 13:13:17 2006 -0800
@@ -67,6 +67,9 @@
 /* indicates an invalid port number */
 #define	VLDC_INVALID_PORTNO	((uint_t)-1)
 
+/* delay(in us) used to wait for pending callback to complete */
+#define	VLDC_CLOSE_DELAY	MICROSEC	/* 1sec */
+
 /*
  * Minor node number to port number mapping table.
  *