changeset 6702:c62be08ca808

6692273 RDSIB prints message to console during boot
6695142 existing Oracle RAC nodes panic in rds-ib when new node join the cluster.
author agiri
date Fri, 23 May 2008 11:34:29 -0700
parents 4213fadfdec4
children b961f9b565e9
files usr/src/uts/common/io/ib/clients/rds/rdsib.c usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c usr/src/uts/common/io/ib/clients/rds/rdsib_debug.c usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c
diffstat 5 files changed, 78 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib.c	Fri May 23 10:59:13 2008 -0700
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsib.c	Fri May 23 11:34:29 2008 -0700
@@ -268,14 +268,16 @@
 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
 	    TASKQ_DEFAULTPRI, 0);
 	if (rds_taskq == NULL) {
-		RDS_DPRINTF1(LABEL, "ddi_taskq_create failed for rds_taskq");
+		RDS_DPRINTF1("rdsib_attach",
+		    "ddi_taskq_create failed for rds_taskq");
 		rdsib_dev_info = NULL;
 		return (DDI_FAILURE);
 	}
 
 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
 	if (ret != DDI_SUCCESS) {
-		cmn_err(CE_CONT, "ddi_create_minor_node failed: %d", ret);
+		RDS_DPRINTF1("rdsib_attach",
+		    "ddi_create_minor_node failed: %d", ret);
 		ddi_taskq_destroy(rds_taskq);
 		rds_taskq = NULL;
 		rdsib_dev_info = NULL;
@@ -295,7 +297,8 @@
 
 	ret = rdsib_initialize_ib();
 	if (ret != 0) {
-		cmn_err(CE_CONT, "rdsib_initialize_ib failed: %d", ret);
+		RDS_DPRINTF1("rdsib_attach",
+		    "rdsib_initialize_ib failed: %d", ret);
 		ddi_taskq_destroy(rds_taskq);
 		rds_taskq = NULL;
 		rdsib_dev_info = NULL;
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c	Fri May 23 10:59:13 2008 -0700
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c	Fri May 23 11:34:29 2008 -0700
@@ -120,7 +120,7 @@
 
 	/* validate service id */
 	if (reqp->req_service_id == RDS_SERVICE_ID) {
-		RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system "
+		RDS_DPRINTF2(LABEL, "Version Mismatch: Remote system "
 		    "(GUID: 0x%llx) is running an older version of RDS",
 		    rgid.gid_guid);
 		return (IBT_CM_REJECT);
@@ -148,7 +148,7 @@
 	    ntohl(ipcm_info.SRCIP), ntohl(ipcm_info.DSTIP), cmp.cmp_eptype);
 
 	if (cmp.cmp_version != RDS_VERSION) {
-		RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d "
+		RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
 		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
 		return (IBT_CM_REJECT);
 	}
@@ -602,9 +602,9 @@
 	    evp->cm_event.failed.cf_reason);
 
 	if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) {
-		RDS_DPRINTF0(LABEL,
+		RDS_DPRINTF2(LABEL,
 		    "Received REJ with reason IBT_CM_INVALID_SID: "
-		    "The remote system could be running an older RDS version");
+		    "RDS may not be loaded on the remote system");
 	}
 
 	if (evp->cm_channel == NULL) {
@@ -653,7 +653,7 @@
 			    rds_cleanup_passive_session, (void *)sp,
 			    DDI_NOSLEEP);
 			if (ret != DDI_SUCCESS) {
-				RDS_DPRINTF1("rds_handle_cm_event_failure",
+				RDS_DPRINTF2("rds_handle_cm_event_failure",
 				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
 			}
 			return (IBT_CM_ACCEPT);
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_debug.c	Fri May 23 10:59:13 2008 -0700
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_debug.c	Fri May 23 11:34:29 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -189,9 +189,9 @@
 			    (uintptr_t)rds_buf_sptr;
 
 			bcopy((caddr_t)rds_print_buf,
-				(caddr_t)rds_buf_sptr, left);
+			    (caddr_t)rds_buf_sptr, left);
 			bcopy((caddr_t)rds_print_buf + left,
-				(caddr_t)rds_debug_buf, len - left);
+			    (caddr_t)rds_debug_buf, len - left);
 			rds_buf_sptr = rds_debug_buf + len - left;
 		} else {
 			bcopy((caddr_t)rds_print_buf, rds_buf_sptr, len);
@@ -204,10 +204,8 @@
 
 	/*
 	 * LINTR, L5-L2 message may go to the rds_debug_buf
-	 * L1 messages will go to the log buf in non-debug kernels and
-	 * to console and log buf in debug kernels
-	 * L0 messages are warnings and will go to msgbuf in non-debug kernels
-	 * and to console and log buf in debug kernels
+	 * L1: console, or /var/adm/messages if rds_buffer_dprintf is set.
+	 * L0 messages will go to console (debug & non-debug).
 	 */
 	switch (level) {
 	case RDS_LOG_LINTR:
@@ -220,13 +218,12 @@
 		}
 		break;
 	case RDS_LOG_L1:
-#ifdef DEBUG
-		cmn_err(CE_CONT, "%s", rds_print_buf);
-#else
 		if (!rds_buffer_dprintf) {
 			cmn_err(CE_CONT, "^%s", rds_print_buf);
+		} else {
+			/* go to messages file */
+			cmn_err(CE_CONT, "!%s", rds_print_buf);
 		}
-#endif
 		break;
 	case RDS_LOG_L0:
 		/* Strip the "\n" added earlier */
@@ -236,11 +233,8 @@
 		if (msg_ptr[len - 1] == '\n') {
 			msg_ptr[len - 1] = '\0';
 		}
-#ifdef DEBUG
-		cmn_err(CE_CONT, rds_print_buf);
-#else
-		cmn_err(CE_CONT, "!%s", rds_print_buf);
-#endif
+		/* go to console */
+		cmn_err(CE_CONT, "^%s", rds_print_buf);
 		break;
 	}
 
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c	Fri May 23 10:59:13 2008 -0700
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c	Fri May 23 11:34:29 2008 -0700
@@ -275,25 +275,35 @@
 	}
 }
 
-static void
+static boolean_t
 rds_add_session(rds_session_t *sp, boolean_t locked)
 {
+	boolean_t retval = B_TRUE;
+
 	RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp);
 
 	if (!locked) {
 		rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER);
 	}
 
-	sp->session_nextp = rdsib_statep->rds_sessionlistp;
-	rdsib_statep->rds_sessionlistp = sp;
-	rdsib_statep->rds_nsessions++;
+	/* Don't allow more sessions than configured in rdsib.conf */
+	if (rdsib_statep->rds_nsessions >= (MaxNodes - 1)) {
+		RDS_DPRINTF1("rds_add_session", "Max session limit reached");
+		retval = B_FALSE;
+	} else {
+		sp->session_nextp = rdsib_statep->rds_sessionlistp;
+		rdsib_statep->rds_sessionlistp = sp;
+		rdsib_statep->rds_nsessions++;
+		RDS_INCR_SESS();
+	}
 
 	if (!locked) {
 		rw_exit(&rdsib_statep->rds_sessionlock);
 	}
-	RDS_INCR_SESS();
 
 	RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp);
+
+	return (retval);
 }
 
 /* Session lookup based on destination IP or destination node guid */
@@ -465,7 +475,7 @@
 
 	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
 	if (hcap == NULL) {
-		RDS_DPRINTF1("rds_session_init", "SGID is on an uninitialized "
+		RDS_DPRINTF2("rds_session_init", "SGID is on an uninitialized "
 		    "HCA: %llx", sp->session_lgid.gid_guid);
 		return (-1);
 	}
@@ -527,14 +537,14 @@
 
 	hcap = rds_gid_to_hcap(rdsib_statep, lgid);
 	if (hcap == NULL) {
-		RDS_DPRINTF1("rds_session_reinit", "SGID is on an "
+		RDS_DPRINTF2("rds_session_reinit", "SGID is on an "
 		    "uninitialized HCA: %llx", lgid.gid_guid);
 		return (-1);
 	}
 
 	hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
 	if (hcap1 == NULL) {
-		RDS_DPRINTF1("rds_session_reinit", "Seems like HCA %llx "
+		RDS_DPRINTF2("rds_session_reinit", "Seems like HCA %llx "
 		    "is unplugged", sp->session_lgid.gid_guid);
 	} else if (hcap->hca_guid == hcap1->hca_guid) {
 		/*
@@ -880,7 +890,7 @@
 			break;
 		}
 
-		RDS_DPRINTF1(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret);
+		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret);
 
 		/* wait 1 sec before re-trying */
 		delay(drv_usectohz(1000000));
@@ -1186,6 +1196,15 @@
 	RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x, type: %d",
 	    statep, localip, remip, type);
 
+	/* Check if there is space for a new session */
+	rw_enter(&statep->rds_sessionlock, RW_READER);
+	if (statep->rds_nsessions >= (MaxNodes - 1)) {
+		rw_exit(&statep->rds_sessionlock);
+		RDS_DPRINTF1("rds_session_create", "No More Sessions allowed");
+		return (NULL);
+	}
+	rw_exit(&statep->rds_sessionlock);
+
 	/* Allocate and initialize global buffer pool */
 	ret = rds_init_recv_caches(statep);
 	if (ret != 0) {
@@ -1262,7 +1281,17 @@
 	}
 
 	/* Insert this session into the list */
-	rds_add_session(newp, B_TRUE);
+	if (rds_add_session(newp, B_TRUE) != B_TRUE) {
+		/* No room to add this session */
+		rw_exit(&statep->rds_sessionlock);
+		rw_destroy(&newp->session_lock);
+		rw_destroy(&newp->session_local_portmap_lock);
+		rw_destroy(&newp->session_remote_portmap_lock);
+		mutex_destroy(&dataep->ep_lock);
+		mutex_destroy(&ctrlep->ep_lock);
+		kmem_free(newp, sizeof (rds_session_t));
+		return (NULL);
+	}
 
 	/* unlock the session list */
 	rw_exit(&statep->rds_sessionlock);
@@ -1301,7 +1330,7 @@
 		    IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo,
 		    NULL, NULL);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1(LABEL, "ibt_get_ip_paths failed, ret: %d "
+			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d "
 			    "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix,
 			    lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid);
 
@@ -2049,7 +2078,7 @@
 			    IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo,
 			    NULL, NULL);
 			if (ret != IBT_SUCCESS) {
-				RDS_DPRINTF1("rds_sendmsg",
+				RDS_DPRINTF2("rds_sendmsg",
 				    "ibt_get_ip_paths failed, ret: %d ", ret);
 
 				rw_enter(&sp->session_lock, RW_WRITER);
@@ -2222,7 +2251,7 @@
 			    pktp->dh_recvport);
 			rds_stall_port(ep->ep_sp, pktp->dh_recvport, RDS_LOCAL);
 		} else {
-			RDS_DPRINTF1(LABEL, "rds_deliver_new_msg returned: %d",
+			RDS_DPRINTF2(LABEL, "rds_deliver_new_msg returned: %d",
 			    ret);
 		}
 	}
@@ -2238,7 +2267,7 @@
 		RDS_INCR_TXACKS();
 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1(LABEL, "EP(%p): ibt_post_send for "
+			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send for "
 			    "acknowledgement failed: %d, SQ depth: %d",
 			    ep, ret, ep->ep_sndpool.pool_nbusy);
 			mutex_enter(&ep->ep_lock);
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c	Fri May 23 10:59:13 2008 -0700
+++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c	Fri May 23 11:34:29 2008 -0700
@@ -109,7 +109,7 @@
 	/* The SQ size should not be more than that supported by the HCA */
 	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
 	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
-		RDS_DPRINTF0("RDSIB", "MaxDataSendBuffers + %d is greater "
+		RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
 		    "than that supported by the HCA driver "
 		    "(%d + %d > %d or %d), lowering it to a supported value.",
 		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
@@ -124,7 +124,7 @@
 	/* The RQ size should not be more than that supported by the HCA */
 	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
 	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
-		RDS_DPRINTF0("RDSIB", "MaxDataRecvBuffers is greater than that "
+		RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
 		    "supported by the HCA driver (%d > %d or %d), lowering it "
 		    "to a supported value.", MaxDataRecvBuffers,
 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
@@ -137,7 +137,7 @@
 	/* The SQ size should not be more than that supported by the HCA */
 	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
 	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
-		RDS_DPRINTF0("RDSIB", "MaxCtrlSendBuffers is greater than that "
+		RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
 		    "supported by the HCA driver (%d > %d or %d), lowering it "
 		    "to a supported value.", MaxCtrlSendBuffers,
 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
@@ -150,7 +150,7 @@
 	/* The RQ size should not be more than that supported by the HCA */
 	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
 	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
-		RDS_DPRINTF0("RDSIB", "MaxCtrlRecvBuffers is greater than that "
+		RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
 		    "supported by the HCA driver (%d > %d or %d), lowering it "
 		    "to a supported value.", MaxCtrlRecvBuffers,
 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
@@ -162,7 +162,7 @@
 
 	/* The MaxRecvMemory should be less than that supported by the HCA */
 	if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
-		RDS_DPRINTF0("RDSIB", "MaxRecvMemory is greater than that "
+		RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
 		    "supported by the HCA driver (%d > %d), lowering it to %d",
 		    NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
 		    hattrp->hca_max_memr_len);
@@ -507,7 +507,7 @@
 		RDS_INCR_TXACKS();
 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1("rds_send_acknowledgement",
+			RDS_DPRINTF2("rds_send_acknowledgement",
 			    "EP(%p): ibt_post_send for acknowledgement "
 			    "failed: %d, SQ depth: %d",
 			    ep, ret, ep->ep_sndpool.pool_nbusy);
@@ -583,7 +583,7 @@
 		ret = ddi_taskq_dispatch(rds_taskq,
 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
 		if (ret != DDI_SUCCESS) {
-			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
+			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
 			    ret);
 			mutex_enter(&recvqp->qp_lock);
 			recvqp->qp_taskqpending = B_FALSE;
@@ -677,7 +677,7 @@
 		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
 		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
 		if (ret != DDI_SUCCESS) {
-			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
+			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
 			    ret);
 			mutex_enter(&recvqp->qp_lock);
 			recvqp->qp_taskqpending = B_FALSE;
@@ -722,7 +722,7 @@
 
 		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
 		if ((ret != IBT_SUCCESS) || (kx != jx)) {
-			RDS_DPRINTF1(LABEL, "ibt_post_recv for %d WRs failed: "
+			RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
 			    "%d", npost, ret);
 			npost -= kx;
 			break;
@@ -750,7 +750,7 @@
 		ret = ddi_taskq_dispatch(rds_taskq,
 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
 		if (ret != DDI_SUCCESS) {
-			RDS_DPRINTF1("rds_post_recv_buf",
+			RDS_DPRINTF2("rds_post_recv_buf",
 			    "ddi_taskq_dispatch failed: %d", ret);
 			mutex_enter(&recvqp->qp_lock);
 			recvqp->qp_taskqpending = B_FALSE;
@@ -831,7 +831,7 @@
 		ret = ddi_taskq_dispatch(rds_taskq,
 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
 		if (ret != DDI_SUCCESS) {
-			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
+			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
 			    ret);
 			mutex_enter(&recvqp->qp_lock);
 			recvqp->qp_taskqpending = B_FALSE;
@@ -1086,7 +1086,7 @@
 		(void) rds_is_recvq_empty(ep, B_TRUE);
 		ret = ibt_free_channel(ep->ep_chanhdl);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1("rds_ep_free_rc_channel", "EP(%p) "
+			RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
 			    "ibt_free_channel returned: %d", ep, ret);
 		}
 		ep->ep_chanhdl = NULL;
@@ -1099,7 +1099,7 @@
 	if (ep->ep_sendcq != NULL) {
 		ret = ibt_free_cq(ep->ep_sendcq);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1("rds_ep_free_rc_channel",
+			RDS_DPRINTF2("rds_ep_free_rc_channel",
 			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
 			    ep, ret);
 		}
@@ -1113,7 +1113,7 @@
 	if (ep->ep_recvcq != NULL) {
 		ret = ibt_free_cq(ep->ep_recvcq);
 		if (ret != IBT_SUCCESS) {
-			RDS_DPRINTF1("rds_ep_free_rc_channel",
+			RDS_DPRINTF2("rds_ep_free_rc_channel",
 			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
 			    ep, ret);
 		}