changeset 9913:b5cbbe12c671

6818976 fwflash: hermon plugin does not support MHQH29-XTC 6680837 IBCM Needs ULP reference counts for MCG Join/Leave 6783183 OFUV require new project private interfaces for CM event handling
author Shantkumar Hiremath<Shantkumar.Hiremath@Sun.COM>
date Fri, 19 Jun 2009 10:28:43 -0700
parents ebed7eb11a25
children 15092dda0737
files usr/src/cmd/fwflash/plugins/hdrs/MELLANOX.h usr/src/uts/common/io/ib/ibtl/ibtl_chan.c usr/src/uts/common/io/ib/ibtl/ibtl_qp.c usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c usr/src/uts/common/io/warlock/ibcm.wlcmd usr/src/uts/common/sys/ib/ibtl/ibti_cm.h usr/src/uts/common/sys/ib/ibtl/ibvti.h usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h
diffstat 10 files changed, 333 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/fwflash/plugins/hdrs/MELLANOX.h	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/cmd/fwflash/plugins/hdrs/MELLANOX.h	Fri Jun 19 10:28:43 2009 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -181,6 +181,11 @@
 	{ "375-3605-01",	"SUN0160000001",	"Sun Mirage QDR" },
 	{ "375-3606-01",	"SUN0150000001",	"Sun Falcon QDR" },
 	{ "MHJH29-XTC",		"MT_04E0110003",	"Eagle QDR" },
+	{ "MHJH29-XSC",		"MT_0500120005", "Eagle QDR PCIe Gen 2.0" },
+	{ "MHQH29-XTC",		"MT_04E0120005", "Eagle QDR PCIe Gen 2.0" },
+	{ "MHQH19-XTC",		"MT_0C40110009", "Falcon QDR PCIe Gen 2.0" },
+	{ "MHQH29-XTC",		"MT_0BB0110003", "Falcon QDR PCIe Gen 2.0" },
+	{ "MHQH29-XTC",		"MT_0BB0120003", "Falcon QDR PCIe Gen 2.0" },
 	{ "375-3551-05",	"SUN0080000001",	"Sun C48-IB-NEM" }
 };
 
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_chan.c	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_chan.c	Fri Jun 19 10:28:43 2009 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * ibtl_chan.c
  *
@@ -201,13 +198,6 @@
 		return (retval);
 	}
 
-	/*
-	 * The IBTA spec does not include the signal type or PD on a QP
-	 * query operation. In order to implement the "CLONE" feature
-	 * we need to cache these values.
-	 */
-	chanp->ch_qp.qp_flags = qp_attr.qp_flags;
-	chanp->ch_qp.qp_pd_hdl = qp_attr.qp_pd_hdl;
 	*rc_chan_p = chanp;
 
 	IBTF_DPRINTF_L3(ibtl_chan, "ibt_alloc_rc_channel(%p): - SUCCESS (%p)",
@@ -534,13 +524,6 @@
 		return (retval);
 	}
 
-	/*
-	 * The IBTA spec does not include the signal type or PD on a QP
-	 * query operation. In order to implement the "CLONE" feature
-	 * we need to cache these values.
-	 */
-	chanp->ch_qp.qp_flags = qp_attr.qp_flags;
-	chanp->ch_qp.qp_pd_hdl = qp_attr.qp_pd_hdl;
 	*ud_chan_p = chanp;
 
 	IBTF_DPRINTF_L3(ibtl_chan, "ibt_alloc_ud_channel(%p): - SUCCESS (%p)",
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c	Fri Jun 19 10:28:43 2009 -0700
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/ib/ibtl/impl/ibtl.h>
 #include <sys/ib/ibtl/impl/ibtl_cm.h>
 
@@ -178,6 +176,13 @@
 	chanp->ch_qp.qp_send_cq = qp_attrp->qp_scq_hdl;
 	chanp->ch_qp.qp_recv_cq = qp_attrp->qp_rcq_hdl;
 	chanp->ch_current_state = IBT_STATE_RESET;
+	/*
+	 * The IBTA spec does not include the signal type or PD on a QP
+	 * query operation. In order to implement the "CLONE" feature
+	 * we need to cache these values.  Mostly used by TI client.
+	 */
+	chanp->ch_qp.qp_flags = qp_attrp->qp_flags;
+	chanp->ch_qp.qp_pd_hdl = qp_attrp->qp_pd_hdl;
 	mutex_init(&chanp->ch_cm_mutex, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&chanp->ch_cm_cv, NULL, CV_DEFAULT, NULL);
 
@@ -385,6 +390,10 @@
 	mutex_init(&chanp->ch_cm_mutex, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&chanp->ch_cm_cv, NULL, CV_DEFAULT, NULL);
 
+	/* Update these variables so that the debugger shows correct values. */
+	chanp->ch_qp.qp_flags = qp_attrp->qp_flags;
+	chanp->ch_qp.qp_pd_hdl = qp_attrp->qp_pd_hdl;
+
 	mutex_enter(&hca_hdl->ha_mutex);
 	hca_hdl->ha_qp_cnt++;
 	mutex_exit(&hca_hdl->ha_mutex);
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Fri Jun 19 10:28:43 2009 -0700
@@ -40,7 +40,7 @@
 static ibcm_status_t	ibcm_init(void);
 static ibcm_status_t	ibcm_fini(void);
 
-/* Routines to initialize and destory CM global locks and CVs */
+/* Routines to initialize and destroy CM global locks and CVs */
 static void		ibcm_init_locks(void);
 static void		ibcm_fini_locks(void);
 
@@ -88,6 +88,7 @@
 taskq_t			*ibcm_taskq = NULL;
 int			taskq_dispatch_fail_cnt;
 
+kmutex_t		ibcm_mcglist_lock;	/* MCG list lock */
 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
@@ -187,8 +188,6 @@
 	ibcm_state_data_t	head;
 } ibcm_open;
 
-static void ibcm_open_task(void *);
-
 /*
  * Flow control logic for SA access and close_rc_channel calls follows.
  */
@@ -254,7 +253,7 @@
 /* the following globals are CM tunables */
 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
 
-uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
+uint8_t		ibcm_max_retries = IBCM_MAX_RETRIES;
 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
@@ -262,7 +261,6 @@
 
 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
 uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */
-uint32_t	ibcm_wait_for_res_cnt_timeout = 2000000;	/* 2 sec */
 
 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
@@ -475,6 +473,7 @@
 
 	/* Create all global locks within cm module */
 	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -501,6 +500,7 @@
 {
 	/* Destroy all global locks within cm module */
 	mutex_destroy(&ibcm_svc_info_lock);
+	mutex_destroy(&ibcm_mcglist_lock);
 	mutex_destroy(&ibcm_timeout_list_lock);
 	mutex_destroy(&ibcm_global_hca_lock);
 	mutex_destroy(&ibcm_sa_open_lock);
@@ -825,7 +825,7 @@
 {
 	int			i;
 	ibt_status_t		status;
-	uint_t			nports = 0;
+	uint8_t			nports = 0;
 	ibcm_hca_info_t		*hcap;
 	ibt_hca_attr_t		hca_attrs;
 
@@ -996,17 +996,6 @@
 	while (hcap->hca_res_cnt > 0)
 		cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);
 
-	if (hcap->hca_res_cnt != 0) {
-		/* We got a timeout waiting for hca_res_cnt to become 0 */
-		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
-		    " to timeout on res_cnt %d, \n Some CM connections are "
-		    "still in transient state, looks like we need to wait "
-		    "some more time (ibcm_wait_for_res_cnt_timeout).",
-		    hcap->hca_res_cnt);
-		hcap->hca_state = IBCM_HCA_ACTIVE;
-		return (IBCM_FAILURE);
-	}
-
 	/* Re-assert the while loop step above */
 	ASSERT(hcap->hca_sidr_list == NULL);
 	avl_destroy(&hcap->hca_active_tree);
@@ -1931,8 +1920,8 @@
 	ibtl_cm_sm_init_fail_t	*ifail;
 	ib_gid_t		*sgidp;
 
-	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
-	    ibt_hdl);
+	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)",
+	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl));
 
 	mutex_enter(&ibcm_sm_notice_serialize_lock);
 
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_sm.c	Fri Jun 19 10:28:43 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -4535,7 +4535,6 @@
 			mutex_enter(&ud_statep->ud_state_mutex);
 			IBCM_UD_REF_CNT_DECR(ud_statep);
 			mutex_exit(&ud_statep->ud_state_mutex);
-			ibcm_dec_hca_res_cnt(hcap);
 			ibcm_delete_ud_state_data(ud_statep);
 			return;
 		}
@@ -4544,7 +4543,6 @@
 		ud_statep->ud_svc_id = b2h64(sidr_reqp->sidr_req_service_id);
 		ud_statep->ud_state  = IBCM_STATE_SIDR_REQ_RCVD;
 		ud_statep->ud_clnt_proceed = IBCM_BLOCK;
-		ud_statep->ud_hcap = hcap;
 
 		mutex_enter(&ibcm_svc_info_lock);
 
@@ -4907,6 +4905,7 @@
 	ibcm_ud_state_data_t	*ud_statep = (ibcm_ud_state_data_t *)arg;
 
 	mutex_enter(&ud_statep->ud_state_mutex);
+	ud_statep->ud_timerid = 0;
 
 	IBTF_DPRINTF_L3(cmlog, "ibcm_sidr_timeout_cb: ud_statep 0x%p "
 	    "state = 0x%x", ud_statep, ud_statep->ud_state);
@@ -6115,6 +6114,8 @@
 	ibcm_clnt_reply_info_t	clnt_info;
 
 	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_req: statep 0x%p", statep);
+	IBTF_DPRINTF_L4(cmlog, "ibcm_cep_state_req: SID 0x%lX",
+	    b2h64(cm_req_msgp->req_svc_id));
 	/* client handler should be valid */
 	ASSERT(statep->cm_handler != NULL);
 
@@ -6240,8 +6241,10 @@
 	ibcm_insert_trace(statep, IBCM_TRACE_CALLED_REQ_RCVD_EVENT);
 
 	/* Invoke the client handler */
+	statep->req_msgp = cm_req_msgp;
 	cb_status = statep->cm_handler(statep->state_cm_private, &event,
 	    &ret_args, priv_data, IBT_REP_PRIV_DATA_SZ);
+	statep->req_msgp = NULL;
 
 	ibcm_insert_trace(statep, IBCM_TRACE_RET_REQ_RCVD_EVENT);
 
@@ -6395,7 +6398,8 @@
 			return (IBCM_SEND_REJ);
 		}
 
-		if (qp_attrs.qp_info.qp_state != IBT_STATE_INIT) {
+		if (qp_attrs.qp_info.qp_state != IBT_STATE_INIT &&
+		    statep->skip_rtr == 0) {
 			IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_req_cm_hdlr: "
 			    "qp state != INIT on server");
 			*reject_reason = IBT_CM_CHAN_INVALID_STATE;
@@ -6403,8 +6407,30 @@
 			    IBT_CM_FAILURE_REQ, IBT_CM_CHAN_INVALID_STATE,
 			    NULL, 0);
 			return (IBCM_SEND_REJ);
+		} else if (qp_attrs.qp_info.qp_state != IBT_STATE_RTR &&
+		    statep->skip_rtr == 1) {
+			IBTF_DPRINTF_L3(cmlog, "ibcm_process_cep_req_cm_hdlr: "
+			    "qp state != RTR on server");
+			*reject_reason = IBT_CM_CHAN_INVALID_STATE;
+			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
+			    IBT_CM_FAILURE_REQ, IBT_CM_CHAN_INVALID_STATE,
+			    NULL, 0);
+			return (IBCM_SEND_REJ);
 		}
 
+		if (statep->skip_rtr &&
+		    qp_attrs.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
+		    statep->prim_port) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_process_cep_req_cm_hdlr: "
+			    "QP port invalid");
+			*reject_reason = IBT_CM_CHAN_INVALID_STATE;
+			ibcm_handler_conn_fail(statep, IBT_CM_FAILURE_REJ_SENT,
+			    IBT_CM_FAILURE_REQ, IBT_CM_CHAN_INVALID_STATE,
+			    NULL, 0);
+			return (IBCM_SEND_REJ);
+		} else if (statep->skip_rtr)
+			goto skip_init_trans;
+
 		/* Init to Init, if required */
 		if (qp_attrs.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
 		    statep->prim_port) {
@@ -6456,6 +6482,8 @@
 				    status);
 			}
 		}
+skip_init_trans:
+		/* Do sanity tests even if we are skipping RTR */
 
 		/* fill in the REP msg based on ret_args from client */
 		if (clnt_info->reply_event->rep.cm_rdma_ra_out >
@@ -6521,6 +6549,9 @@
 		bcopy(&local_ca_guid, rep_msgp->rep_local_ca_guid,
 		    sizeof (ib_guid_t));
 
+		if (statep->skip_rtr)
+			goto skip_rtr_trans;
+
 		/* Transition QP from Init to RTR state */
 		if (ibcm_invoke_qp_modify(statep, cm_req_msg, rep_msgp) !=
 		    IBT_SUCCESS) {
@@ -6533,6 +6564,7 @@
 			    IBT_CM_FAILURE_REQ, IBT_CM_CI_FAILURE, NULL, 0);
 			return (IBCM_SEND_REJ);
 		}
+skip_rtr_trans:
 
 		/*
 		 * Link statep and channel, once CM determines it is
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c	Fri Jun 19 10:28:43 2009 -0700
@@ -687,7 +687,7 @@
 		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
 		    chan_args->oc_path_rnr_retry_cnt;
 		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
-		    " overidden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
+		    " overridden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
 		    chan_args->oc_path->pi_path_mtu);
 	} else
 		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
@@ -3815,6 +3815,77 @@
 	kmem_free(pup, sizeof (ibcm_port_up_t));
 }
 
+/*
+ * Return REQ-derived RTR parameters (RNR NAK time, path MTU, starting PSN).
+ * Passive side only, in REQ_RCVD/MRA_SENT, while statep->req_msgp is set.
+ */
+ibt_status_t
+ibt_ofuvcm_get_req_data(void *session_id, ibt_ofuvcm_req_data_t *req_data)
+{
+	ibcm_state_data_t	*statep = (ibcm_state_data_t *)session_id;
+	ibcm_req_msg_t		*req_msgp;
+	ibt_status_t		ret = IBT_SUCCESS;
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_ofuvcm_get_req_data: session_id %p",
+	    session_id);
+	mutex_enter(&statep->state_mutex);
+	if ((statep->state != IBCM_STATE_REQ_RCVD) &&
+	    (statep->state != IBCM_STATE_MRA_SENT)) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_get_req_data: Invalid "
+		    "State %x", statep->state);
+		ret = IBT_CHAN_STATE_INVALID;
+	} else if (statep->mode == IBCM_ACTIVE_MODE) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_get_req_data: Active mode "
+		    "not supported");
+		ret = IBT_INVALID_PARAM;
+	} else if ((req_msgp = statep->req_msgp) == NULL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_get_req_data: no REQ msg");
+		ret = IBT_CHAN_STATE_INVALID;
+	} else {
+		/* Should the PSN be same as the active side?? */
+		req_data->req_rnr_nak_time = ibcm_default_rnr_nak_time;
+		req_data->req_path_mtu = req_msgp->req_mtu_plus >> 4;
+		req_data->req_rq_psn =
+		    b2h32(req_msgp->req_starting_psn_plus) >> 8;
+	}
+	mutex_exit(&statep->state_mutex);
+	return (ret);
+}
+
+/*
+ * ibt_ofuvcm_proceed: ibt_cm_proceed() variant for REQ_RCV events that sets
+ * statep->skip_rtr before handing off, so that the REQ response path skips
+ * the CM-driven INIT->RTR transition.  Returns IBT_INVALID_PARAM on bad args.
+ */
+ibt_status_t
+ibt_ofuvcm_proceed(ibt_cm_event_type_t event, void *session_id,
+    ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
+    void *priv_data, ibt_priv_data_len_t priv_data_len)
+{
+	ibcm_state_data_t	*statep = (ibcm_state_data_t *)session_id;
+
+	/* Validate before any dereference; the traces below read statep. */
+	if ((statep == NULL) || (status == IBT_CM_DEFER)) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : Invalid Args");
+		return (IBT_INVALID_PARAM);
+	}
+	if (event != IBT_CM_EVENT_REQ_RCV) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : only for REQ_RCV");
+		return (IBT_INVALID_PARAM);
+	}
+
+	IBTF_DPRINTF_L3(cmlog, "ibt_ofuvcm_proceed chan 0x%p event %x "
+	    "status %x session_id %p", statep->channel, event, status,
+	    session_id);
+	IBTF_DPRINTF_L5(cmlog, "ibt_ofuvcm_proceed chan 0x%p "
+	    "cm_event_data %p, priv_data %p priv_data_len %x",
+	    statep->channel, cm_event_data, priv_data, priv_data_len);
+	mutex_enter(&statep->state_mutex);
+	statep->skip_rtr = 1;
+	mutex_exit(&statep->state_mutex);
+	return (ibt_cm_proceed(event, session_id, status, cm_event_data,
+	    priv_data, priv_data_len));
+}
 
 /*
  * Function:
@@ -3989,6 +4060,10 @@
 		ibcm_handle_cep_req_response(statep, response, reject_reason,
 		    arej_len);
 
+		mutex_enter(&statep->state_mutex);
+		statep->skip_rtr = 0;
+		mutex_exit(&statep->state_mutex);
+
 	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
 		response =
 		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
@@ -4573,6 +4648,22 @@
 
 #endif
 
+/* For MCG List search: one refcounted record per joined MGID and port */
+typedef struct ibcm_mcg_list_s {
+	struct ibcm_mcg_list_s	*ml_next;	/* next list entry */
+	ib_gid_t		ml_sgid;	/* PortGID from join response */
+	ib_gid_t		ml_mgid;	/* MGID from join response */
+	ib_pkey_t		ml_pkey;	/* P_Key from join request */
+	ib_qkey_t		ml_qkey;	/* Q_Key from join request */
+	uint_t			ml_refcnt;	/* joins not yet left */
+	uint8_t			ml_jstate;	/* OR of joiners' JoinState */
+} ibcm_mcg_list_t;
+
+ibcm_mcg_list_t	*ibcm_mcglist = NULL;
+
+_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcg_list_s))
+_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcglist))
+
 typedef struct ibcm_join_mcg_tqarg_s {
 	ib_gid_t		rgid;
 	ibt_mcg_attr_t		mcg_attr;
@@ -4583,6 +4674,135 @@
 
 _NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))
 
+/*
+ * ibcm_add_incr_mcg_entry
+ *
+ * Records a successful MCG join on ibcm_mcglist.  When an entry with the
+ * same MGID and port GUID as mcg_resp already exists, its reference count
+ * is incremented and mcg_req's JoinState is OR'ed into it; otherwise a new
+ * entry with a reference count of one is linked at the head of the list.
+ * Lookup and insert share one hold of ibcm_mcglist_lock so that concurrent
+ * joins of the same group cannot both miss and add duplicate entries.
+ */
+void
+ibcm_add_incr_mcg_entry(sa_mcmember_record_t *mcg_req,
+    sa_mcmember_record_t *mcg_resp)
+{
+	ibcm_mcg_list_t	*newp;
+	ibcm_mcg_list_t	*head;
+
+	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: MGID %llX:%llX"
+	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
+	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
+	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
+
+	/* KM_SLEEP may block, so allocate before taking the lock. */
+	newp = kmem_zalloc(sizeof (ibcm_mcg_list_t), KM_SLEEP);
+
+	mutex_enter(&ibcm_mcglist_lock);
+	for (head = ibcm_mcglist; head != NULL; head = head->ml_next) {
+		if ((head->ml_mgid.gid_guid == mcg_resp->MGID.gid_guid) &&
+		    (head->ml_mgid.gid_prefix == mcg_resp->MGID.gid_prefix) &&
+		    (head->ml_sgid.gid_guid == mcg_resp->PortGID.gid_guid)) {
+			/* OR the join_state value, we need this during leave */
+			head->ml_refcnt++;
+			head->ml_jstate |= mcg_req->JoinState;
+			IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Entry "
+			    "FOUND: refcnt %d JState %X", head->ml_refcnt,
+			    head->ml_jstate);
+			mutex_exit(&ibcm_mcglist_lock);
+			kmem_free(newp, sizeof (ibcm_mcg_list_t));
+			return;
+		}
+	}
+
+	/* List is empty or has no match: fill in and link the new entry. */
+	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Create NEW Entry ");
+	newp->ml_sgid = mcg_resp->PortGID;
+	newp->ml_mgid = mcg_resp->MGID;
+	newp->ml_qkey = mcg_req->Q_Key;
+	newp->ml_pkey = mcg_req->P_Key;
+	newp->ml_refcnt = 1; /* As this is the first entry */
+	newp->ml_jstate = mcg_req->JoinState;
+	newp->ml_next = ibcm_mcglist;
+	ibcm_mcglist = newp;
+	mutex_exit(&ibcm_mcglist_lock);
+}
+
+/*
+ * ibcm_del_decr_mcg_entry
+ *
+ * Drops one reference from the ibcm_mcglist entry whose MGID and port GUID
+ * match mcg_req.  A JoinState that shares no bits with the entry's cached
+ * join state is logged but otherwise ignored.  *jstate is zeroed on entry.
+ * ibcm_mcglist_lock is taken and released inside this routine.
+ *
+ * Return value:
+ * IBCM_SUCCESS		The last reference is gone: the entry has been
+ * 			unlinked and freed, and *jstate holds its cached
+ * 			join_state for use by xxx_leave_mcg().
+ * IBCM_LOOKUP_EXISTS	Entry found and ref_cnt decremented, but it is NOT
+ * 			zero.  So do not leave the MCG group yet.
+ * IBCM_LOOKUP_FAIL	No matching entry was found.
+ */
+ibcm_status_t
+ibcm_del_decr_mcg_entry(sa_mcmember_record_t *mcg_req, uint8_t *jstate)
+{
+	ibcm_mcg_list_t	*cur;
+	ibcm_mcg_list_t	**linkp;	/* link that points at cur */
+
+	IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry: MGID %llX:%llX"
+	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
+	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
+	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);
+
+	*jstate = 0;
+
+	mutex_enter(&ibcm_mcglist_lock);
+	for (linkp = &ibcm_mcglist; (cur = *linkp) != NULL;
+	    linkp = &cur->ml_next) {
+		/* Skip entries that belong to another group or port. */
+		if ((cur->ml_mgid.gid_guid != mcg_req->MGID.gid_guid) ||
+		    (cur->ml_mgid.gid_prefix != mcg_req->MGID.gid_prefix) ||
+		    (cur->ml_sgid.gid_guid != mcg_req->PortGID.gid_guid))
+			continue;
+
+		if ((cur->ml_jstate & mcg_req->JoinState) == 0) {
+			IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry"
+			    ": JoinState mismatch %X %X)",
+			    cur->ml_jstate, mcg_req->JoinState);
+		}
+
+		/* Give up this caller's reference. */
+		cur->ml_refcnt--;
+		if (cur->ml_refcnt != 0) {
+			mutex_exit(&ibcm_mcglist_lock);
+			return (IBCM_LOOKUP_EXISTS);
+		}
+
+		/* Last reference: hand back the join state, unlink, free. */
+		*jstate = cur->ml_jstate;
+		IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry"
+		    ": refcnt is ZERO, so delete the entry ");
+		*linkp = cur->ml_next;
+		mutex_exit(&ibcm_mcglist_lock);
+
+		kmem_free(cur, sizeof (ibcm_mcg_list_t));
+		return (IBCM_SUCCESS);
+	}
+	mutex_exit(&ibcm_mcglist_lock);
+
+	/*
+	 * The whole list was walked without a match, so no entry was
+	 * recorded for the MCG named in this request.
+	 */
+	IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry: Match NOT "
+	    "Found ");
+
+	return (IBCM_LOOKUP_FAIL);
+}
+
+
 /*
  * Function:
  *	ibt_join_mcg
@@ -4879,8 +5099,11 @@
 		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
 		mcg_info_p->mc_adds_vect.av_src_path = 0;
 
+		/* Add or Incr the matching MCG entry. */
+		ibcm_add_incr_mcg_entry(&mcg_req, mcg_resp);
 		/* Deallocate the memory allocated by SA for mcg_resp. */
 		kmem_free(mcg_resp, length);
+
 		retval = IBT_SUCCESS;
 	} else {
 		retval = IBT_MCG_RECORDS_NOT_FOUND;
@@ -4959,10 +5182,12 @@
 	uint64_t		component_mask = 0;
 	int			sa_retval;
 	ibt_status_t		retval;
+	ibcm_status_t		ret;
 	ibtl_cm_hca_port_t	hca_port;
 	size_t			length;
 	void			*results_p;
 	ibcm_hca_info_t		*hcap;
+	uint8_t			jstate = 0;
 
 	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
 	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);
@@ -5004,6 +5229,22 @@
 	mcg_req.JoinState = mc_join_state;
 	component_mask |= SA_MC_COMPMASK_JOINSTATE;
 
+	ret = ibcm_del_decr_mcg_entry(&mcg_req, &jstate);
+	if (ret == IBCM_LOOKUP_EXISTS) {
+		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Multiple JoinMCG record "
+		    " still exists, we shall leave for last leave_mcg call");
+		return (IBT_SUCCESS);
+	} else if (ret == IBCM_LOOKUP_FAIL) {
+		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: No Record found, "
+		    "continue with leave_mcg call");
+	} else if ((ret == IBCM_SUCCESS) && (jstate != 0)) {
+		/*
+		 * Update with cached "jstate", as this will be OR'ed of
+		 * all ibt_join_mcg() calls for this record.
+		 */
+		mcg_req.JoinState = jstate;
+	}
+
 	retval = ibtl_cm_get_hca_port(rgid, 0, &hca_port);
 	if (retval != IBT_SUCCESS) {
 		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
--- a/usr/src/uts/common/io/warlock/ibcm.wlcmd	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/io/warlock/ibcm.wlcmd	Fri Jun 19 10:28:43 2009 -0700
@@ -63,6 +63,9 @@
 root	ibt_release_ip_sid 
 root	ibt_get_src_ip
 
+root	ibt_ofuvcm_get_req_data
+root	ibt_ofuvcm_proceed
+
 root	ibcm_arp_timeout
 root	ibcm_arp_get_srcip_plist
 root	ibcm_arp_lrput
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibti_cm.h	Fri Jun 19 10:28:43 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_IB_IBTL_IBTI_CM_H
 #define	_SYS_IB_IBTL_IBTI_CM_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * ibti_cm.h
  *
@@ -437,6 +435,13 @@
 	ib_eecn_t	req_opaque2;
 } ibt_cm_req_rcv_t;
 
+typedef struct ibt_ofuvcm_req_data_s {	/* ibt_ofuvcm_get_req_data() output */
+	uint32_t		req_rq_psn:24;	/* starting PSN from the REQ */
+	uint32_t		reserved:8;
+	ib_mtu_t		req_path_mtu;	/* path MTU from the REQ */
+	ibt_rnr_nak_time_t	req_rnr_nak_time; /* CM default RNR NAK time */
+} ibt_ofuvcm_req_data_t;
+
 /*
  * The IBT_CM_EVENT_CONN_CLOSED event is generated by the CM when a connection
  * has been closed. The reason the connection was closed is given in the
--- a/usr/src/uts/common/sys/ib/ibtl/ibvti.h	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/sys/ib/ibtl/ibvti.h	Fri Jun 19 10:28:43 2009 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef	_SYS_IB_IBTL_IBVTI_H
 #define	_SYS_IB_IBTL_IBVTI_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * ibvti.h
  *
@@ -260,6 +257,12 @@
  */
 ibt_status_t ibt_get_module_failure(ibt_failure_type_t type, uint64_t ena);
 
+ibt_status_t ibt_ofuvcm_get_req_data(void *, ibt_ofuvcm_req_data_t *);
+
+ibt_status_t ibt_ofuvcm_proceed(ibt_cm_event_type_t, void *,
+    ibt_cm_status_t, ibt_cm_proceed_reply_t *, void *,
+    ibt_priv_data_len_t);
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h	Fri Jun 19 10:07:38 2009 -0700
+++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h	Fri Jun 19 10:28:43 2009 -0700
@@ -421,6 +421,7 @@
 	boolean_t		delete_mra_msg;
 	boolean_t		stale;
 	boolean_t		delete_state_data;
+	boolean_t		skip_rtr;
 
 	boolean_t		open_done;
 	boolean_t		close_done;
@@ -465,6 +466,9 @@
 
 	struct ibcm_conn_trace_s	*conn_trace;
 
+	/* For ibt_ofuvcm_get_req_data() */
+	void			*req_msgp;
+
 } ibcm_state_data_t;
 
 _NOTE(MUTEX_PROTECTS_DATA(ibcm_state_data_s::state_mutex,
@@ -2049,7 +2053,7 @@
 extern ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr,
 				*ibcm_ud_timeout_list_tail;
 /* Default global retry counts */
-extern uint32_t		ibcm_max_retries;
+extern uint8_t		ibcm_max_retries;
 extern uint32_t		ibcm_max_sa_retries;
 extern int		ibcm_sa_timeout_delay;	/* in ticks */
 
@@ -2068,6 +2072,7 @@
 
 /* Global locks */
 extern kmutex_t		ibcm_svc_info_lock;
+extern kmutex_t		ibcm_mcglist_lock;
 extern kmutex_t		ibcm_global_hca_lock;
 extern kmutex_t		ibcm_qp_list_lock;
 extern kmutex_t		ibcm_timeout_list_lock;