changeset 10231:d1cbfdc8cfa8

6605724 detection of SM fail-over must be sped up
author Rajkumar Sivaprakasam <Rajkumar.Sivaprakasam@Sun.COM>
date Fri, 31 Jul 2009 12:37:07 -0700
parents ea2823eb6ac6
children f37b85f7e03e
files usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h
diffstat 4 files changed, 64 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c	Fri Jul 31 10:26:44 2009 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c	Fri Jul 31 12:37:07 2009 -0700
@@ -1679,6 +1679,7 @@
 	if (ibdm_is_dev_mgt_supported(gid_info) != IBDM_SUCCESS) {
 		mutex_enter(&gid_info->gl_mutex);
 		gid_info->gl_state = IBDM_GID_PROBING_FAILED;
+		gid_info->gl_is_dm_capable = B_FALSE;
 		mutex_exit(&gid_info->gl_mutex);
 		ibdm_delete_glhca_list(gid_info);
 		mutex_enter(&ibdm.ibdm_mutex);
@@ -1688,6 +1689,13 @@
 		return;
 	}
 
+	/*
+	 * This GID is Device management capable
+	 */
+	mutex_enter(&gid_info->gl_mutex);
+	gid_info->gl_is_dm_capable = B_TRUE;
+	mutex_exit(&gid_info->gl_mutex);
+
 	/* Get the nodeguid and portguid of the port */
 	if (ibdm_get_node_port_guids(gid_info->gl_sa_hdl, gid_info->gl_dlid,
 	    &node_guid, &port_guid) != IBDM_SUCCESS) {
@@ -4284,6 +4292,7 @@
 				node_gid->gl_gid = temp_gid;
 				node_gid->gl_ngids++;
 			}
+			new_gid->gl_is_dm_capable = B_TRUE;
 			new_gid->gl_nodeguid = nodeguid;
 			new_gid->gl_portguid = dgid.gid_guid;
 			ibdm_addto_glhcalist(new_gid, hca_list);
@@ -5805,21 +5814,48 @@
 	if (ibmf_saa_event != IBMF_SAA_EVENT_GID_UNAVAILABLE)
 		return;
 
-	event_arg = (ibdm_saa_event_arg_t *)kmem_alloc(
-	    sizeof (ibdm_saa_event_arg_t), KM_SLEEP);
-	event_arg->ibmf_saa_handle = ibmf_saa_handle;
-	event_arg->ibmf_saa_event = ibmf_saa_event;
-	bcopy(event_details, &event_arg->event_details,
-	    sizeof (ibmf_saa_event_details_t));
-	event_arg->callback_arg = callback_arg;
-
-	if (taskq_dispatch(system_taskq, ibdm_saa_event_taskq,
-	    (void *)event_arg, TQ_NOSLEEP) == NULL) {
+	/*
+	 * GID UNAVAIL EVENT: Try to locate the GID in the GID list.
+	 * If we don't find it we just return.
+	 */
+	mutex_enter(&ibdm.ibdm_mutex);
+	gid_info = ibdm.ibdm_dp_gidlist_head;
+	while (gid_info) {
+		if (gid_info->gl_portguid ==
+		    event_details->ie_gid.gid_guid) {
+			break;
+		}
+		gid_info = gid_info->gl_next;
+	}
+	mutex_exit(&ibdm.ibdm_mutex);
+	if (gid_info == NULL) {
 		IBTF_DPRINTF_L2("ibdm", "\tsaa_event_cb: "
-		    "taskq_dispatch failed");
-		ibdm_free_saa_event_arg(event_arg);
+		    "GID for GUID %llX not found during GID UNAVAIL event",
+		    event_details->ie_gid.gid_guid);
 		return;
 	}
+
+	/*
+	 * If this GID is DM capable, we'll have to check whether this DGID
+	 * is reachable via another port.
+	 */
+	if (gid_info->gl_is_dm_capable == B_TRUE) {
+		event_arg = (ibdm_saa_event_arg_t *)kmem_alloc(
+		    sizeof (ibdm_saa_event_arg_t), KM_SLEEP);
+		event_arg->ibmf_saa_handle = ibmf_saa_handle;
+		event_arg->ibmf_saa_event = ibmf_saa_event;
+		bcopy(event_details, &event_arg->event_details,
+		    sizeof (ibmf_saa_event_details_t));
+		event_arg->callback_arg = callback_arg;
+
+		if (taskq_dispatch(system_taskq, ibdm_saa_event_taskq,
+		    (void *)event_arg, TQ_NOSLEEP) == NULL) {
+			IBTF_DPRINTF_L2("ibdm", "\tsaa_event_cb: "
+			    "taskq_dispatch failed");
+			ibdm_free_saa_event_arg(event_arg);
+			return;
+		}
+	}
 }
 
 /*
@@ -6680,7 +6716,7 @@
 				 * going down. This is ensured by
 				 * setting gl_disconnected to 1.
 				 */
-				if (gid_info->gl_nodeguid == 0)
+				if (gid_info->gl_is_dm_capable == B_FALSE)
 					gid_info->gl_disconnected = 1;
 				else
 					ibdm_reset_gidinfo(gid_info);
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c	Fri Jul 31 10:26:44 2009 -0700
+++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c	Fri Jul 31 12:37:07 2009 -0700
@@ -78,6 +78,7 @@
 int	ibmf_saa_trans_wait_time = IBMF_SAA_TRANS_WAIT_TIME_IN_SECS;
 int	ibmf_saa_max_resp_time = IBMF_SAA_MAX_RESP_TIME;
 int	ibmf_saa_max_subnet_timeout = IBMF_SAA_MAX_SUBNET_TIMEOUT;
+int	ibmf_saa_retrans_retries = IBMF_SAA_RETRANS_RETRIES;
 
 /*
  * ibmf_saa_impl_init:
@@ -3138,6 +3139,16 @@
 			msgp->im_msgbufs_send.im_bufs_mad_hdr->TransactionID =
 			    h2b64(saa_portp->saa_pt_current_tid++);
 
+			/*
+			 * We are going to retry the access to the SM but
+			 * Master SMLID could have changed due to a port change
+			 * event. So update the remote_lid of the message with
+			 * the SMLID from saa_portp for this port before the
+			 * retry.
+			 */
+			msgp->im_local_addr.ia_remote_lid =
+			    saa_portp->saa_pt_ibmf_addr_info.ia_remote_lid;
+
 			bcopy(&saa_portp->saa_pt_ibmf_retrans,
 			    &ibmf_retrans, sizeof (ibmf_retrans_t));
 
@@ -3710,7 +3721,7 @@
 	_NOTE(ASSUMING_PROTECTED(*saa_portp))
 
 	saa_portp->saa_pt_ibmf_retrans.retrans_retries =
-	    IBMF_SAA_RETRANS_RETRIES;
+	    ibmf_saa_retrans_retries;
 	/*
 	 * For the first transaction (generally getting the
 	 * classportinfo) have ibmf pick our timeouts.  It should be using the
--- a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h	Fri Jul 31 10:26:44 2009 -0700
+++ b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h	Fri Jul 31 12:37:07 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -176,6 +176,7 @@
 	ib_guid_t		gl_nodeguid;
 	ib_guid_t		gl_portguid;
 	ib_pkey_t		gl_p_key;
+	boolean_t		gl_is_dm_capable;
 	boolean_t		gl_redirected;
 	uint32_t		gl_redirect_dlid;
 	uint32_t		gl_redirect_QP;
--- a/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h	Fri Jul 31 10:26:44 2009 -0700
+++ b/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h	Fri Jul 31 12:37:07 2009 -0700
@@ -40,7 +40,7 @@
 #define	SAA_MAX_CLIENTS_PER_PORT	100
 #define	SAA_MAD_BASE_VERSION		1
 #define	SAA_MAD_CLASS_VERSION		2
-#define	IBMF_SAA_RETRANS_RETRIES 	2
+#define	IBMF_SAA_RETRANS_RETRIES 	0
 #define	IBMF_SAA_MAX_SUBNET_TIMEOUT 	20
 #define	IBMF_SAA_MAX_RESP_TIME		20
 #define	IBMF_SAA_MAX_BUSY_RETRY_COUNT	10