Mercurial > illumos > illumos-gate
changeset 10231:d1cbfdc8cfa8
6605724 detection of SM fail-over must be sped up
author | Rajkumar Sivaprakasam <Rajkumar.Sivaprakasam@Sun.COM> |
---|---|
date | Fri, 31 Jul 2009 12:37:07 -0700 |
parents | ea2823eb6ac6 |
children | f37b85f7e03e |
files | usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h |
diffstat | 4 files changed, 64 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c Fri Jul 31 10:26:44 2009 -0700 +++ b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c Fri Jul 31 12:37:07 2009 -0700 @@ -1679,6 +1679,7 @@ if (ibdm_is_dev_mgt_supported(gid_info) != IBDM_SUCCESS) { mutex_enter(&gid_info->gl_mutex); gid_info->gl_state = IBDM_GID_PROBING_FAILED; + gid_info->gl_is_dm_capable = B_FALSE; mutex_exit(&gid_info->gl_mutex); ibdm_delete_glhca_list(gid_info); mutex_enter(&ibdm.ibdm_mutex); @@ -1688,6 +1689,13 @@ return; } + /* + * This GID is Device management capable + */ + mutex_enter(&gid_info->gl_mutex); + gid_info->gl_is_dm_capable = B_TRUE; + mutex_exit(&gid_info->gl_mutex); + /* Get the nodeguid and portguid of the port */ if (ibdm_get_node_port_guids(gid_info->gl_sa_hdl, gid_info->gl_dlid, &node_guid, &port_guid) != IBDM_SUCCESS) { @@ -4284,6 +4292,7 @@ node_gid->gl_gid = temp_gid; node_gid->gl_ngids++; } + new_gid->gl_is_dm_capable = B_TRUE; new_gid->gl_nodeguid = nodeguid; new_gid->gl_portguid = dgid.gid_guid; ibdm_addto_glhcalist(new_gid, hca_list); @@ -5805,21 +5814,48 @@ if (ibmf_saa_event != IBMF_SAA_EVENT_GID_UNAVAILABLE) return; - event_arg = (ibdm_saa_event_arg_t *)kmem_alloc( - sizeof (ibdm_saa_event_arg_t), KM_SLEEP); - event_arg->ibmf_saa_handle = ibmf_saa_handle; - event_arg->ibmf_saa_event = ibmf_saa_event; - bcopy(event_details, &event_arg->event_details, - sizeof (ibmf_saa_event_details_t)); - event_arg->callback_arg = callback_arg; - - if (taskq_dispatch(system_taskq, ibdm_saa_event_taskq, - (void *)event_arg, TQ_NOSLEEP) == NULL) { + /* + * GID UNAVAIL EVENT: Try to locate the GID in the GID list. + * If we don't find it we just return. + */ + mutex_enter(&ibdm.ibdm_mutex); + gid_info = ibdm.ibdm_dp_gidlist_head; + while (gid_info) { + if (gid_info->gl_portguid == + event_details->ie_gid.gid_guid) { + break; + } + gid_info = gid_info->gl_next; + } + mutex_exit(&ibdm.ibdm_mutex); + if (gid_info == NULL) { IBTF_DPRINTF_L2("ibdm", "\tsaa_event_cb: " - "taskq_dispatch failed"); - ibdm_free_saa_event_arg(event_arg); + "GID for GUID %llX not found during GID UNAVAIL event", + event_details->ie_gid.gid_guid); return; } + + /* + * If this GID is DM capable, we'll have to check whether this DGID + * is reachable via another port. + */ + if (gid_info->gl_is_dm_capable == B_TRUE) { + event_arg = (ibdm_saa_event_arg_t *)kmem_alloc( + sizeof (ibdm_saa_event_arg_t), KM_SLEEP); + event_arg->ibmf_saa_handle = ibmf_saa_handle; + event_arg->ibmf_saa_event = ibmf_saa_event; + bcopy(event_details, &event_arg->event_details, + sizeof (ibmf_saa_event_details_t)); + event_arg->callback_arg = callback_arg; + + if (taskq_dispatch(system_taskq, ibdm_saa_event_taskq, + (void *)event_arg, TQ_NOSLEEP) == NULL) { + IBTF_DPRINTF_L2("ibdm", "\tsaa_event_cb: " + "taskq_dispatch failed"); + ibdm_free_saa_event_arg(event_arg); + return; + } + } } /* @@ -6680,7 +6716,7 @@ * going down. This is ensured by * setting gl_disconnected to 1. */ - if (gid_info->gl_nodeguid == 0) + if (gid_info->gl_is_dm_capable == B_FALSE) gid_info->gl_disconnected = 1; else ibdm_reset_gidinfo(gid_info);
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c Fri Jul 31 10:26:44 2009 -0700 +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_saa_impl.c Fri Jul 31 12:37:07 2009 -0700 @@ -78,6 +78,7 @@ int ibmf_saa_trans_wait_time = IBMF_SAA_TRANS_WAIT_TIME_IN_SECS; int ibmf_saa_max_resp_time = IBMF_SAA_MAX_RESP_TIME; int ibmf_saa_max_subnet_timeout = IBMF_SAA_MAX_SUBNET_TIMEOUT; +int ibmf_saa_retrans_retries = IBMF_SAA_RETRANS_RETRIES; /* * ibmf_saa_impl_init: @@ -3138,6 +3139,16 @@ msgp->im_msgbufs_send.im_bufs_mad_hdr->TransactionID = h2b64(saa_portp->saa_pt_current_tid++); + /* + * We are going to retry the access to the SM but + * Master SMLID could have changed due to a port change + * event. So update the remote_lid of the message with + * the SMLID from saa_portp for this port before the + * retry. + */ + msgp->im_local_addr.ia_remote_lid = + saa_portp->saa_pt_ibmf_addr_info.ia_remote_lid; + bcopy(&saa_portp->saa_pt_ibmf_retrans, &ibmf_retrans, sizeof (ibmf_retrans_t)); @@ -3710,7 +3721,7 @@ _NOTE(ASSUMING_PROTECTED(*saa_portp)) saa_portp->saa_pt_ibmf_retrans.retrans_retries = - IBMF_SAA_RETRANS_RETRIES; + ibmf_saa_retrans_retries; /* * For the first transaction (generally getting the * classportinfo) have ibmf pick our timeouts. It should be using the
--- a/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h Fri Jul 31 10:26:44 2009 -0700 +++ b/usr/src/uts/common/sys/ib/mgt/ibdm/ibdm_impl.h Fri Jul 31 12:37:07 2009 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -176,6 +176,7 @@ ib_guid_t gl_nodeguid; ib_guid_t gl_portguid; ib_pkey_t gl_p_key; + boolean_t gl_is_dm_capable; boolean_t gl_redirected; uint32_t gl_redirect_dlid; uint32_t gl_redirect_QP;
--- a/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h Fri Jul 31 10:26:44 2009 -0700 +++ b/usr/src/uts/common/sys/ib/mgt/ibmf/ibmf_saa_impl.h Fri Jul 31 12:37:07 2009 -0700 @@ -40,7 +40,7 @@ #define SAA_MAX_CLIENTS_PER_PORT 100 #define SAA_MAD_BASE_VERSION 1 #define SAA_MAD_CLASS_VERSION 2 -#define IBMF_SAA_RETRANS_RETRIES 2 +#define IBMF_SAA_RETRANS_RETRIES 0 #define IBMF_SAA_MAX_SUBNET_TIMEOUT 20 #define IBMF_SAA_MAX_RESP_TIME 20 #define IBMF_SAA_MAX_BUSY_RETRY_COUNT 10