view usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c @ 13779:bf40125f4b37

3063 many instances of unlike enum comparison Reviewed by: Robert Mustacchi <rm@joyent.com> Approved by: Garrett D'Amore <garrett@damore.org>
author Richard Lowe <richlowe@richlowe.net>
date Sun, 08 Jul 2012 03:19:56 +0100
parents b65a8427f8fe
children
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/ib/mgt/ibcm/ibcm_impl.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/mgt/ibcm/ibcm_arp.h>

/*
 * ibcm_ti.c
 *	These routines implement the Communication Manager's interfaces to IBTL.
 */

/*
 * CM rc recycle task args structure definition.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * types and names only; the code that fills in and consumes this structure
 * is not visible in this part of the file -- confirm against it.
 */
typedef struct ibcm_taskq_recycle_arg_s {
	/* channel handle; presumably the RC channel to be recycled */
	ibt_channel_hdl_t	rc_chan;
	/* CEP control flags; presumably applied when the channel is recycled */
	ibt_cep_flags_t		control;
	/* HCA port number; presumably the port the recycled channel uses */
	uint8_t			hca_port_num;
	/* recycle handler; presumably the client's completion callback */
	ibt_recycle_handler_t	func;
	/* opaque pointer; presumably handed back to "func" */
	void			*arg;
} ibcm_taskq_recycle_arg_t;

/* lint annotation: the structure is treated as read-only data */
_NOTE(READ_ONLY_DATA(ibcm_taskq_recycle_arg_s))

/*
 * Forward declarations of helpers used by the routines in this file.
 * The "static" ones are private to this file.
 */

/* Builds the CM MAD destination address used by ibt_open_rc_channel() */
static ibt_status_t	ibcm_init_reply_addr(ibcm_hca_info_t *hcap,
    ibcm_mad_addr_t *reply_addr, ibt_chan_open_args_t *chan_args,
    ibt_chan_open_flags_t flags, ib_time_t *cm_pkt_lt, ib_lid_t prim_slid);
/* NOTE(review): presumably taskq entry points, going by name and signature */
static void		ibcm_process_abort_via_taskq(void *args);
static ibt_status_t	ibcm_process_rc_recycle_ret(void *recycle_arg);
static ibt_status_t	ibcm_process_join_mcg(void *taskq_arg);
static void		ibcm_process_async_join_mcg(void *tq_arg);

/*
 * Not static: declared here with external linkage. The definition is not
 * visible in this part of the file.
 */
ibt_status_t ibcm_get_node_rec(ibmf_saa_handle_t, sa_node_record_t *,
    uint64_t c_mask, void *, size_t *);

/* Closes an RC channel given its channel handle and CM state data */
static ibt_status_t ibcm_close_rc_channel(ibt_channel_hdl_t channel,
    ibcm_state_data_t *statep, ibt_execution_mode_t mode);

/*
 * Address Record management definitions.
 *
 * IBCM_DAPL_ATS_NAME is the name string of the DAPL Address Translation
 * Service and IBCM_DAPL_ATS_SID its 64-bit service ID (the low three bytes,
 * 0x41 0x54 0x53, are ASCII "ATS").
 * NOTE(review): IBCM_DAPL_ATS_NBYTES is presumably the size in bytes of the
 * address data kept per record -- confirm against its users.
 */
#define	IBCM_DAPL_ATS_NAME	"DAPL Address Translation Service"
#define	IBCM_DAPL_ATS_SID	0x10000CE100415453ULL
#define	IBCM_DAPL_ATS_NBYTES	16
/* NOTE(review): presumably the service info registered for address records */
ibcm_svc_info_t *ibcm_ar_svcinfop;
/* NOTE(review): presumably the head of the list of address records */
ibcm_ar_t	*ibcm_ar_list;

/*
 * Tunable parameter to turn off the overriding of the pi_path_mtu value.
 *	1	(default) Override the path record's pi_path_mtu value to
 *		IB_MTU_1K for all RC channels. This is done only for
 *		channels established on a Tavor HCA, and only when the path's
 *		pi_path_mtu is greater than IB_MTU_1K.
 *	0	Do not override; use pi_path_mtu as given.
 *
 * The value is consulted by ibt_open_rc_channel() when it builds the REQ.
 */
int	ibcm_override_path_mtu = 1;

/*
 * Debug-only helper, compiled in DEBUG builds only. ibt_open_rc_channel()
 * calls it with the channel and the CM reply address it has just built.
 */
#ifdef DEBUG
static void	ibcm_print_reply_addr(ibt_channel_hdl_t channel,
		    ibcm_mad_addr_t *cm_reply_addr);
#endif

/*
 * Static-analysis annotation: port_ibmf_hdl may be read without holding a
 * lock (ibt_open_rc_channel() reads it that way).
 */
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_hdl}))

/* access is controlled between ibcm_sm.c and ibcm_ti.c by CVs */
_NOTE(SCHEME_PROTECTS_DATA("Serialized access by CV", {ibt_rc_returns_t
    ibt_ud_returns_t ibt_ap_returns_t ibt_ar_t}))

/*
 * Typically, clients initialize these args in one api call, and use in
 * another api
 */
_NOTE(SCHEME_PROTECTS_DATA("Expected usage of ibtl api by client",
    {ibt_path_info_s ibt_cep_path_s ibt_adds_vect_s ibt_mcg_info_s ib_gid_s
    ibt_ud_dest_attr_s ibt_ud_dest_s ibt_srv_data_s ibt_redirect_info_s}))

/*
 * ibt_open_rc_channel()
 *	ibt_open_rc_channel opens a communication channel on the specified
 *	channel to the specified service. For connection service type qp's
 *	the CM initiates the CEP to establish the connection and transitions
 *	the QP/EEC to the "Ready to send" State modifying the QP/EEC's
 *	attributes as necessary.
 *	The implementation of this function assumes that alt path is different
 *	from primary path. It is assumed that the Path functions ensure that.
 *
 *	In outline the function:
 *	  1. validates the arguments, the channel (RC QP in INIT state, not
 *	     already owned by a CM state structure) and the path information,
 *	  2. acquires the resources needed for the connection: an HCA entry,
 *	     an ibmf QP for CM MADs, a local communication id and two MAD
 *	     buffers (REQ and DREQ),
 *	  3. moves the QP to the path's port (INIT to INIT) when they differ,
 *	  4. allocates and registers the CM state data and fills in the REQ,
 *	  5. queues the open request and, in blocking mode, waits until the
 *	     state machine marks the open as done.
 *
 * ARGUMENTS:
 *	channel		RC channel (QP) to connect.
 *	flags		ibt_chan_open_flags_t modifiers. IBT_OCHAN_DUP is not
 *			supported. IBT_OCHAN_REDIRECTED and
 *			IBT_OCHAN_PORT_REDIRECTED are mutually exclusive and
 *			need oc_cm_redirect_info / oc_cm_cep_path respectively.
 *	mode		IBT_BLOCKING or IBT_NONBLOCKING; anything else is
 *			rejected.
 *	chan_args	Open arguments; must be non-NULL, with a non-NULL
 *			oc_path and a non-NULL oc_cm_handler.
 *	ret_args	Must be NULL for IBT_NONBLOCKING and non-NULL for
 *			IBT_BLOCKING. It is saved in the state data; the
 *			state machine code fills it in.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS	on success (or respective failure on error):
 *	IBT_CHAN_HDL_INVALID, IBT_INVALID_PARAM, IBT_CHAN_IN_USE,
 *	IBT_CHAN_SRV_TYPE_INVALID, IBT_CHAN_STATE_INVALID,
 *	IBT_PATH_PKT_LT_TOO_HIGH, IBT_INSUFF_RESOURCE,
 *	IBT_INSUFF_KERNEL_RESOURCE, or the status returned by a failing
 *	helper call. In blocking mode IBT_CM_FAILURE is returned when
 *	rc_status in ret_args is not IBT_CM_SUCCESS once the open is done.
 */
ibt_status_t
ibt_open_rc_channel(ibt_channel_hdl_t channel, ibt_chan_open_flags_t flags,
    ibt_execution_mode_t mode, ibt_chan_open_args_t *chan_args,
    ibt_rc_returns_t *ret_args)
{
	/* all fields that are related to REQ MAD formation */

	ib_pkey_t		prim_pkey;
	ib_lid_t		primary_slid, alternate_slid;
	ib_qpn_t		local_qpn = 0;
	ib_guid_t		hca_guid;
	ib_qkey_t		local_qkey = 0;
	ib_eecn_t		local_eecn = 0;
	ib_eecn_t		remote_eecn = 0;
	boolean_t		primary_grh;
	boolean_t		alternate_grh = B_FALSE;
	ib_lid_t		base_lid;
	ib_com_id_t		local_comid;
	ibmf_msg_t		*ibmf_msg, *ibmf_msg_dreq;
	ibcm_req_msg_t		*req_msgp;

	uint8_t			rdma_in, rdma_out;
	uint8_t			cm_retries;
	uint64_t		local_cm_proc_time;	/* In usec */
	uint8_t			local_cm_resp_time;	/* IB time */
	uint64_t		remote_cm_resp_time;	/* In usec */
	uint32_t		starting_psn = 0;

	/* CM path related fields */
	ibmf_handle_t		ibmf_hdl;
	ibcm_qp_list_t		*cm_qp_entry;
	ibcm_mad_addr_t		cm_reply_addr;

	uint8_t			cm_pkt_lt;

	/* Local args for ibtl/internal CM functions called within */
	ibt_status_t		status;
	ibcm_status_t		lkup_status;
	ibt_qp_query_attr_t	qp_query_attr;

	/* Other misc local args */
	ibt_priv_data_len_t	len;
	ibcm_hca_info_t		*hcap;
	ibcm_state_data_t	*statep;
	uint8_t			port_no;

	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel(chan %p, %X, %x, %p, %p)",
	    channel, flags, mode, chan_args, ret_args);

	if (IBCM_INVALID_CHANNEL(channel)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: invalid channel");
		return (IBT_CHAN_HDL_INVALID);
	}

	/*
	 * chan_args and its path information are dereferenced unconditionally
	 * below; reject NULL here instead of faulting later.
	 */
	if ((chan_args == NULL) || (chan_args->oc_path == NULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "chan_args or its oc_path is NULL", channel);
		return (IBT_INVALID_PARAM);
	}

	/* cm handler should always be specified */
	if (chan_args->oc_cm_handler == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "CM handler is not be specified", channel);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * ret_args is an output of the blocking call only: it must be absent
	 * in non-blocking mode and present (and sane) in blocking mode.
	 */
	if (mode == IBT_NONBLOCKING) {
		if (ret_args != NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
			    " ret_args should be NULL when called in "
			    "non-blocking mode", channel);
			return (IBT_INVALID_PARAM);
		}
	} else if (mode == IBT_BLOCKING) {
		if (ret_args == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
			    " ret_args should be Non-NULL when called in "
			    "blocking mode", channel);
			return (IBT_INVALID_PARAM);
		}
		if (ret_args->rc_priv_data_len > IBT_REP_PRIV_DATA_SZ) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
			    " private data length is too large", channel);
			return (IBT_INVALID_PARAM);
		}
		if ((ret_args->rc_priv_data_len > 0) &&
		    (ret_args->rc_priv_data == NULL)) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
			    " rc_priv_data_len > 0, but rc_priv_data NULL",
			    channel);
			return (IBT_INVALID_PARAM);
		}
	} else { /* any other mode is not valid for ibt_open_rc_channel */
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "invalid mode %x specified", channel, mode);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * XXX: no support yet for ibt_chan_open_flags_t - IBT_OCHAN_DUP
	 */
	if (flags & IBT_OCHAN_DUP) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Unsupported Flags specified: 0x%X", channel, flags);
		return (IBT_INVALID_PARAM);
	}

	if ((flags & IBT_OCHAN_REDIRECTED) &&
	    (flags & IBT_OCHAN_PORT_REDIRECTED)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Illegal to specify IBT_OCHAN_REDIRECTED and "
		    "IBT_OCHAN_PORT_REDIRECTED flags together", channel);
		return (IBT_INVALID_PARAM);
	}

	/* each redirect flag needs its companion argument */
	if (((flags & IBT_OCHAN_REDIRECTED) &&
	    (chan_args->oc_cm_redirect_info == NULL)) ||
	    ((flags & IBT_OCHAN_PORT_REDIRECTED) &&
	    (chan_args->oc_cm_cep_path == NULL))) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Redirect flag specified, but respective arg is NULL",
		    channel);
		return (IBT_INVALID_PARAM);
	}

	if ((flags & IBT_OCHAN_REDIRECTED) &&
	    (chan_args->oc_cm_redirect_info->rdi_dlid == 0) &&
	    (chan_args->oc_cm_redirect_info->rdi_gid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Either rdi_dlid or rdi_gid must be specified for"
		    " IBT_OCHAN_REDIRECTED", channel);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * primary dlid and hca_port_num should never be zero. Either one
	 * being zero makes the path unusable: port_no - 1 is used below as
	 * an index into hca_port_info[], and the dlid goes into the REQ.
	 */
	port_no = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;

	if ((IBCM_PRIM_ADDS_VECT(chan_args).av_dlid == 0) || (port_no == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Primary Path's information is not valid", channel);
		return (IBT_INVALID_PARAM);
	}

	/* validate SID */
	if (chan_args->oc_path->pi_sid == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ERROR: Service ID in path information is 0", channel);
		return (IBT_INVALID_PARAM);
	}
	IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p  SID %llX",
	    channel, chan_args->oc_path->pi_sid);

	/* validate rnr_retry_cnt (enum has more than 3 bits) */
	if ((uint_t)chan_args->oc_path_rnr_retry_cnt > IBT_RNR_INFINITE_RETRY) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ERROR: oc_path_rnr_retry_cnt(%d) is out of range",
		    channel, chan_args->oc_path_rnr_retry_cnt);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Ensure that client is not re-using a QP that is still associated
	 * with a statep
	 */
	IBCM_GET_CHAN_PRIVATE(channel, statep);
	if (statep != NULL) {
		IBCM_RELEASE_CHAN_PRIVATE(channel);
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Channel being re-used on active side", channel);
		return (IBT_CHAN_IN_USE);
	}

	/* Get GUID from Channel */
	hca_guid = ibt_channel_to_hca_guid(channel);

	/* validate QP's hca guid with that from primary path  */
	if (hca_guid != chan_args->oc_path->pi_hca_guid) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "GUID from Channel and primary path don't match", channel);
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Channel GUID %llX primary path GUID %llX", channel,
		    hca_guid, chan_args->oc_path->pi_hca_guid);
		return (IBT_CHAN_HDL_INVALID);
	}

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "Local HCA GUID %llX", channel, hca_guid);

	status = ibt_query_qp(channel, &qp_query_attr);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ibt_query_qp failed %d", channel, status);
		return (status);
	}

	/* If client specified "no port change on QP" */
	if ((qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
	    port_no) && (flags & IBT_OCHAN_PORT_FIXED)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "chan port %d and path port %d does not match", channel,
		    qp_query_attr.qp_info.qp_transport.rc.rc_path. \
		    cep_hca_port_num, port_no);
		return (IBT_INVALID_PARAM);
	}

	if (qp_query_attr.qp_info.qp_trans != IBT_RC_SRV) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Invalid Channel type: Applicable only to RC Channel",
		    channel);
		return (IBT_CHAN_SRV_TYPE_INVALID);
	}

	/* Check if QP is in INIT state or not */
	if (qp_query_attr.qp_info.qp_state != IBT_STATE_INIT) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "QP is not in INIT state %x", channel,
		    qp_query_attr.qp_info.qp_state);
		return (IBT_CHAN_STATE_INVALID);
	}

	local_qpn = qp_query_attr.qp_qpn;

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p Active QPN 0x%x",
	    channel, local_qpn);

#ifdef	NO_EEC_SUPPORT_YET

	if (flags & IBT_OCHAN_RDC_EXISTS) {
		ibt_eec_query_attr_t	eec_query_attr;

		local_qkey = qp_query_attr.qp_info.qp_transport.rd_qkey;

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: RD");

		status = ibt_query_eec(channel, &eec_query_attr);
		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
			    " ibt_query_eec failed %d", channel, status);
			return (status);
		}
		local_eecn = eec_query_attr.eec_eecn;
	}

#endif
	if (chan_args->oc_path->pi_prim_pkt_lt > ibcm_max_ib_pkt_lt) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Huge PktLifeTime %d, Max is %d", channel,
		    chan_args->oc_path->pi_prim_pkt_lt, ibcm_max_ib_pkt_lt);
		return (IBT_PATH_PKT_LT_TOO_HIGH);
	}

	/*
	 * If no HCA found return failure. From here on, every failure return
	 * must drop the HCA access count with ibcm_dec_hca_acc_cnt().
	 */
	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "hcap is NULL. Probably hca is not in active state",
		    channel);
		return (IBT_CHAN_HDL_INVALID);
	}

	rdma_out = chan_args->oc_rdma_ra_out;
	rdma_in = chan_args->oc_rdma_ra_in;

	if ((rdma_in > hcap->hca_max_rdma_in_qp) ||
	    (rdma_out > hcap->hca_max_rdma_out_qp)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "rdma in %d/out %d values exceed hca limits(%d/%d)",
		    channel, rdma_in, rdma_out, hcap->hca_max_rdma_in_qp,
		    hcap->hca_max_rdma_out_qp);
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_INVALID_PARAM);
	}

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "rdma_in %d rdma_out %d", channel, rdma_in, rdma_out);

	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no,
	    NULL, &base_lid);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "primary port_num %d not active", channel, port_no);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}

	/* Validate P_KEY Index */
	status = ibt_index2pkey_byguid(hcap->hca_guid, port_no,
	    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix, &prim_pkey);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "Invalid Primary PKeyIx %x", channel,
		    IBCM_PRIM_CEP_PATH(chan_args).cep_pkey_ix);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "primary_port_num %d primary_pkey 0x%x", channel, port_no,
	    prim_pkey);

	/* re-initialize the port if it has no ibmf handle yet */
	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
	    != IBT_SUCCESS)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ibmf reg or callback setup failed during re-initialize",
		    channel);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}

	ibmf_hdl = hcap->hca_port_info[port_no - 1].port_ibmf_hdl;
	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "primary ibmf_hdl = 0x%p", channel, ibmf_hdl);

	/* source LID = port's base LID plus the path's source path bits */
	primary_slid = base_lid + IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: channel 0x%p "
	    "primary SLID = %x", channel, primary_slid);

	/* check first if alternate path exists or not as it is OPTIONAL */
	if (IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num != 0) {
		uint8_t	alt_port_no;

		alt_port_no = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;

		if (chan_args->oc_path->pi_alt_pkt_lt > ibcm_max_ib_pkt_lt) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
			    "Huge Alt Pkt lt %d", channel,
			    chan_args->oc_path->pi_alt_pkt_lt);
			ibcm_dec_hca_acc_cnt(hcap);
			return (IBT_PATH_PKT_LT_TOO_HIGH);
		}

		/*
		 * base_lid still holds the primary port's value; it is only
		 * re-read when the alternate path uses a different port.
		 */
		if (port_no != alt_port_no) {

			status = ibt_get_port_state_byguid(hcap->hca_guid,
			    alt_port_no, NULL, &base_lid);
			if (status != IBT_SUCCESS) {

				IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
				    "chan 0x%p alt_port_num %d inactive %d",
				    channel, alt_port_no, status);
				ibcm_dec_hca_acc_cnt(hcap);
				return (status);
			}

		}
		alternate_slid =
		    base_lid + IBCM_ALT_ADDS_VECT(chan_args).av_src_path;

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "alternate SLID = %x", channel, alternate_slid);
	}

	/*
	 * only pkey needs to be zero'ed, because all other fields are set
	 * in ibcm_init_reply_addr. But, let's bzero the complete struct for
	 * any future modifications.
	 */
	bzero(&cm_reply_addr, sizeof (cm_reply_addr));

	/* Initialize the MAD destination address in stored_reply_addr */
	if ((status = ibcm_init_reply_addr(hcap, &cm_reply_addr, chan_args,
	    flags, &cm_pkt_lt, primary_slid)) != IBT_SUCCESS) {

		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ibcm_init_reply_addr failed status %d ", channel, status);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}


	/* Initialize the pkey for CM MAD communication */
	if (cm_reply_addr.rcvd_addr.ia_p_key == 0)
		cm_reply_addr.rcvd_addr.ia_p_key = prim_pkey;

#ifdef DEBUG
	ibcm_print_reply_addr(channel, &cm_reply_addr);
#endif

	/* Retrieve an ibmf qp for sending CM MADs */
	if ((cm_qp_entry = ibcm_find_qp(hcap, port_no,
	    cm_reply_addr.rcvd_addr.ia_p_key)) == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "unable to allocate ibmf qp for CM MADs", channel);
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_INSUFF_RESOURCE);
	}


	if (ibcm_alloc_comid(hcap, &local_comid) != IBCM_SUCCESS) {
		ibcm_release_qp(cm_qp_entry);
		ibcm_dec_hca_acc_cnt(hcap);
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan 0x%p"
		    " Unable to allocate comid", channel);
		return (IBT_INSUFF_KERNEL_RESOURCE);
	}

	/* allocate an IBMF mad buffer (REQ) */
	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg,
	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
		ibcm_release_qp(cm_qp_entry);
		ibcm_free_comid(hcap, local_comid);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}

	/* allocate an IBMF mad buffer (DREQ) */
	if ((status = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg_dreq,
	    MAD_METHOD_SEND)) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
		    "chan 0x%p ibcm_alloc_out_msg failed", channel);
		(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
		ibcm_release_qp(cm_qp_entry);
		ibcm_free_comid(hcap, local_comid);
		ibcm_dec_hca_acc_cnt(hcap);
		return (status);
	}

	/* Init to Init, if QP's port does not match with path information */
	if (qp_query_attr.qp_info.qp_transport.rc.rc_path.cep_hca_port_num !=
	    IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num) {

		ibt_qp_info_t		qp_info;
		ibt_cep_modify_flags_t	cep_flags;

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
		    "chan 0x%p chan port %d", channel,
		    qp_query_attr.qp_info.qp_transport.rc.rc_path.\
		    cep_hca_port_num);

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
		    "chan 0x%p path port %d", channel, port_no);

		bzero(&qp_info, sizeof (qp_info));
		/* For now, set it to RC type */

		qp_info.qp_trans = IBT_RC_SRV;
		qp_info.qp_state = IBT_STATE_INIT;
		qp_info.qp_transport.rc.rc_path.cep_hca_port_num = port_no;

		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;

		status = ibt_modify_qp(channel, cep_flags, &qp_info, NULL);

		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: "
			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
			ibcm_release_qp(cm_qp_entry);
			ibcm_free_comid(hcap, local_comid);
			ibcm_dec_hca_acc_cnt(hcap);
			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg);
			(void) ibcm_free_out_msg(ibmf_hdl, &ibmf_msg_dreq);
			return (status);
		} else
			IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: "
			    "chan 0x%p ibt_modify_qp() = %d", channel, status);
	}

	/*
	 * No failure returns past this point: "status" is IBT_SUCCESS here
	 * and is what a non-blocking call returns at the end.
	 */

	/* allocate ibcm_state_data_t before grabbing the WRITER lock */
	statep = kmem_zalloc(sizeof (ibcm_state_data_t), KM_SLEEP);
	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
	lkup_status = ibcm_lookup_msg(IBCM_OUTGOING_REQ, local_comid, 0, 0,
	    hcap, &statep);
	rw_exit(&hcap->hca_state_rwlock);

	/* CM should be seeing this for the first time */
	ASSERT(lkup_status == IBCM_LOOKUP_NEW);

	/* Increment the hca's resource count */
	ibcm_inc_hca_res_cnt(hcap);

	/* Once a resource created on hca, no need to hold the acc cnt */
	ibcm_dec_hca_acc_cnt(hcap);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))

	statep->timerid = 0;
	statep->local_hca_guid = hca_guid;
	statep->local_qpn = local_qpn;
	statep->stored_reply_addr.cm_qp_entry = cm_qp_entry;
	statep->prim_port = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;
	statep->alt_port = IBCM_ALT_CEP_PATH(chan_args).cep_hca_port_num;


	/* Save "statep" as channel's CM private data.  */
	statep->channel = channel;
	IBCM_SET_CHAN_PRIVATE(statep->channel, statep);

	statep->stored_msg = ibmf_msg;
	statep->dreq_msg = ibmf_msg_dreq;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*req_msgp))

	/* Start filling in the REQ MAD */
	req_msgp = (ibcm_req_msg_t *)IBCM_OUT_MSGP(statep->stored_msg);
	req_msgp->req_local_comm_id = h2b32(local_comid);
	req_msgp->req_svc_id = h2b64(chan_args->oc_path->pi_sid);
	req_msgp->req_local_ca_guid = h2b64(hca_guid);
	req_msgp->req_local_qkey = h2b32(local_qkey);	/* for EEC/RD */

	/* Bytes 32-35 are req_local_qpn and req_off_resp_resources */
	req_msgp->req_local_qpn_plus = h2b32(local_qpn << 8 | rdma_in);

	/* Bytes 36-39 are req_local_eec_no and req_off_initiator_depth */
	req_msgp->req_local_eec_no_plus = h2b32(local_eecn << 8 | rdma_out);

	/* client-supplied remote CM response time, else the CM default */
	if (flags & IBT_OCHAN_REMOTE_CM_TM)
		remote_cm_resp_time = chan_args->oc_remote_cm_time;
	else
		remote_cm_resp_time = ibcm_remote_response_time;

	/*
	 * Bytes 40-43 - remote_eecn, remote_cm_resp_time, tran_type,
	 * IBT_CM_FLOW_CONTROL is always set by default.
	 */
	req_msgp->req_remote_eecn_plus = h2b32(
	    remote_eecn << 8 | (ibt_usec2ib(remote_cm_resp_time) & 0x1f) << 3 |
	    IBT_RC_SRV << 1 | IBT_CM_FLOW_CONTROL);

	/* client-supplied local CM processing time, else the CM default */
	if (flags & IBT_OCHAN_LOCAL_CM_TM)
		local_cm_proc_time = chan_args->oc_local_cm_time;
	else
		local_cm_proc_time = ibcm_local_processing_time;

	/* processing time plus a packet round trip plus software delay */
	local_cm_resp_time = ibt_usec2ib(local_cm_proc_time +
	    2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt) +
	    ibcm_sw_delay);

	/* save retry count */
	statep->cep_retry_cnt = chan_args->oc_path_retry_cnt;

	if (flags & IBT_OCHAN_STARTING_PSN)
		starting_psn = chan_args->oc_starting_psn;

	/* the value is packed into a 5-bit field below; clamp it to fit */
	if (local_cm_resp_time > 0x1f)
		local_cm_resp_time = 0x1f;

	/* Bytes 44-47 are req_starting_psn, local_cm_resp_time and retry_cnt */
	req_msgp->req_starting_psn_plus = h2b32(starting_psn << 8 |
	    local_cm_resp_time << 3 | statep->cep_retry_cnt);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "Prim Pkt lt (IB time) 0x%x", channel,
	    chan_args->oc_path->pi_prim_pkt_lt);

	/* the two usec values are uint64_t, hence the "ll" length modifier */
	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "local_cm_proc_time(usec) %llu ", channel, local_cm_proc_time);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "local_cm_resp_time(ib_time) %d", channel, local_cm_resp_time);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "remote_cm_resp_time (usec) %llu", channel, remote_cm_resp_time);

	statep->starting_psn = starting_psn;

	/* Pkey - bytes 48-49 */
	req_msgp->req_part_key = h2b16(prim_pkey);

	/* client-supplied CM retry count, else the CM default */
	if (flags & IBT_OCHAN_CM_RETRY)
		cm_retries = chan_args->oc_cm_retry_cnt;
	else
		cm_retries = ibcm_max_retries;

	statep->max_cm_retries = statep->remaining_retry_cnt = cm_retries;
	req_msgp->req_max_cm_retries_plus = statep->max_cm_retries << 4;

	/*
	 * Check whether SRQ is associated with this Channel, if yes, then
	 * set the SRQ Exists bit in the REQ.
	 */
	if (qp_query_attr.qp_srq != NULL) {
		req_msgp->req_max_cm_retries_plus |= (1 << 3);
	}

	/*
	 * By default on Tavor, we override the PathMTU to 1K.
	 * To turn this off, set ibcm_override_path_mtu = 0.
	 */
	if (ibcm_override_path_mtu && IBCM_IS_HCA_TAVOR(hcap) &&
	    (chan_args->oc_path->pi_path_mtu > IB_MTU_1K)) {
		req_msgp->req_mtu_plus = IB_MTU_1K << 4 |
		    chan_args->oc_path_rnr_retry_cnt;
		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p PathMTU"
		    " overridden to IB_MTU_1K(%d) from %d", channel, IB_MTU_1K,
		    chan_args->oc_path->pi_path_mtu);
	} else
		req_msgp->req_mtu_plus = chan_args->oc_path->pi_path_mtu << 4 |
		    chan_args->oc_path_rnr_retry_cnt;

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p CM retry cnt %d"
	    " staring PSN %x", channel, cm_retries, starting_psn);


#ifdef	NO_EEC_SUPPORT_YET
	if (flags & IBT_OCHAN_RDC_EXISTS)
		req_msgp->req_mtu_plus |= 8;
#endif

	/* Initialize the "primary" port stuff next - bytes 52-95 */
	req_msgp->req_primary_l_port_lid = h2b16(primary_slid);
	req_msgp->req_primary_r_port_lid =
	    h2b16(IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);
	req_msgp->req_primary_l_port_gid.gid_prefix =
	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_prefix);
	req_msgp->req_primary_l_port_gid.gid_guid =
	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_sgid.gid_guid);
	req_msgp->req_primary_r_port_gid.gid_prefix =
	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix);
	req_msgp->req_primary_r_port_gid.gid_guid =
	    h2b64(IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);
	primary_grh = IBCM_PRIM_ADDS_VECT(chan_args).av_send_grh;

	statep->remote_hca_guid = /* not correct, but helpful for debugging */
	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid;

	/*
	 * Bytes 88-91 - primary_flowlbl, and primary_srate. Flow label,
	 * traffic class and hop limit are taken from the address vector only
	 * when a GRH is sent; otherwise they are 0, 0 and 1, and bit 3 of
	 * req_primary_sl_plus is set.
	 */
	req_msgp->req_primary_flow_label_plus =
	    h2b32(((primary_grh == B_TRUE) ?
	    (IBCM_PRIM_ADDS_VECT(chan_args).av_flow << 12) : 0) |
	    IBCM_PRIM_ADDS_VECT(chan_args).av_srate);
	req_msgp->req_primary_traffic_class = (primary_grh == B_TRUE) ?
	    IBCM_PRIM_ADDS_VECT(chan_args).av_tclass : 0;
	req_msgp->req_primary_hop_limit = (primary_grh == B_TRUE) ?
	    IBCM_PRIM_ADDS_VECT(chan_args).av_hop : 1;
	req_msgp->req_primary_sl_plus =
	    IBCM_PRIM_ADDS_VECT(chan_args).av_srvl << 4 |
	    ((primary_grh == B_TRUE) ? 0 : 8);

	/* packet round trip plus the HCA's ack delay, as IB time */
	req_msgp->req_primary_localtime_plus =
	    ibt_usec2ib((2 * ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt)) +
	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;

	IBTF_DPRINTF_L2(cmlog, "ibt_open_rc_channel: chan %p statep %p",
	    channel, statep);
	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "active hca_ack_delay (usec) %d", channel,
	    req_msgp->req_primary_localtime_plus);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "Sent primary cep timeout (IB Time) %d", channel,
	    hcap->hca_ack_delay);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p prim_dlid %x ",
	    channel, IBCM_PRIM_ADDS_VECT(chan_args).av_dlid);

	IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
	    "prim GID %llX:%llX", channel,
	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_prefix,
	    IBCM_PRIM_ADDS_VECT(chan_args).av_dgid.gid_guid);

	/*
	 * Initialize the "alternate" port stuff - optional. The condition is
	 * the same one that guarded the computation of alternate_slid above.
	 */
	if (chan_args->oc_path->pi_alt_cep_path.cep_hca_port_num != 0) {
		ib_gid_t	tmp_gid;

		req_msgp->req_alt_l_port_lid = h2b16(alternate_slid);
		req_msgp->req_alt_r_port_lid =
		    h2b16(IBCM_ALT_ADDS_VECT(chan_args).av_dlid);
		/*
		 * doing all this as req_alt_r/l_port_gid is at offset
		 * 100, 116 which is not divisible by 8
		 */

		tmp_gid.gid_prefix =
		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix);
		tmp_gid.gid_guid =
		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
		bcopy(&tmp_gid, &req_msgp->req_alt_r_port_gid[0],
		    sizeof (ib_gid_t));
		tmp_gid.gid_prefix =
		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_prefix);
		tmp_gid.gid_guid =
		    h2b64(IBCM_ALT_ADDS_VECT(chan_args).av_sgid.gid_guid);

		bcopy(&tmp_gid, &req_msgp->req_alt_l_port_gid[0],
		    sizeof (ib_gid_t));
		alternate_grh = IBCM_ALT_ADDS_VECT(chan_args).av_send_grh;

		/* Bytes 132-135 - alternate_flow_label, and alternate srate */
		req_msgp->req_alt_flow_label_plus = h2b32(
		    (((alternate_grh == B_TRUE) ?
		    (IBCM_ALT_ADDS_VECT(chan_args).av_flow << 12) : 0) |
		    IBCM_ALT_ADDS_VECT(chan_args).av_srate));
		req_msgp->req_alt_traffic_class = (alternate_grh == B_TRUE) ?
		    IBCM_ALT_ADDS_VECT(chan_args).av_tclass : 0;
		req_msgp->req_alt_hop_limit = (alternate_grh == B_TRUE) ?
		    IBCM_ALT_ADDS_VECT(chan_args).av_hop : 1;
		req_msgp->req_alt_sl_plus =
		    IBCM_ALT_ADDS_VECT(chan_args).av_srvl << 4 |
		    ((alternate_grh == B_TRUE) ? 0 : 8);
		req_msgp->req_alt_localtime_plus = ibt_usec2ib((2 *
		    ibt_ib2usec(chan_args->oc_path->pi_alt_pkt_lt)) +
		    ibt_ib2usec(hcap->hca_ack_delay)) << 3;

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "alt_dlid %x ", channel,
		    IBCM_ALT_ADDS_VECT(chan_args).av_dlid);

		IBTF_DPRINTF_L5(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "alt GID %llX:%llX", channel,
		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_prefix,
		    IBCM_ALT_ADDS_VECT(chan_args).av_dgid.gid_guid);
	}

	/* client private data is silently truncated to what the REQ holds */
	len = min(chan_args->oc_priv_data_len, IBT_REQ_PRIV_DATA_SZ);
	if ((len > 0) && chan_args->oc_priv_data)
		bcopy(chan_args->oc_priv_data, req_msgp->req_private_data, len);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*req_msgp))

	/* return_data is filled up in the state machine code */
	if (ret_args != NULL) {
		statep->open_return_data = ret_args;
	}

	/* initialize some statep fields here */
	statep->mode = IBCM_ACTIVE_MODE;
	statep->hcap = hcap;

	statep->cm_handler = chan_args->oc_cm_handler;
	statep->state_cm_private = chan_args->oc_cm_clnt_private;

	statep->pkt_life_time =
	    ibt_ib2usec(chan_args->oc_path->pi_prim_pkt_lt);

	/*
	 * CM packet round trip plus the remote CM response time, converted
	 * to IB time and back to usec.
	 */
	statep->timer_value = ibt_ib2usec(ibt_usec2ib(
	    2 * ibt_ib2usec(cm_pkt_lt) + remote_cm_resp_time));

	/* Initialize statep->stored_reply_addr */
	statep->stored_reply_addr.ibmf_hdl = ibmf_hdl;

	/* Initialize stored reply addr fields */
	statep->stored_reply_addr.grh_hdr = cm_reply_addr.grh_hdr;
	statep->stored_reply_addr.rcvd_addr = cm_reply_addr.rcvd_addr;
	statep->stored_reply_addr.grh_exists = cm_reply_addr.grh_exists;
	statep->stored_reply_addr.port_num = cm_reply_addr.port_num;

	/*
	 * The IPD on local/active side is calculated by path functions,
	 * hence available in the args of ibt_open_rc_channel
	 */
	statep->local_srate = IBCM_PRIM_ADDS_VECT(chan_args).av_srate;
	statep->local_alt_srate = IBCM_ALT_ADDS_VECT(chan_args).av_srate;

	/* Store the source path bits for primary and alt paths */
	statep->prim_src_path_bits = IBCM_PRIM_ADDS_VECT(chan_args).av_src_path;
	statep->alt_src_path_bits = IBCM_ALT_ADDS_VECT(chan_args).av_src_path;

	statep->open_flow = 1;
	statep->open_done = B_FALSE;
	statep->state = statep->timer_stored_state = IBCM_STATE_REQ_SENT;
	IBCM_REF_CNT_INCR(statep);	/* Decremented before return */
	IBCM_REF_CNT_INCR(statep);	/* Decremented after REQ is posted */
	statep->send_mad_flags |= IBCM_REQ_POST_BUSY;

	/*
	 * Skip moving channel to error state during close, for OFUV clients.
	 * OFUV clients transition the channel to error state by itself.
	 */
	if (flags & IBT_OCHAN_OFUV)
		statep->is_this_ofuv_chan = B_TRUE;

	IBCM_OUT_HDRP(statep->stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_REQ + IBCM_ATTR_BASE_ID);

	IBCM_OUT_HDRP(statep->stored_msg)->TransactionID =
	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_REQ, statep->local_comid,
	    0));

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))

	ibtl_cm_chan_is_opening(channel);

	/* hand the request over; the REQ is posted from the open queue */
	ibcm_open_enqueue(statep);

	mutex_enter(&statep->state_mutex);

	if (mode == IBT_BLOCKING) {

		/* wait for REQ/REP/RTU */
		while (statep->open_done != B_TRUE) {
			cv_wait(&statep->block_client_cv, &statep->state_mutex);
		}

		/*
		 * In the case that open_channel() fails because of a
		 * REJ or timeout, change retval to IBT_CM_FAILURE
		 */
		if (statep->open_return_data->rc_status != IBT_CM_SUCCESS) {
			status = IBT_CM_FAILURE;
			ibtl_cm_chan_open_is_aborted(channel);
		}

		IBTF_DPRINTF_L3(cmlog, "ibt_open_rc_channel: chan 0x%p "
		    "ret status %d cm status %d", channel, status,
		    statep->open_return_data->rc_status);
	}

	/* decrement the ref-count before leaving here */
	IBCM_REF_CNT_DECR(statep);

	mutex_exit(&statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibt_open_rc_channel: chan 0x%p done", channel);
	return (status);
}

/*
 * ibcm_init_reply_addr:
 *
 * A brief description of the functionality follows.
 *
 * For IBT_OCHAN_PORT_REDIRECTED (ie., port redirected case):
 *	Build CM path from chan_args->oc_cm_cep_path
 *	Set CM pkt lt (ie.,life time) to chan_args->oc_cm_pkt_lt
 *
 * For IBT_OCHAN_REDIRECTED (ie., port and CM redirected case):
 *	If Redirect LID is specified,
 *		If Redirect GID is not specified or specified to be on the same
 *		    subnet, then
 *			Build CM path from chan_args->oc_cm_redirect_info
 *			Set CM pkt lt to subnet timeout
 *		Else (ie., GID specified, but on a different subnet)
 *			Do a path lookup to build CM Path and set CM pkt lt
 *
 */
static ibt_status_t
ibcm_init_reply_addr(ibcm_hca_info_t *hcap, ibcm_mad_addr_t *reply_addr,
    ibt_chan_open_args_t *chan_args, ibt_chan_open_flags_t flags,
    ib_time_t *cm_pkt_lt, ib_lid_t prim_slid)
{
	ibt_adds_vect_t	*cm_adds;
	ibt_path_info_t	path;
	boolean_t	cm_grh;
	ibt_status_t	status;

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_reply_addr:");

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*reply_addr))

	/*
	 * sending side CM lid/gid/port num are not based on any redirect
	 * params. These values are set to primary RC path lid/gid/port num.
	 * In the future, these values can be set based on framework policy
	 * decisions ensuring reachability.
	 */
	reply_addr->grh_hdr.ig_sender_gid =
	    IBCM_PRIM_ADDS_VECT(chan_args).av_sgid;
	reply_addr->rcvd_addr.ia_local_lid = prim_slid;
	reply_addr->port_num = IBCM_PRIM_CEP_PATH(chan_args).cep_hca_port_num;

	if (flags & IBT_OCHAN_PORT_REDIRECTED) {
		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
		    "IBT_OCHAN_PORT_REDIRECTED specified");

		status = ibt_index2pkey_byguid(hcap->hca_guid,
		    chan_args->oc_cm_cep_path->cep_hca_port_num,
		    chan_args->oc_cm_cep_path->cep_pkey_ix,
		    &reply_addr->rcvd_addr.ia_p_key);

		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_init_rely_addr: Invalid "
			    "CM PKeyIx %x port_num %x",
			    chan_args->oc_cm_cep_path->cep_pkey_ix,
			    chan_args->oc_cm_cep_path->cep_hca_port_num);
			return (status);
		}

		cm_adds = &(chan_args->oc_cm_cep_path->cep_adds_vect);
		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: dlid = %x",
		    cm_adds->av_dlid);

		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
		reply_addr->rcvd_addr.ia_remote_qno = 1;
		*cm_pkt_lt = chan_args->oc_cm_pkt_lt;

	} else if (flags & IBT_OCHAN_REDIRECTED) {
		ibt_redirect_info_t	*redirect_info;
		ibt_hca_portinfo_t	*port_infop;
		uint_t			psize, nports;

		IBTF_DPRINTF_L4(cmlog, "ibcm_init_rely_addr: "
		    "IBT_OCHAN_REDIRECTED specified");

		redirect_info = chan_args->oc_cm_redirect_info;

		if ((redirect_info->rdi_gid.gid_prefix == 0) ||
		    (redirect_info->rdi_gid.gid_guid == 0)) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
			    "ERROR: Re-direct GID value NOT Provided.");
			return (IBT_INVALID_PARAM);
		}

		/* As per spec definition 1.1, it's always IB_GSI_QKEY */
		reply_addr->rcvd_addr.ia_q_key = redirect_info->rdi_qkey;
		reply_addr->rcvd_addr.ia_remote_qno = redirect_info->rdi_qpn;
		reply_addr->rcvd_addr.ia_p_key = redirect_info->rdi_pkey;

		/*
		 * if LID is non-zero in classportinfo then use classportinfo
		 * fields to form CM MAD destination address.
		 */
		if (redirect_info->rdi_dlid != 0) {
			status = ibtl_cm_query_hca_ports_byguid(hcap->hca_guid,
			    reply_addr->port_num, &port_infop, &nports, &psize);
			if ((status != IBT_SUCCESS) || (nports == 0)) {
				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
				    "Query Ports Failed: %d", status);
				return (status);
			} else if (port_infop->p_subnet_timeout >
			    ibcm_max_ib_pkt_lt) {
				IBTF_DPRINTF_L2(cmlog, "ibcm_init_reply_addr: "
				    "large subnet timeout %x port_no %x",
				    port_infop->p_subnet_timeout,
				    reply_addr->port_num);
				ibt_free_portinfo(port_infop, psize);
				return (IBT_PATH_PKT_LT_TOO_HIGH);
			} else {
				IBTF_DPRINTF_L3(cmlog, "ibcm_init_reply_addr: "
				    "subnet timeout %x port_no %x",
				    port_infop->p_subnet_timeout,
				    reply_addr->port_num);

				*cm_pkt_lt =
				    ibt_ib2usec(min(ibcm_max_ib_mad_pkt_lt,
				    port_infop->p_subnet_timeout));

				ibt_free_portinfo(port_infop, psize);
			}

			reply_addr->rcvd_addr.ia_remote_lid =
			    redirect_info->rdi_dlid;
			reply_addr->rcvd_addr.ia_service_level =
			    redirect_info->rdi_sl;
			reply_addr->grh_exists = B_TRUE;
			reply_addr->grh_hdr.ig_recver_gid =
			    redirect_info->rdi_gid;
			reply_addr->grh_hdr.ig_tclass =
			    redirect_info->rdi_tclass;
			reply_addr->grh_hdr.ig_flow_label =
			    redirect_info->rdi_flow;

			/* Classportinfo doesn't have hoplimit field */
			reply_addr->grh_hdr.ig_hop_limit = 1;
			return (IBT_SUCCESS);

		} else {
			ibt_path_attr_t	path_attr;
			ib_gid_t	path_dgid[1];

			/*
			 * If GID is specified, and LID is zero in classportinfo
			 * do a path lookup using specified GID, Pkey,
			 * in classportinfo
			 */

			bzero(&path_attr, sizeof (path_attr));

			path_attr.pa_dgids = &path_dgid[0];
			path_attr.pa_dgids[0] = redirect_info->rdi_gid;

			/*
			 * use reply_addr below, as sender_gid in reply_addr
			 * may have been set above based on some policy decision
			 * for originating end point for CM MADs above
			 */
			path_attr.pa_sgid = reply_addr->grh_hdr.ig_sender_gid;
			path_attr.pa_num_dgids = 1;
			path_attr.pa_pkey = redirect_info->rdi_pkey;

			if ((status = ibt_get_paths(ibcm_ibt_handle,
			    IBT_PATH_PKEY, &path_attr, 1, &path, NULL)) !=
			    IBT_SUCCESS)
				return (status);

			/* Initialize cm_adds */
			cm_adds = &path.pi_prim_cep_path.cep_adds_vect;
			*cm_pkt_lt = path.pi_prim_pkt_lt;
		}

	} else	{ /* cm_pkey initialized in ibt_open_rc_channel */
		reply_addr->rcvd_addr.ia_q_key = IB_GSI_QKEY;
		reply_addr->rcvd_addr.ia_remote_qno = 1;
		*cm_pkt_lt = chan_args->oc_path->pi_prim_pkt_lt;
		cm_adds = &(IBCM_PRIM_ADDS_VECT(chan_args));
	}


	cm_grh = cm_adds->av_send_grh;
	reply_addr->grh_exists = cm_grh;

	reply_addr->rcvd_addr.ia_remote_lid =
	    cm_adds->av_dlid;
	reply_addr->grh_hdr.ig_recver_gid =
	    cm_adds->av_dgid;
	reply_addr->grh_hdr.ig_flow_label =
	    cm_adds->av_flow & IB_GRH_FLOW_LABEL_MASK;
	reply_addr->grh_hdr.ig_tclass =
	    (cm_grh == B_TRUE) ? cm_adds->av_tclass : 0;
	reply_addr->grh_hdr.ig_hop_limit =
	    (cm_grh == B_TRUE) ? cm_adds->av_hop : 1;
	reply_addr->rcvd_addr.ia_service_level =
	    cm_adds->av_srvl;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*reply_addr))

	return (IBT_SUCCESS);
}


/*
 * ibt_prime_close_rc_channel()
 *	Pre-allocates the DREQ MAD needed by the close channel operation, so
 *	that ibt_close_rc_channel can later be called from interrupt routine.
 *
 * INPUTS:
 *	channel			The RC channel that will later be closed.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			DREQ MAD is available, or the channel
 *					no longer has CM state attached.
 *	IBT_CHAN_HDL_INVALID		channel is not a valid handle.
 *	IBT_CHAN_SRV_TYPE_INVALID	channel is not an RC channel.
 *	IBT_CHAN_STATE_INVALID		connection is not in established state.
 *	other				failure status of ibcm_alloc_out_msg().
 *
 * Clients are typically expected to call this function in established state
 */
ibt_status_t
ibt_prime_close_rc_channel(ibt_channel_hdl_t channel)
{
	ibcm_state_data_t	*statep;
	ibt_status_t		alloc_status = IBT_SUCCESS;

	IBTF_DPRINTF_L3(cmlog, "ibt_prime_close_rc_channel(%p)", channel);

	/* Reject bad handles and non-RC channels up front */
	if (IBCM_INVALID_CHANNEL(channel)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
		    "invalid channel", channel);
		return (IBT_CHAN_HDL_INVALID);
	}

	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
		    "Invalid Channel type: Applicable only to RC Channel",
		    channel);
		return (IBT_CHAN_SRV_TYPE_INVALID);
	}

	/*
	 * Look up the CM state for the channel. It may already be gone if
	 * the remote side sent a DREQ; there is nothing left to prime then.
	 */
	IBCM_GET_CHAN_PRIVATE(channel, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
		    "statep NULL", channel);
		return (IBT_SUCCESS);
	}

	/* Only an established connection can be primed */
	mutex_enter(&statep->state_mutex);
	IBCM_RELEASE_CHAN_PRIVATE(channel);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_STATE_INVALID);
	}

	/* Hold statep across the allocation done without the mutex */
	IBCM_REF_CNT_INCR(statep);
	IBTF_DPRINTF_L4(cmlog, "ibt_prime_close_rc_channel: chan 0x%p statep %p"
	    " state %x", channel, statep, statep->state);
	mutex_exit(&statep->state_mutex);

	/* Allocate the DREQ MAD only if one is not already there */
	if (statep->dreq_msg == NULL) {
		alloc_status = ibcm_alloc_out_msg(
		    statep->stored_reply_addr.ibmf_hdl, &statep->dreq_msg,
		    MAD_METHOD_SEND);
	}

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);

	if (alloc_status == IBT_SUCCESS) {
		/* If this message isn't seen then the priming failed */
		IBTF_DPRINTF_L5(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
		    "done", channel);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibt_prime_close_rc_channel: chan 0x%p "
		    "ibcm_alloc_out_msg failed ", channel);
	}

	return (alloc_status);
}

/*
 * ibt_close_rc_channel()
 *	It closes an established channel.
 *
 * INPUTS:
 *	channel			The RC channel to close.
 *	mode			IBT_BLOCKING, IBT_NONBLOCKING or
 *				IBT_NOCALLBACKS.
 *	priv_data		Optional private data copied into the DREQ
 *				MAD; at most IBT_DREQ_PRIV_DATA_SZ bytes are
 *				used.
 *	priv_data_len		Length of priv_data.
 *	ret_status		Optional location for the close status.
 *	ret_priv_data		Optional buffer for returned private data.
 *	ret_priv_data_len_p	Optional in/out length of ret_priv_data; it is
 *				only honored when ret_priv_data is also given.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			on success
 *	IBT_CHAN_HDL_INVALID		channel is not a valid handle
 *	IBT_CHAN_SRV_TYPE_INVALID	channel is not an RC channel
 *	IBT_INVALID_PARAM		bad mode, or return buffer length is
 *					larger than IBT_DREP_PRIV_DATA_SZ in
 *					blocking mode
 *	IBT_CHAN_STATE_INVALID		no CM state, or no DREQ MAD has been
 *					allocated for the channel
 *	other				as returned by ibcm_close_rc_channel()
 */
ibt_status_t
ibt_close_rc_channel(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
    void *priv_data, ibt_priv_data_len_t priv_data_len, uint8_t *ret_status,
    void *ret_priv_data, ibt_priv_data_len_t *ret_priv_data_len_p)
{
	ibcm_state_data_t	*statep;

	/* The last value traced is a length, not a pointer: use %d */
	IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel(%p, %x, %p, %d, %d)",
	    channel, mode, priv_data, priv_data_len,
	    (ret_priv_data_len_p == NULL) ? 0 : *ret_priv_data_len_p);

	/* validate channel, first */
	if (IBCM_INVALID_CHANNEL(channel)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "invalid channel", channel);
		return (IBT_CHAN_HDL_INVALID);
	}

	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "Invalid Channel type: Applicable only to RC Channel",
		    channel);
		return (IBT_CHAN_SRV_TYPE_INVALID);
	}

	if (mode == IBT_BLOCKING) {
		/* valid only for BLOCKING MODE */
		if ((ret_priv_data_len_p != NULL) &&
		    (*ret_priv_data_len_p > IBT_DREP_PRIV_DATA_SZ)) {
			IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p"
			    " private data len %d is too large", channel,
			    *ret_priv_data_len_p);
			return (IBT_INVALID_PARAM);
		}
	} else if ((mode != IBT_NONBLOCKING) && (mode != IBT_NOCALLBACKS)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "invalid mode %x specified", channel, mode);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * A close is already in progress or complete: report that to the
	 * caller and, for the synchronous modes, wait for it to finish.
	 */
	if (ibtl_cm_is_chan_closing(channel) ||
	    ibtl_cm_is_chan_closed(channel)) {
		if (ret_status)
			*ret_status = IBT_CM_CLOSED_ALREADY;

		/* No private data to return to the client */
		if (ret_priv_data_len_p != NULL)
			*ret_priv_data_len_p = 0;

		if ((mode == IBT_BLOCKING) ||
		    (mode == IBT_NOCALLBACKS)) {
			IBCM_GET_CHAN_PRIVATE(channel, statep);
			if (statep == NULL)
				return (IBT_SUCCESS);
			mutex_enter(&statep->state_mutex);
			IBCM_RELEASE_CHAN_PRIVATE(channel);
			/* hold statep while waiting for the close to finish */
			IBCM_REF_CNT_INCR(statep);
			while (statep->close_done != B_TRUE)
				cv_wait(&statep->block_client_cv,
				    &statep->state_mutex);
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);
		}

		IBTF_DPRINTF_L3(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "already marked for closing", channel);

		return (IBT_SUCCESS);
	}

	/* get the statep */
	IBCM_GET_CHAN_PRIVATE(channel, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "statep NULL", channel);
		return (IBT_CHAN_STATE_INVALID);
	}

	mutex_enter(&statep->state_mutex);

	/* The DREQ MAD must already exist; it is not allocated here */
	if (statep->dreq_msg == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_close_rc_channel: chan 0x%p "
		    "Fatal Error: dreq_msg is NULL", channel);
		IBCM_RELEASE_CHAN_PRIVATE(channel);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_STATE_INVALID);
	}

	/* Return private data is recorded only if buffer and length exist */
	if ((ret_priv_data == NULL) || (ret_priv_data_len_p == NULL)) {
		statep->close_ret_priv_data = NULL;
		statep->close_ret_priv_data_len = NULL;
	} else {
		statep->close_ret_priv_data = ret_priv_data;
		statep->close_ret_priv_data_len = ret_priv_data_len_p;
	}

	/* Copy the (truncated) client private data into the DREQ MAD */
	priv_data_len = min(priv_data_len, IBT_DREQ_PRIV_DATA_SZ);
	if ((priv_data != NULL) && (priv_data_len > 0)) {
		bcopy(priv_data, ((ibcm_dreq_msg_t *)
		    IBCM_OUT_MSGP(statep->dreq_msg))->dreq_private_data,
		    priv_data_len);
	}
	statep->close_ret_status = ret_status;

	IBCM_RELEASE_CHAN_PRIVATE(channel);
	IBCM_REF_CNT_INCR(statep);

	/*
	 * ibcm_close_rc_channel() is entered with state_mutex held and
	 * releases it, along with the reference taken above.
	 */
	if (mode != IBT_NONBLOCKING) {
		return (ibcm_close_rc_channel(channel, statep, mode));
	}

	/* IBT_NONBLOCKING */
	ibcm_close_enqueue(statep);
	mutex_exit(&statep->state_mutex);

	return (IBT_SUCCESS);
}

/*
 * ibcm_close_start()
 *	Starts a non-blocking close of the channel associated with statep.
 *	The state mutex is acquired here and is released by
 *	ibcm_close_rc_channel(); its return value is ignored.
 */
void
ibcm_close_start(ibcm_state_data_t *statep)
{
	ibt_channel_hdl_t	chan;

	mutex_enter(&statep->state_mutex);
	chan = statep->channel;
	(void) ibcm_close_rc_channel(chan, statep, IBT_NONBLOCKING);
}

/*
 * ibcm_close_rc_channel()
 *	Worker for closing an RC channel, driven by the current CM
 *	connection state of statep.
 *
 * Locking / reference contract, as visible in this function:
 *	- Must be entered with statep->state_mutex held; the mutex is
 *	  released on every return path.
 *	- Every return path drops one reference on statep (IBCM_REF_CNT_DECR),
 *	  except the abort path for REQ_SENT/REP_SENT/REP_WAIT/MRA_REP_RCVD
 *	  where the taskq dispatch succeeded: there the decrement is done by
 *	  ibcm_process_abort_via_taskq() and this function goes on to call
 *	  ibcm_delete_state_data().
 *
 * INPUTS:
 *	channel		Channel being closed.
 *	statep		CM state data of the connection.
 *	mode		IBT_BLOCKING, IBT_NONBLOCKING or IBT_NOCALLBACKS.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			close started, completed or already
 *					under way
 *	IBT_CHAN_HDL_INVALID		HCA is not accessible
 *	IBT_INSUFF_KERNEL_RESOURCE	non-blocking abort could not be
 *					dispatched to the taskq
 *	IBT_CM_FAILURE			connection is in a state not handled
 *					below
 */
static
ibt_status_t
ibcm_close_rc_channel(ibt_channel_hdl_t channel, ibcm_state_data_t *statep,
    ibt_execution_mode_t mode)
{
	ibcm_hca_info_t		*hcap;

	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&statep->state_mutex));
	ASSERT(MUTEX_HELD(&statep->state_mutex));

	IBTF_DPRINTF_L3(cmlog, "ibcm_close_rc_channel: chan 0x%p statep %p",
	    channel, statep);

	hcap = statep->hcap;

	/* HCA must have been in active state. If not, it's a client bug */
	if (!IBCM_ACCESS_HCA_OK(hcap)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "hcap 0x%p not active", channel, hcap);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_HDL_INVALID);
	}

	/* Let any in-progress transient state settle before inspecting it */
	if (statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
		while (statep->cep_in_rts == IBCM_BLOCK)
			cv_wait(&statep->block_mad_cv, &statep->state_mutex);
	}

	/* Do TRANSIENT_DREQ check after TRANSIENT_ESTABLISHED check */
	while (statep->state == IBCM_STATE_TRANSIENT_DREQ_SENT)
		cv_wait(&statep->block_mad_cv, &statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
	    "connection state is %x", channel, statep->state);

	/*
	 * If state is in pre-established states, abort the connection est.
	 * Every branch of this block returns; only an ESTABLISHED connection
	 * continues past it.
	 */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		statep->cm_retries++;	/* ensure connection trace is dumped */

		/* No DREP private data possible */
		if (statep->close_ret_priv_data_len != NULL)
			*statep->close_ret_priv_data_len = 0;

		/*
		 * If waiting for a response mad, then cancel the timer,
		 * and delete the connection
		 */
		if (statep->state == IBCM_STATE_REQ_SENT ||
		    statep->state == IBCM_STATE_REP_SENT ||
		    statep->state == IBCM_STATE_REP_WAIT ||
		    statep->state == IBCM_STATE_MRA_REP_RCVD) {
			timeout_id_t		timer_val = statep->timerid;
			ibcm_conn_state_t	old_state;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state %x", channel,
			    statep->state);

			/* remembered so a failed dispatch can be undone */
			old_state = statep->state;
			statep->state = IBCM_STATE_DELETE;

			/*
			 * The abort itself runs from the taskq; that taskq
			 * function drops the reference held on statep.
			 */
			if (mode == IBT_NONBLOCKING) {
				if (taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_NOSLEEP) == 0) {

					IBCM_REF_CNT_DECR(statep);
					statep->state = old_state;
					mutex_exit(&statep->state_mutex);
					return (IBT_INSUFF_KERNEL_RESOURCE);
				}	/* if taskq_dispatch succeeds */
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
			} else {
				/* Cancel the timer */
				statep->timerid = 0;
				mutex_exit(&statep->state_mutex);
				(void) taskq_dispatch(ibcm_taskq,
				    ibcm_process_abort_via_taskq, statep,
				    TQ_SLEEP);
			}

			/* cancel the currently running timer */
			if (timer_val != 0)
				(void) untimeout(timer_val);

			/* wait until cm handler returns for BLOCKING cases */
			mutex_enter(&statep->state_mutex);
			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * It would ideal to post a REJ MAD, but that would
			 * be non-conformance to spec. Hence, delete the state
			 * data. Assuming that happens quickly, any retransmits
			 * from the remote are replied by CM with reject
			 * reason " no valid com id". That would stop remote
			 * sending any more MADs.
			 */
			ibcm_delete_state_data(statep);
			return (IBT_SUCCESS);

		/* if CM busy in cm handler, wait until cm handler returns */
		} else if (statep->state == IBCM_STATE_REQ_RCVD ||
		    statep->state == IBCM_STATE_REP_RCVD ||
		    statep->state == IBCM_STATE_MRA_SENT ||
		    statep->state == IBCM_STATE_MRA_REP_SENT) {

			/* take control of statep */
			statep->abort_flag |= IBCM_ABORT_CLIENT;

			IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
			    "chan 0x%p connection aborted in state = %x",
			    channel, statep->state);

			/*
			 * wait until state machine modifies qp state to error,
			 * including disassociating statep and QP
			 */
			if ((mode == IBT_BLOCKING) || (mode == IBT_NOCALLBACKS))
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);

			/* a sanity setting */
			if (mode == IBT_NOCALLBACKS)
				statep->cm_handler = NULL;
			IBCM_REF_CNT_DECR(statep);

			/*
			 * In rare situations, connection attempt could be
			 * terminated for some other reason, before abort is
			 * processed, but CM still returns ret_status as abort
			 */
			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			mutex_exit(&statep->state_mutex);

			/*
			 * REJ MAD is posted by the CM state machine for this
			 * case, hence state structure is deleted in the
			 * state machine processing.
			 */
			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_TIMEWAIT) ||
		    (statep->state == IBCM_STATE_DELETE)) {

			/* State already in timewait, so no return priv data */
			IBCM_REF_CNT_DECR(statep);

			/* The teardown has already been done */
			if (statep->close_ret_status)
				*statep->close_ret_status =
				    IBT_CM_CLOSED_ALREADY;
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);

		} else if ((statep->state == IBCM_STATE_DREQ_RCVD) ||
		    (statep->state == IBCM_STATE_DREQ_SENT) ||
		    (statep->state == IBCM_STATE_DREP_RCVD) ||
		    ((statep->state == IBCM_STATE_TIMED_OUT) &&
		    (statep->timedout_state == IBCM_STATE_DREQ_SENT))) {

			/*
			 * Either the remote or local client has already
			 * initiated the teardown.  IBCM_STATE_DREP_RCVD is
			 * possible, if CM initiated teardown without client's
			 * knowledge, for stale handling, etc.,
			 */
			if (mode == IBT_NOCALLBACKS) {
				if (statep->close_nocb_state == IBCM_UNBLOCK) {
					statep->close_nocb_state = IBCM_FAIL;
					/* enable free qp after return */
					ibtl_cm_chan_is_closing(
					    statep->channel);
				} else while (statep->close_nocb_state ==
				    IBCM_BLOCK)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
				statep->cm_handler = NULL; /* sanity setting */
				if (statep->close_ret_status)
					*statep->close_ret_status =
					    IBT_CM_CLOSED_ALREADY;
			} else if (mode == IBT_BLOCKING) {
				/* wait until state is moved to timewait */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			/* ret_status is set in state machine code */
			return (IBT_SUCCESS);

		} else if (statep->state == IBCM_STATE_TIMED_OUT) {

			if ((mode == IBT_BLOCKING) ||
			    (mode == IBT_NOCALLBACKS)) {

				/*
				 * wait until cm handler invocation and
				 * disassociation between statep and channel
				 * is complete
				 */
				while (statep->close_done != B_TRUE)
					cv_wait(&statep->block_client_cv,
					    &statep->state_mutex);
			}

			if (statep->close_ret_status)
				*statep->close_ret_status = IBT_CM_CLOSED_ABORT;
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_SUCCESS);
		} else {
			/* a state none of the cases above knows how to close */
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);

			return (IBT_CM_FAILURE);
		}
	}

	ASSERT(statep->close_nocb_state != IBCM_BLOCK);

	/* For NOCALLBACKS, detach the client's handler before the teardown */
	if (mode == IBT_NOCALLBACKS) {
		statep->close_nocb_state = IBCM_FAIL;
		statep->cm_handler = NULL;
		ibtl_cm_chan_is_closing(statep->channel);
		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: "
		    "NOCALLBACKS on in statep = %p", statep);
	}

	/* state_mutex is still held if this jump to lost_race is taken */
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		goto lost_race;
	}

	/*
	 * Cancel/wait for any pending ibt_set_alt_path, and
	 * release state mutex
	 */
	ibcm_sync_lapr_idle(statep);

	ibcm_close_enter();

	/* The mutex was dropped above, so the state must be re-checked */
	mutex_enter(&statep->state_mutex);
	if (statep->state != IBCM_STATE_ESTABLISHED) {
		ibcm_close_exit();
		goto lost_race;
	}

	statep->state = IBCM_STATE_TRANSIENT_DREQ_SENT;
	statep->timerid = 0;
	statep->close_done = B_FALSE;
	statep->close_flow = 1;
	mutex_exit(&statep->state_mutex);

	/* The DREQ MAD is posted without holding the state mutex */
	ibcm_post_dreq_mad(statep);

	mutex_enter(&statep->state_mutex);

	/* Reached with state_mutex held, from the gotos or by falling through */
lost_race:
	if (mode == IBT_BLOCKING) {

		/* wait for DREP */
		while (statep->close_done != B_TRUE)
			cv_wait(&statep->block_client_cv,
			    &statep->state_mutex);

		IBTF_DPRINTF_L4(cmlog, "ibcm_close_rc_channel: chan 0x%p "
		    "done blocking", channel);
	}

	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);

	/* If this message isn't seen then ibt_close_rc_channel failed */
	IBTF_DPRINTF_L5(cmlog, "ibcm_close_rc_channel: chan 0x%p done",
	    channel);

	return (IBT_SUCCESS);
}

/*
 * ibt_recycle_rc()
 *	Recycles an RC channel whose QP is in the error state, so that it can
 *	be used again. The QP state changes themselves are performed by
 *	ibcm_process_rc_recycle_ret().
 *
 * INPUTS:
 *	rc_chan		The RC channel to recycle.
 *	control		Channel control flags to apply to the recycled QP.
 *	hca_port_num	HCA port number to program into the recycled QP.
 *	func		Completion handler. Non-NULL selects non-blocking
 *			operation, NULL selects blocking operation.
 *	arg		Argument passed back to func.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS			recycle done (blocking) or scheduled
 *					(non-blocking)
 *	IBT_CHAN_HDL_INVALID		rc_chan is not a valid handle
 *	IBT_CHAN_SRV_TYPE_INVALID	rc_chan is not an RC channel
 *	IBT_CHAN_STATE_INVALID		QP is not in the error state
 *	other				failure status of ibt_query_qp() or,
 *					when blocking, of the recycle itself
 */
ibt_status_t
ibt_recycle_rc(ibt_channel_hdl_t rc_chan, ibt_cep_flags_t control,
    uint8_t hca_port_num, ibt_recycle_handler_t func, void *arg)
{
	ibcm_state_data_t		*statep;
	ibcm_taskq_recycle_arg_t	*rargs;
	ibt_qp_query_attr_t		qp_attr;
	ibt_status_t			status;

	IBTF_DPRINTF_L3(cmlog, "ibt_recycle_rc (%p, 0x%X, %d, %p, %p)", rc_chan,
	    control, hca_port_num, func, arg);

	if (IBCM_INVALID_CHANNEL(rc_chan)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_recycle_rc: invalid channel");
		return (IBT_CHAN_HDL_INVALID);
	}

	/* Only an RC QP sitting in the error state may be recycled */
	status = ibt_query_qp(rc_chan, &qp_attr);
	if (status != IBT_SUCCESS)
		return (status);

	if (qp_attr.qp_info.qp_trans != IBT_RC_SRV)
		return (IBT_CHAN_SRV_TYPE_INVALID);

	if (qp_attr.qp_info.qp_state != IBT_STATE_ERROR)
		return (IBT_CHAN_STATE_INVALID);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rargs))

	/* Package the request; ibcm_process_rc_recycle_ret() frees it */
	rargs = kmem_alloc(sizeof (ibcm_taskq_recycle_arg_t), KM_SLEEP);

	rargs->rc_chan = rc_chan;
	rargs->control = control;
	rargs->hca_port_num = hca_port_num;
	rargs->func = func;
	rargs->arg = arg;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rargs))

	IBCM_GET_CHAN_PRIVATE(rc_chan, statep);

	if (statep != NULL) {
		IBCM_RELEASE_CHAN_PRIVATE(rc_chan);

		/*
		 * Non-blocking (func specified) and the channel has not yet
		 * completed the timewait: leave the request on statep so the
		 * work is done later.
		 */
		if (func != NULL) {
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
			    statep->recycle_arg))
			statep->recycle_arg = rargs;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(statep->recycle_arg))
			return (IBT_SUCCESS);
		}
	}

	/*
	 * Blocking (func not specified) with the timewait still pending
	 * blocks here until the channel completes the timewait.
	 */
	IBCM_WAIT_CHAN_PRIVATE(rc_chan);

	/* BLOCKING case: do the QP state change in this thread */
	if (func == NULL)
		return (ibcm_process_rc_recycle_ret(rargs));

	/* NON BLOCKING case. Taskq for QP state change */
	(void) taskq_dispatch(ibcm_taskq, ibcm_process_rc_recycle, rargs,
	    TQ_SLEEP);
	return (IBT_SUCCESS);
}

/*
 * ibcm_process_rc_recycle()
 *	void-returning wrapper around ibcm_process_rc_recycle_ret(), in the
 *	form needed for a taskq function (it is dispatched from
 *	ibt_recycle_rc()). The recycle status is discarded here.
 */
void
ibcm_process_rc_recycle(void *recycle_arg)
{
	(void) ibcm_process_rc_recycle_ret(recycle_arg);
}

static ibt_status_t
ibcm_process_rc_recycle_ret(void *recycle_arg)
{
	ibt_qp_info_t			qp_info;
	ibt_status_t			ibt_status = IBT_SUCCESS;
	ibt_cep_modify_flags_t		cep_flags;
	ibt_qp_query_attr_t		qp_attr;
	ibcm_taskq_recycle_arg_t	*ibcm_tq_recycle_arg =
	    (ibcm_taskq_recycle_arg_t *)recycle_arg;

	/* QP must have been in error state */
	ibt_status = ibt_query_qp(ibcm_tq_recycle_arg->rc_chan, &qp_attr);
	if (ibt_status != IBT_SUCCESS)
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
		    "chanp %p ibt_query_qp() = %d",
		    ibcm_tq_recycle_arg->rc_chan, ibt_status);
	else {
		/* perform the QP state change from ERROR to RESET */
		bzero(&qp_info, sizeof (qp_info));

		qp_info.qp_trans = IBT_RC_SRV;
		qp_info.qp_state = IBT_STATE_RESET;

		/* Call modify_qp to move to RESET state */
		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
		    IBT_CEP_SET_STATE, &qp_info, NULL);

		if (ibt_status != IBT_SUCCESS)
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
			    "chanp %p ibt_modify_qp() = %d for ERROR to RESET",
			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
	}

	if (ibt_status == IBT_SUCCESS) {

		qp_info.qp_state = IBT_STATE_INIT;

		/* set flags for all mandatory args from RESET to INIT */
		cep_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PORT;
		cep_flags |= IBT_CEP_SET_RDMA_R | IBT_CEP_SET_RDMA_W;
		cep_flags |= IBT_CEP_SET_ATOMIC;

		qp_info.qp_transport.rc.rc_path.cep_hca_port_num =
		    ibcm_tq_recycle_arg->hca_port_num;
		qp_info.qp_flags |=
		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_RD;
		qp_info.qp_flags |=
		    ibcm_tq_recycle_arg->control & IBT_CEP_RDMA_WR;
		qp_info.qp_flags |=
		    ibcm_tq_recycle_arg->control & IBT_CEP_ATOMIC;

		/* Always use the existing pkey */
		qp_info.qp_transport.rc.rc_path.cep_pkey_ix =
		    qp_attr. qp_info.qp_transport.rc.rc_path.cep_pkey_ix;

		/* Call modify_qp to move to INIT state */
		ibt_status = ibt_modify_qp(ibcm_tq_recycle_arg->rc_chan,
		    cep_flags, &qp_info, NULL);

		if (ibt_status != IBT_SUCCESS)
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_rc_recycle_ret: "
			    "chanp %p ibt_modify_qp() = %d for RESET to INIT",
			    ibcm_tq_recycle_arg->rc_chan, ibt_status);
	}

	/* Change the QP CM state to indicate QP being re-used */
	if (ibt_status == IBT_SUCCESS)
		ibtl_cm_chan_is_reused(ibcm_tq_recycle_arg->rc_chan);

	/* Call func, if defined */
	if (ibcm_tq_recycle_arg->func)
		(*(ibcm_tq_recycle_arg->func))(ibt_status,
		    ibcm_tq_recycle_arg->arg);

	kmem_free(ibcm_tq_recycle_arg, sizeof (ibcm_taskq_recycle_arg_t));

	return (ibt_status);
}

static void
ibcm_process_abort_via_taskq(void *args)
{
	ibcm_state_data_t	*statep = (ibcm_state_data_t *)args;

	ibcm_process_abort(statep);
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}

/*
 * Local UD CM Handler's private data, used during ibt_request_ud_dest() in
 * Non-Blocking mode operations.
 */
typedef struct ibcm_local_handler_s {
	/* client's UD CM handler, invoked by ibcm_local_cm_handler() */
	ibt_cm_ud_handler_t	actual_cm_handler;
	/* private argument handed to actual_cm_handler */
	void			*actual_cm_private;
	/* UD destination updated with the QPN and QKey from the SIDR REP */
	ibt_ud_dest_t		*dest_hdl;
} ibcm_local_handler_t;

_NOTE(READ_ONLY_DATA(ibcm_local_handler_s))

/*
 * Local UD CM Handler, used when ibt_alloc_ud_dest() is issued in
 * NON-Blocking mode.
 *
 * On a SIDR REP event, the UD Destination handle (if any) is updated with
 * the obtained DQPN and QKey, the actual client handler that was specified
 * by the client is invoked, and the local handler's private data is freed.
 * Any other event type is only logged.
 *
 * INPUTS:
 *	priv		ibcm_local_handler_t set up for this request; must
 *			not be NULL.
 *	event		UD CM event being delivered.
 *	ret_args	Passed through to the client handler.
 *	priv_data	Passed through to the client handler.
 *	len		Passed through to the client handler.
 *
 * RETURN VALUES:
 *	IBT_CM_ACCEPT, always; the client handler's return value is ignored.
 */
static ibt_cm_status_t
ibcm_local_cm_handler(void *priv, ibt_cm_ud_event_t *event,
    ibt_cm_ud_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	ibcm_local_handler_t	*handler_priv = (ibcm_local_handler_t *)priv;

	IBTF_DPRINTF_L4(cmlog, "ibcm_local_cm_handler: event %d",
	    event->cm_type);

	ASSERT(handler_priv != NULL);

	switch (event->cm_type) {
	case IBT_CM_UD_EVENT_SIDR_REP:
		/* Update QPN & QKey from event into destination handle. */
		if (handler_priv->dest_hdl != NULL) {
			handler_priv->dest_hdl->ud_dst_qpn =
			    event->cm_event.sidr_rep.srep_remote_qpn;
			handler_priv->dest_hdl->ud_qkey =
			    event->cm_event.sidr_rep.srep_remote_qkey;
		}

		/* Invoke the client handler - inform only, so ignore retval */
		(void) handler_priv->actual_cm_handler(
		    handler_priv->actual_cm_private, event, ret_args, priv_data,
		    len);

		/*
		 * Free memory allocated for local handler's private data.
		 * handler_priv has already been dereferenced above, so no
		 * NULL check is needed (or meaningful) here.
		 */
		kmem_free(handler_priv, sizeof (*handler_priv));

		break;
	default:
		IBTF_DPRINTF_L2(cmlog, "ibcm_local_cm_handler: ERROR");
		break;
	}

	return (IBT_CM_ACCEPT);
}


/*
 * Validate the input UD destination attributes.
 *
 * Checks, in order:
 *	- IBT_NONBLOCKING requires a CM handler and a NULL ret_args
 *	- IBT_BLOCKING requires a non-NULL ret_args
 *	- any other mode is rejected
 *	- a ServiceID and address info must be specified
 *	- both the SGID and the DGID must have a non-zero prefix and GUID
 *
 * Returns IBT_SUCCESS if every check passes, else IBT_INVALID_PARAM.
 */
static ibt_status_t
ibcm_validate_dqpn_data(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
    ibt_ud_returns_t *ret_args)
{
	switch (mode) {
	case IBT_NONBLOCKING:
		/* results are delivered via the handler, so it is mandatory */
		if (attr->ud_cm_handler == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
			    "CM handler is not specified ");
			return (IBT_INVALID_PARAM);
		}
		if (ret_args != NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
			    "ret_args should be NULL when called in "
			    "non-blocking mode");
			return (IBT_INVALID_PARAM);
		}
		break;

	case IBT_BLOCKING:
		if (ret_args == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
			    "ret_args should be Non-NULL when called in "
			    "blocking mode");
			return (IBT_INVALID_PARAM);
		}
		break;

	default:
		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
		    "invalid mode %x specified ", mode);
		return (IBT_INVALID_PARAM);
	}

	if (attr->ud_sid == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
		    "ServiceID must be specified. ");
		return (IBT_INVALID_PARAM);
	}

	if (attr->ud_addr == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: "
		    "Address Info NULL");
		return (IBT_INVALID_PARAM);
	}

	/* Source GID: neither half may be zero */
	if ((attr->ud_addr->av_sgid.gid_prefix == 0) ||
	    (attr->ud_addr->av_sgid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid SGID");
		return (IBT_INVALID_PARAM);
	}
	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: SGID<%llX:%llX>",
	    attr->ud_addr->av_sgid.gid_prefix,
	    attr->ud_addr->av_sgid.gid_guid);

	/* Destination GID: neither half may be zero */
	if ((attr->ud_addr->av_dgid.gid_prefix == 0) ||
	    (attr->ud_addr->av_dgid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_validate_dqpn_data: Invalid DGID");
		return (IBT_INVALID_PARAM);
	}
	IBTF_DPRINTF_L3(cmlog, "ibcm_validate_dqpn_data: DGID<%llX:%llX>",
	    attr->ud_addr->av_dgid.gid_prefix,
	    attr->ud_addr->av_dgid.gid_guid);

	return (IBT_SUCCESS);
}


/*
 * ibcm_ud_get_dqpn:
 *	Perform SIDR to retrieve DQPN and QKey.
 *
 *	Resolves the local HCA and port from the SGID in attr->ud_addr,
 *	builds a SIDR REQ MAD from the supplied attributes and posts it.
 *
 * INPUTS:
 *	attr		UD destination attributes.  attr->ud_addr is
 *			dereferenced here without a NULL check, so callers
 *			must have validated it already.
 *	mode		With IBT_BLOCKING the caller waits on
 *			ud_block_client_cv until ud_blocking_done is set.
 *			Otherwise the function returns once the SIDR REQ has
 *			been handed to ibcm_post_ud_mad().
 *	ret_args	Saved in the UD state (ud_return_data) and read back
 *			after the wait in blocking mode; not used otherwise.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS	SIDR REQ posted (non-blocking), or the wait finished
 *			with ud_status == IBT_CM_SREP_QPN_VALID (blocking).
 *	IBT_CM_FAILURE	Blocking mode and ud_status reports anything else.
 *	Otherwise the status of whichever setup step failed.
 */
static ibt_status_t
ibcm_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
    ibt_ud_returns_t *ret_args)
{
	ibt_status_t		retval;
	ib_pkey_t		ud_pkey;
	ibmf_handle_t		ibmf_hdl;
	ibmf_msg_t		*ibmf_msg;
	ibcm_hca_info_t		*hcap;
	ibcm_sidr_req_msg_t	*sidr_req_msgp;
	ibcm_ud_state_data_t	*ud_statep;
	ibtl_cm_hca_port_t	port;
	ibcm_sidr_srch_t	sidr_entry;
	ibcm_qp_list_t		*cm_qp_entry;

	/* Retrieve HCA GUID value from the available SGID info. */
	retval = ibtl_cm_get_hca_port(attr->ud_addr->av_sgid, 0, &port);
	if ((retval != IBT_SUCCESS) || (port.hp_port == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
		    "ibtl_cm_get_hca_port failed: %d", retval);
		/*
		 * Port number 0 is unusable (hca_port_info[] is indexed by
		 * hp_port - 1 below).  Never report IBT_SUCCESS to the caller
		 * when no SIDR REQ was sent.
		 */
		if (retval == IBT_SUCCESS)
			retval = IBT_HCA_INVALID;
		return (retval);
	}

	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: "
	    "HCA GUID:%llX, port_num:%d", port.hp_hca_guid, port.hp_port);

	/*
	 * Lookup the HCA info for this GUID.  Every error return from here
	 * until the resource count is taken below drops the HCA access count
	 * with ibcm_dec_hca_acc_cnt().
	 */
	if ((hcap = ibcm_find_hca_entry(port.hp_hca_guid)) == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: hcap is NULL");
		return (IBT_HCA_INVALID);
	}

	/* Return failure if the HCA device or Port is not operational */

	if ((retval = ibt_get_port_state_byguid(port.hp_hca_guid, port.hp_port,
	    NULL, NULL)) != IBT_SUCCESS) {
		/* Device Port is not in good state, don't use it. */
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: Invalid "
		    "port specified or port not active");
		ibcm_dec_hca_acc_cnt(hcap);
		return (retval);
	}

	retval = ibt_index2pkey_byguid(port.hp_hca_guid, port.hp_port,
	    attr->ud_pkey_ix, &ud_pkey);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
		    "Failed to convert index2pkey: %d", retval);
		ibcm_dec_hca_acc_cnt(hcap);
		return (retval);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(sidr_entry))

	/*
	 * Allocate a new request id.
	 *
	 * NOTE(review): the error returns below do not release this request
	 * id; confirm whether it needs to be freed on those paths.
	 */
	if (ibcm_alloc_reqid(hcap, &sidr_entry.srch_req_id) == IBCM_FAILURE) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
		    "no req id available");
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_INSUFF_KERNEL_RESOURCE);
	}

	/* Re-initialize the port if it has no IBMF handle registered. */
	if ((hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl == NULL) &&
	    ((retval = ibcm_hca_reinit_port(hcap, port.hp_port - 1))
	    != IBT_SUCCESS)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: "
		    "ibmf reg or callback setup failed during re-initialize");
		/* Drop the access count, as all other error paths do. */
		ibcm_dec_hca_acc_cnt(hcap);
		return (retval);
	}

	ibmf_hdl = hcap->hca_port_info[port.hp_port - 1].port_ibmf_hdl;

	/* find the ibmf QP to post the SIDR REQ */
	if ((cm_qp_entry = ibcm_find_qp(hcap, port.hp_port, ud_pkey)) ==
	    NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF QP allocation"
		    " failed");
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_INSUFF_RESOURCE);
	}

	if ((retval = ibcm_alloc_out_msg(ibmf_hdl, &ibmf_msg, MAD_METHOD_SEND))
	    != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_ud_get_dqpn: IBMF MSG allocation"
		    " failed");
		ibcm_release_qp(cm_qp_entry);
		ibcm_dec_hca_acc_cnt(hcap);
		return (retval);
	}

	sidr_entry.srch_lid = port.hp_base_lid;
	sidr_entry.srch_gid = attr->ud_addr->av_sgid;
	sidr_entry.srch_grh_exists = attr->ud_addr->av_send_grh;
	sidr_entry.srch_mode = IBCM_ACTIVE_MODE;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(sidr_entry))

	/* do various allocations needed here */
	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);

	(void) ibcm_find_sidr_entry(&sidr_entry, hcap, &ud_statep,
	    IBCM_FLAG_ADD);
	rw_exit(&hcap->hca_sidr_list_lock);

	/* Increment hca's resource count */
	ibcm_inc_hca_res_cnt(hcap);

	/* After a resource created on hca, no need to hold the acc cnt */
	ibcm_dec_hca_acc_cnt(hcap);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_statep))

	/* Initialize some ud_statep fields */
	ud_statep->ud_stored_msg = ibmf_msg;
	ud_statep->ud_svc_id = attr->ud_sid;
	ud_statep->ud_pkt_life_time =
	    ibt_ib2usec(attr->ud_pkt_lt);
	ud_statep->ud_stored_reply_addr.cm_qp_entry = cm_qp_entry;

	/* set remaining retry cnt */
	ud_statep->ud_remaining_retry_cnt = ud_statep->ud_max_cm_retries;

	/*
	 * Record the client's private argument, plus either the place to
	 * store the results (blocking) or the UD handler to call back
	 * (non-blocking).
	 */
	ud_statep->ud_state_cm_private = attr->ud_cm_private;

	if (mode == IBT_BLOCKING)
		ud_statep->ud_return_data = ret_args;
	else
		ud_statep->ud_cm_handler = attr->ud_cm_handler;

	/* Initialize the fields of ud_statep->ud_stored_reply_addr */
	ud_statep->ud_stored_reply_addr.grh_exists = attr->ud_addr->av_send_grh;
	ud_statep->ud_stored_reply_addr.ibmf_hdl = ibmf_hdl;
	ud_statep->ud_stored_reply_addr.grh_hdr.ig_hop_limit =
	    attr->ud_addr->av_hop;
	ud_statep->ud_stored_reply_addr.grh_hdr.ig_sender_gid =
	    attr->ud_addr->av_sgid;
	ud_statep->ud_stored_reply_addr.grh_hdr.ig_recver_gid =
	    attr->ud_addr->av_dgid;
	ud_statep->ud_stored_reply_addr.grh_hdr.ig_tclass =
	    attr->ud_addr->av_tclass;
	ud_statep->ud_stored_reply_addr.grh_hdr.ig_flow_label =
	    attr->ud_addr->av_flow & IB_GRH_FLOW_LABEL_MASK;

	/* needs to be derived based on the base LID and path bits */
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_local_lid =
	    port.hp_base_lid;
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_lid =
	    attr->ud_addr->av_dlid;
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_p_key = ud_pkey;
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_q_key = IB_GSI_QKEY;
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_service_level =
	    attr->ud_addr->av_srvl;

	/*
	 * This may be enhanced later, to use a remote qno based on past
	 * redirect rej mad responses. This would be the place to specify
	 * appropriate remote qno
	 */
	ud_statep->ud_stored_reply_addr.rcvd_addr.ia_remote_qno = 1;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))

	/* Initialize the SIDR REQ message fields */
	sidr_req_msgp =
	    (ibcm_sidr_req_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg);

	sidr_req_msgp->sidr_req_request_id = h2b32(ud_statep->ud_req_id);
	sidr_req_msgp->sidr_req_service_id = h2b64(attr->ud_sid);
	sidr_req_msgp->sidr_req_pkey = h2b16(ud_pkey);
	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_SIDR_REQ + IBCM_ATTR_BASE_ID);

	/* Private data is optional and is truncated to the SIDR REQ limit. */
	if ((attr->ud_priv_data != NULL) && (attr->ud_priv_data_len > 0)) {
		bcopy(attr->ud_priv_data, sidr_req_msgp->sidr_req_private_data,
		    min(attr->ud_priv_data_len, IBT_SIDR_REQ_PRIV_DATA_SZ));
	}

	/* Send out the SIDR REQ message */
	ud_statep->ud_state = IBCM_STATE_SIDR_REQ_SENT;
	ud_statep->ud_timer_stored_state = IBCM_STATE_SIDR_REQ_SENT;
	IBCM_UD_REF_CNT_INCR(ud_statep); /* for non-blocking SIDR REQ post */
	ud_statep->ud_timer_value = ibt_ib2usec(ibcm_max_sidr_rep_proctime) +
	    (ud_statep->ud_pkt_life_time * 2);

	IBCM_OUT_HDRP(ud_statep->ud_stored_msg)->TransactionID =
	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_SIDR_REQ,
	    ud_statep->ud_req_id, 0));

	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: timer_value in HZ = %x",
	    ud_statep->ud_timer_value);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ud_statep))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sidr_req_msgp))

	ibcm_post_ud_mad(ud_statep, ud_statep->ud_stored_msg,
	    ibcm_post_sidr_req_complete, ud_statep);

	mutex_enter(&ud_statep->ud_state_mutex);

	/* Wait for SIDR_REP */
	if (mode == IBT_BLOCKING) {
		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: blocking");

		while (ud_statep->ud_blocking_done != B_TRUE) {
			cv_wait(&ud_statep->ud_block_client_cv,
			    &ud_statep->ud_state_mutex);
		}

		IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: finished blocking");

		if (ret_args->ud_status == IBT_CM_SREP_QPN_VALID) {
			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: DQPN = %x, "
			    "status = %x, QKey = %x", ret_args->ud_dqpn,
			    ret_args->ud_status, ret_args->ud_qkey);

		} else {
			IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: Status<%x>",
			    ret_args->ud_status);
			retval = IBT_CM_FAILURE;
		}
	}

	/* Drop the reference taken above before posting the SIDR REQ. */
	IBCM_UD_REF_CNT_DECR(ud_statep);
	mutex_exit(&ud_statep->ud_state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_get_dqpn: done");

	return (retval);
}


/*
 * Function:
 *	ibt_request_ud_dest
 * Input:
 *	ud_dest		A previously allocated UD destination handle.
 *	mode		This function can execute in blocking or non blocking
 *			modes.
 *	attr		UD destination attributes to be modified.
 * Output:
 *	ud_ret_args	If the function is called in blocking mode, ud_ret_args
 *			should be a pointer to an ibt_ud_returns_t struct.
 * Returns:
 *	IBT_SUCCESS	on success, otherwise the failure reported by
 *			parameter validation, ibt_modify_ah() or the SIDR
 *			exchange.
 * Description:
 *	Modify a previously allocated UD destination handle based on the
 *	results of doing the SIDR protocol.
 *
 *	In non-blocking mode attr->ud_cm_handler and attr->ud_cm_private are
 *	temporarily replaced with CM's own handler and private data.  If the
 *	request fails they are restored before returning, so the caller's
 *	structure never points at freed memory.
 */
ibt_status_t
ibt_request_ud_dest(ibt_ud_dest_hdl_t ud_dest, ibt_execution_mode_t mode,
    ibt_ud_dest_attr_t *attr, ibt_ud_returns_t *ud_ret_args)
{
	ibt_status_t		retval;
	ibt_ud_dest_t		*ud_destp;
	ibcm_local_handler_t	*local_handler_priv = NULL;

	IBTF_DPRINTF_L3(cmlog, "ibt_request_ud_dest(%p, %x, %p, %p)",
	    ud_dest, mode, attr, ud_ret_args);

	retval = ibcm_validate_dqpn_data(attr, mode, ud_ret_args);
	if (retval != IBT_SUCCESS) {
		return (retval);
	}

	ud_destp = ud_dest;

	/* Point the existing Address handle at the new address vector. */
	retval = ibt_modify_ah(ud_destp->ud_dest_hca, ud_destp->ud_ah,
	    attr->ud_addr);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
		    "Address Handle Modification failed: %d", retval);
		return (retval);
	}

	if (mode == IBT_NONBLOCKING) {
		/*
		 * In NON-BLOCKING mode, and we need to update the destination
		 * handle with the DQPN and QKey that are obtained from
		 * SIDR REP, hook-up our own handler, so that we can catch
		 * the event, and we ourselves call the actual client's
		 * ud_cm_handler, in our handler.
		 */

		/* Allocate memory for local handler's private data. */
		local_handler_priv =
		    kmem_alloc(sizeof (*local_handler_priv), KM_SLEEP);

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*local_handler_priv))

		local_handler_priv->actual_cm_handler = attr->ud_cm_handler;
		local_handler_priv->actual_cm_private = attr->ud_cm_private;
		local_handler_priv->dest_hdl = ud_destp;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*local_handler_priv))

		attr->ud_cm_handler = ibcm_local_cm_handler;
		attr->ud_cm_private = local_handler_priv;
	}

	/* In order to get DQPN and Destination QKey, perform SIDR */
	retval = ibcm_ud_get_dqpn(attr, mode, ud_ret_args);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_request_ud_dest: "
		    "Failed to get DQPN: %d", retval);

		if (local_handler_priv != NULL) {
			/*
			 * Give the caller back its own handler and private
			 * data before freeing ours; otherwise
			 * attr->ud_cm_private would be left dangling.
			 */
			attr->ud_cm_handler =
			    local_handler_priv->actual_cm_handler;
			attr->ud_cm_private =
			    local_handler_priv->actual_cm_private;

			/* Free local handler's private data. */
			kmem_free(local_handler_priv,
			    sizeof (*local_handler_priv));
		}
		return (retval);
	}

	/*
	 * NOTE(review): on a successful non-blocking request, attr still
	 * carries CM's handler and private data when we return; confirm
	 * that callers do not reuse attr for a later request.
	 */

	/*
	 * Fill in the dqpn and dqkey as obtained from ud_ret_args,
	 * values will be valid only on BLOCKING mode.
	 */
	if (mode == IBT_BLOCKING) {
		ud_destp->ud_dst_qpn = ud_ret_args->ud_dqpn;
		ud_destp->ud_qkey = ud_ret_args->ud_qkey;
	}

	return (retval);
}

/*
 * Function:
 *	ibt_ud_get_dqpn
 * Input:
 *	attr		Pointer to the ibt_ud_dest_attr_t describing the
 *			SIDR REQ to send.  Attributes that are not used
 *			should be "NULL" or "0"; ud_sid, ud_addr and
 *			ud_pkt_lt must be specified.
 *	mode		Blocking or non-blocking execution.
 * Output:
 *	returns		In blocking mode, a pointer to an ibt_ud_returns_t
 *			struct that receives the results.
 * Return:
 *	IBT_SUCCESS	on success or respective failure on error.
 * Description:
 *	Finds the destination QPN on which the specified service can be
 *	reached at the specified destination.  The arguments are validated
 *	first; only then does the IBTF CM start the service ID resolution
 *	protocol (SIDR) to determine the destination QPN.
 *
 * NOTE: SIDR_REQ is initiated from active side.
 */
ibt_status_t
ibt_ud_get_dqpn(ibt_ud_dest_attr_t *attr, ibt_execution_mode_t mode,
    ibt_ud_returns_t *returns)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibt_ud_get_dqpn(%p, %x, %p)",
	    attr, mode, returns);

	/* Only start SIDR once the arguments have passed validation. */
	status = ibcm_validate_dqpn_data(attr, mode, returns);
	if (status == IBT_SUCCESS)
		status = ibcm_ud_get_dqpn(attr, mode, returns);

	return (status);
}


/*
 * ibt_cm_delay:
 *	Called by a client CM handler that needs more time to respond to
 *	a CM event.  An MRA message is built and posted for the connection.
 * INPUTS:
 *	flags		Which CM message's processing is being delayed:
 *				IBT_CM_DELAY_REQ
 *				IBT_CM_DELAY_REP
 *				IBT_CM_DELAY_LAP
 *	cm_session_id	The session ID that the CM passed to the client's
 *			srv_handler.
 *	service_time	The extended service time
 *	priv_data	Vendor specific data to place in the MRA message.
 *			Should be NULL if not specified.
 *	len		Number of bytes at priv_data; at most
 *			IBT_MRA_PRIV_DATA_SZ bytes are copied.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		MRA was handed off for posting.
 *	IBT_INVALID_PARAM	cm_session_id is NULL.
 *	IBT_CHAN_STATE_INVALID	the connection's state/ap_state/mode does
 *				not allow an MRA to be sent.
 *	Otherwise the failure from allocating the MRA message.
 */
ibt_status_t
ibt_cm_delay(ibt_cmdelay_flags_t flags, void *cm_session_id,
    clock_t service_time, void *priv_data, ibt_priv_data_len_t len)
{
	ibcm_state_data_t	*statep = (ibcm_state_data_t *)cm_session_id;
	ibcm_mra_msg_t		*mra_msgp;
	ibt_status_t		status;
	uint8_t			msg_typ;

	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay(0x%x, %p, 0x%x)",
	    flags, cm_session_id, service_time);

	/* The session id is the connection's state data. */
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: statep NULL");
		return (IBT_INVALID_PARAM);
	}

	IBTF_DPRINTF_L4(cmlog, "ibt_cm_delay: statep %p", statep);

	/* The MRA message is allocated on first use and then kept. */
	if (statep->mra_msg == NULL) {
		status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl,
		    &statep->mra_msg, MAD_METHOD_SEND);
		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: chan 0x%p"
			    "IBMF MSG allocation failed", statep->channel);
			return (status);
		}
	}

	/* Translate the delay flag into the "message MRAed" type. */
	switch (flags) {
	case IBT_CM_DELAY_REQ:
		msg_typ = IBT_CM_MRA_TYPE_REQ;
		break;
	case IBT_CM_DELAY_REP:
		msg_typ = IBT_CM_MRA_TYPE_REP;
		break;
	case IBT_CM_DELAY_LAP:
		msg_typ = IBT_CM_MRA_TYPE_LAP;
		break;
	default:
		/* any other value leaves the type at 0 */
		msg_typ = 0;
		break;
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mra_msgp))

	/* Fill in the MRA: comm ids, message type, timeout, private data. */
	mra_msgp = (ibcm_mra_msg_t *)IBCM_OUT_MSGP(statep->mra_msg);
	mra_msgp->mra_local_comm_id = h2b32(statep->local_comid);
	mra_msgp->mra_remote_comm_id = h2b32(statep->remote_comid);
	mra_msgp->mra_message_type_plus = msg_typ << 6;
	mra_msgp->mra_service_timeout_plus = ibt_usec2ib(service_time) << 3;

	len = min(len, IBT_MRA_PRIV_DATA_SZ);
	if ((priv_data != NULL) && (len > 0))
		bcopy(priv_data, mra_msgp->mra_private_data, len);

	IBCM_OUT_HDRP(statep->mra_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_MRA + IBCM_ATTR_BASE_ID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mra_msgp))

	mutex_enter(&statep->state_mutex);

	/*
	 * An MRA is only allowed from three situations: active side with
	 * a REP received, passive side with a REQ received, or passive
	 * side with a LAP received.
	 */
	if ((statep->mode == IBCM_ACTIVE_MODE) &&
	    (statep->state == IBCM_STATE_REP_RCVD)) {
		statep->state = IBCM_STATE_MRA_REP_SENT;
	} else if ((statep->mode == IBCM_PASSIVE_MODE) &&
	    (statep->state == IBCM_STATE_REQ_RCVD)) {
		statep->state = IBCM_STATE_MRA_SENT;
	} else if ((statep->mode == IBCM_PASSIVE_MODE) &&
	    (statep->ap_state == IBCM_AP_STATE_LAP_RCVD)) {
		statep->ap_state = IBCM_AP_STATE_MRA_LAP_RCVD;
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_delay: invalid state "
		    "/ap_state/mode %x, %x, %x", statep->state,
		    statep->ap_state, statep->mode);
		mutex_exit(&statep->state_mutex);
		return (IBT_CHAN_STATE_INVALID);
	}

	/* service time is usecs, stale_clock is nsecs */
	statep->stale_clock = gethrtime() +
	    (hrtime_t)ibt_ib2usec(ibt_usec2ib(service_time)) * (1000 *
	    statep->max_cm_retries);

	statep->send_mad_flags |= IBCM_MRA_POST_BUSY;
	IBCM_REF_CNT_INCR(statep);	/* for ibcm_post_mra_complete */
	mutex_exit(&statep->state_mutex);

	/* The MRA reuses the transaction id of the stored message. */
	IBCM_OUT_HDRP(statep->mra_msg)->TransactionID =
	    IBCM_OUT_HDRP(statep->stored_msg)->TransactionID;

	/* post the MRA mad in blocking mode, as no timers involved */
	ibcm_post_rc_mad(statep, statep->mra_msg, ibcm_post_mra_complete,
	    statep);
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_MRA);

	/* If this message isn't seen then ibt_cm_delay failed */
	IBTF_DPRINTF_L3(cmlog, "ibt_cm_delay: done !!");

	return (IBT_SUCCESS);
}


/*
 * ibt_register_service()
 *	Register a service with the IBCM
 *
 * INPUTS:
 *	ibt_hdl		The IBT client handle returned to the client
 *			on an ibt_attach() call.
 *
 *	srv		The address of a ibt_srv_desc_t that describes
 *			the service, containing the following:
 *
 *		sd_ud_handler	The Service CM UD event Handler.
 *		sd_handler	The Service CM RC/UC/RD event Handler.
 *		sd_flags	Service flags (peer-to-peer, or not).
 *
 *	sid		This tells CM if the service is local (sid is 0) or
 *			wellknown (sid is the starting service id of the range).
 *
 *	num_sids	The number of contiguous service-ids to reserve.
 *
 *	srv_hdl		The address of a service identification handle, used
 *			to deregister a service, and to bind GIDs to.
 *			Set to NULL on every failure return.
 *
 *	ret_sid		The address to store the Service ID return value.
 *			If num_sids > 1, ret_sid is the first Service ID
 *			in the range.  Must not be NULL when sid is 0.
 *
 * ibt_register_service() returns:
 *	IBT_SUCCESS		- added a service successfully.
 *	IBT_INVALID_PARAM	- invalid input parameter (including a NULL
 *				  srv or srv_hdl).
 *	IBT_CM_FAILURE		- failed to add the service.
 *	IBT_CM_SERVICE_EXISTS	- service already exists.
 *	IBT_INSUFF_KERNEL_RESOURCE - ran out of local service ids (should
 *				     never happen).
 */
ibt_status_t
ibt_register_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_desc_t *srv,
    ib_svc_id_t sid, int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
{
	ibcm_svc_info_t		*svcinfop;

	IBTF_DPRINTF_L2(cmlog, "ibt_register_service(%p (%s), %p, 0x%llX, %d)",
	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv, (longlong_t)sid,
	    num_sids);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*svcinfop))

	/* The handle is written unconditionally below, so it must exist. */
	if (srv_hdl == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
		    "srv_hdl must not be NULL");
		return (IBT_INVALID_PARAM);
	}

	*srv_hdl = NULL;

	/*
	 * Reject a missing service description before any service ids are
	 * allocated, so that nothing needs to be undone.
	 */
	if (srv == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
		    "srv must not be NULL");
		return (IBT_INVALID_PARAM);
	}

	if (num_sids <= 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
		    "Invalid number of service-ids specified (%d)", num_sids);
		return (IBT_INVALID_PARAM);
	}

	if (sid == 0) {
		/* Local service: CM picks the ids, caller must collect them. */
		if (ret_sid == NULL)
			return (IBT_INVALID_PARAM);
		sid = ibcm_alloc_local_sids(num_sids);
		if (sid == 0)
			return (IBT_INSUFF_KERNEL_RESOURCE);

	/* Make sure that the ServiceId specified is not of LOCAL AGN type. */
	} else if ((sid & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
		    "Invalid non-LOCAL SID specified: 0x%llX",
		    (longlong_t)sid);
		return (IBT_INVALID_PARAM);
	}

	svcinfop = ibcm_create_svc_entry(sid, num_sids);

	if (svcinfop == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_service: "
		    "Service-ID 0x%llx already registered", (longlong_t)sid);
		return (IBT_CM_SERVICE_EXISTS);
	}

	/*
	 * 'sid' and 'num_sids' are filled in ibcm_create_svc_entry()
	 */
	svcinfop->svc_flags = srv->sd_flags;
	svcinfop->svc_rc_handler = srv->sd_handler;
	svcinfop->svc_ud_handler = srv->sd_ud_handler;

	if (ret_sid != NULL)
		*ret_sid = sid;

	*srv_hdl = svcinfop;

	ibtl_cm_change_service_cnt(ibt_hdl, num_sids);

	/* If this message isn't seen, then ibt_register_service failed. */
	IBTF_DPRINTF_L2(cmlog, "ibt_register_service: done (%p, %llX)",
	    svcinfop, (longlong_t)sid);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*svcinfop))

	return (IBT_SUCCESS);
}


/*
 * ibcm_write_service_record:
 *	Issue a service record update of type 'saa_type' for 'srv_recp' via
 *	ibmf_saa_update_service_record(), retrying while the call reports
 *	IBMF_TRANS_TIMEOUT, up to ibcm_max_sa_retries attempts with a delay
 *	of ibcm_sa_timeout_delay after each timed out attempt.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS	the update returned IBMF_SUCCESS.
 *	Otherwise the IBMF error translated by ibcm_ibmf_analyze_error().
 *	If no attempt is made at all (ibcm_max_sa_retries <= 0) the result
 *	is reported as a timeout.
 */
static ibt_status_t
ibcm_write_service_record(ibmf_saa_handle_t saa_handle,
    sa_service_record_t *srv_recp, ibmf_saa_access_type_t saa_type)
{
	/*
	 * Start out as "timed out" so that rval is well defined even when
	 * the retry loop below does not execute a single iteration.
	 */
	int	rval = IBMF_TRANS_TIMEOUT;
	int	retry;

	ibcm_sa_access_enter();
	for (retry = 0; retry < ibcm_max_sa_retries; retry++) {
		rval = ibmf_saa_update_service_record(
		    saa_handle, srv_recp, saa_type, 0);
		/* Only a timeout is worth retrying. */
		if (rval != IBMF_TRANS_TIMEOUT) {
			break;
		}
		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
		    "ibmf_saa_update_service_record timed out"
		    " SID = %llX, rval = %d, saa_type = %d",
		    (longlong_t)srv_recp->ServiceID, rval, saa_type);
		delay(ibcm_sa_timeout_delay);
	}
	ibcm_sa_access_exit();

	if (rval != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_write_service_record: "
		    "ibmf_saa_update_service_record() : Failed - %d", rval);
		return (ibcm_ibmf_analyze_error(rval));
	}

	return (IBT_SUCCESS);
}


/*
 * ibcm_rem_stale_srec:
 *	Query the SA for service records matching the P_Key, name and GID
 *	of the template 'srec'.  Each returned record whose ServiceID is not
 *	found by ibcm_find_svc_entry() is treated as stale and deleted from
 *	the SA.
 *
 *	srec->ServiceID is overwritten with the ServiceID of every record
 *	that gets deleted.  Failures are logged or ignored; nothing is
 *	returned to the caller.
 */
static void
ibcm_rem_stale_srec(ibmf_saa_handle_t saa_handle, sa_service_record_t *srec)
{
	ibmf_saa_access_args_t	query;
	sa_service_record_t	*records;
	sa_service_record_t	*rec;
	void			*results_p;
	size_t			length;
	uint_t			num_found;
	uint_t			idx;
	int			in_use;
	ibt_status_t		retval;

	/* Retrieve the Service Records matching P_Key, Name and GID. */
	query.sq_attr_id = SA_SERVICERECORD_ATTRID;
	query.sq_access_type = IBMF_SAA_RETRIEVE;
	query.sq_component_mask =
	    SA_SR_COMPMASK_PKEY | SA_SR_COMPMASK_NAME | SA_SR_COMPMASK_GID;
	query.sq_template = srec;
	query.sq_template_length = sizeof (sa_service_record_t);
	query.sq_callback = NULL;
	query.sq_callback_arg = NULL;

	retval = ibcm_contact_sa_access(saa_handle, &query, &length,
	    &results_p);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
		    "SA Access Failure");
		return;
	}

	num_found = length / sizeof (sa_service_record_t);

	if (num_found != 0) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
		    "Found %d matching Service Records.", num_found);
	}

	/* Nothing to look at unless at least one record came back. */
	if ((results_p == NULL) || (num_found == 0))
		return;

	records = (sa_service_record_t *)results_p;

	for (idx = 0; idx < num_found; idx++) {
		rec = &records[idx];

		/*
		 * A matching record is not necessarily stale: it may belong
		 * to a service registered in the current session.  Leave
		 * those alone.
		 */
		mutex_enter(&ibcm_svc_info_lock);
		in_use = (ibcm_find_svc_entry(rec->ServiceID) != NULL);
		mutex_exit(&ibcm_svc_info_lock);

		if (in_use) {
			/* This record is NOT STALE. */
			IBTF_DPRINTF_L3(cmlog, "ibcm_rem_stale_srec: "
			    "This is not Stale, it's an active record");
			continue;
		}

		IBTF_DPRINTF_L2(cmlog, "ibcm_rem_stale_srec: "
		    "Removing Stale Rec: %s, %llX",
		    rec->ServiceName, rec->ServiceID);

		IBCM_DUMP_SERVICE_REC(rec);

		/*
		 * Delete the record from the SA.  Only the ServiceID has to
		 * be taken from the response; the template already holds
		 * the other attributes.
		 */

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(srec->ServiceID))

		srec->ServiceID = rec->ServiceID;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(srec->ServiceID))

		(void) ibcm_write_service_record(saa_handle, srec,
		    IBMF_SAA_DELETE);
	}

	/* Release the buffer holding the query results. */
	kmem_free(results_p, length);
}



/*
 * ibt_bind_service()
 *	Bind a previously registered service to a GID and, when a service
 *	record description is supplied, write the service record(s) for the
 *	whole service-id range to the SA.
 *
 * INPUTS:
 *	srv_hdl		The service id handle returned to the client
 *			on an ibt_register_service() call.
 *
 *	gid		The GID to which to bind the service.
 *
 *	srv_bind	The address of a ibt_srv_bind_t that describes
 *			the service record.  This should be NULL if there
 *			is to be no service record.  This contains:
 *
 *		sb_lease	Lease period
 *		sb_pkey		Partition (must not be 0)
 *		sb_name		pointer to ASCII string Service Name,
 *				NULL terminated, shorter than
 *				IB_SVC_NAME_LEN.
 *		sb_key[]	Key to secure the service record.
 *		sb_data		Service Data structure (64-byte)
 *
 *	cm_private	First argument of Service handler.
 *
 *	sb_hdl_p	The address of a service bind handle, used
 *			to undo the service binding.  May be NULL; when
 *			supplied it is set to NULL on failure.
 *
 * ibt_bind_service() returns:
 *	IBT_SUCCESS		- added a service successfully.
 *	IBT_INVALID_PARAM	- invalid input parameter.
 *	IBT_CM_FAILURE		- failed to add the service.
 *	IBT_CM_SERVICE_EXISTS	- service already exists.
 *	IBT_HCA_BUSY_DETACHING	- no HCA entry found for the GID.
 *	IBT_HCA_PORT_NOT_ACTIVE	- no SA access handle for the port.
 *	Other failures are passed through from the port/P_Key lookups and
 *	from writing the service record.
 */
ibt_status_t
ibt_bind_service(ibt_srv_hdl_t srv_hdl, ib_gid_t gid, ibt_srv_bind_t *srv_bind,
    void *cm_private, ibt_sbind_hdl_t *sb_hdl_p)
{
	ibt_status_t		status;
	ibtl_cm_hca_port_t	port;
	ibcm_svc_bind_t		*sbindp, *sbp;
	ibcm_hca_info_t		*hcap;
	ib_svc_id_t		sid, start_sid, end_sid;
	ibmf_saa_handle_t	saa_handle;
	sa_service_record_t	srv_rec;
	uint16_t		pkey_ix;

	if (sb_hdl_p != NULL)
		*sb_hdl_p = NULL;	/* return value for error cases */

	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: srv_hdl %p, gid (%llX:%llX)",
	    srv_hdl, (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sbindp))

	/* Call ibtl_cm_get_hca_port to get the port number and the HCA GUID. */
	if ((status = ibtl_cm_get_hca_port(gid, 0, &port)) != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
		    "ibtl_cm_get_hca_port failed: %d", status);
		return (status);
	}
	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: Port:%d HCA GUID:%llX",
	    port.hp_port, port.hp_hca_guid);

	/*
	 * From here on every return path drops the HCA access count with
	 * ibcm_dec_hca_acc_cnt().
	 */
	hcap = ibcm_find_hca_entry(port.hp_hca_guid);
	if (hcap == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: NO HCA found");
		return (IBT_HCA_BUSY_DETACHING);
	}
	IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: hcap = %p", hcap);

	/* Validate everything needed to write a service record up front. */
	if (srv_bind != NULL) {
		saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
		if (saa_handle == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
			    "saa_handle is NULL");
			ibcm_dec_hca_acc_cnt(hcap);
			return (IBT_HCA_PORT_NOT_ACTIVE);
		}
		if (srv_bind->sb_pkey == 0) {
			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
			    "P_Key must not be 0");
			ibcm_dec_hca_acc_cnt(hcap);
			return (IBT_INVALID_PARAM);
		}
		/* The name is passed to strlen()/strcpy(); it must exist. */
		if (srv_bind->sb_name == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
			    "Service Name must be specified");
			ibcm_dec_hca_acc_cnt(hcap);
			return (IBT_INVALID_PARAM);
		}
		if (strlen(srv_bind->sb_name) >= IB_SVC_NAME_LEN) {
			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
			    "Service Name is too long");
			ibcm_dec_hca_acc_cnt(hcap);
			return (IBT_INVALID_PARAM);
		} else
			IBTF_DPRINTF_L3(cmlog, "ibt_bind_service: "
			    "Service Name='%s'", srv_bind->sb_name);
		status = ibt_pkey2index_byguid(port.hp_hca_guid,
		    port.hp_port, srv_bind->sb_pkey, &pkey_ix);
		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
			    "P_Key 0x%x not found in P_Key_Table",
			    srv_bind->sb_pkey);
			ibcm_dec_hca_acc_cnt(hcap);
			return (status);
		}
	}

	/* assume success - allocate before locking */
	sbindp = kmem_zalloc(sizeof (*sbindp), KM_SLEEP);
	sbindp->sbind_cm_private = cm_private;
	sbindp->sbind_gid = gid;
	sbindp->sbind_hcaguid = port.hp_hca_guid;
	sbindp->sbind_port = port.hp_port;

	mutex_enter(&ibcm_svc_info_lock);

	/*
	 * Refuse a second binding of the same GID when no service record is
	 * requested, or when the requested P_Key is already bound.
	 */
	sbp = srv_hdl->svc_bind_list;
	while (sbp != NULL) {
		if (sbp->sbind_gid.gid_guid == gid.gid_guid &&
		    sbp->sbind_gid.gid_prefix == gid.gid_prefix) {
			if (srv_bind == NULL ||
			    srv_bind->sb_pkey == sbp->sbind_pkey) {
				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: "
				    "failed: GID %llX:%llX and PKEY %x is "
				    "already bound", gid.gid_prefix,
				    gid.gid_guid, sbp->sbind_pkey);
				mutex_exit(&ibcm_svc_info_lock);
				ibcm_dec_hca_acc_cnt(hcap);
				kmem_free(sbindp, sizeof (*sbindp));
				return (IBT_CM_SERVICE_EXISTS);
			}
		}
		sbp = sbp->sbind_link;
	}
	/* no entry found */

	/* Link the new binding at the head of the service's bind list. */
	sbindp->sbind_link = srv_hdl->svc_bind_list;
	srv_hdl->svc_bind_list = sbindp;

	mutex_exit(&ibcm_svc_info_lock);

	if (srv_bind != NULL) {
		bzero(&srv_rec, sizeof (srv_rec));

		/* Keep a copy of the record attributes in the binding. */
		srv_rec.ServiceLease =
		    sbindp->sbind_lease = srv_bind->sb_lease;
		srv_rec.ServiceP_Key =
		    sbindp->sbind_pkey = srv_bind->sb_pkey;
		srv_rec.ServiceKey_hi =
		    sbindp->sbind_key[0] = srv_bind->sb_key[0];
		srv_rec.ServiceKey_lo =
		    sbindp->sbind_key[1] = srv_bind->sb_key[1];
		(void) strcpy(sbindp->sbind_name, srv_bind->sb_name);
		(void) strcpy((char *)srv_rec.ServiceName, srv_bind->sb_name);
		srv_rec.ServiceGID = gid;

		/*
		 * Find out whether we have any stale Local Service records
		 * matching the current attributes.  If yes, we shall try to
		 * remove them from SA using the current request's ServiceKey.
		 *
		 * We will perform this operation only for Local Services, as
		 * it is handled by SA automatically for WellKnown Services.
		 *
		 * Of course, clients can specify NOT to do this clean-up by
		 * setting IBT_SBIND_NO_CLEANUP flag (srv_bind->sb_flag).
		 *
		 * The AGN field is compared as a whole (mask, then compare),
		 * so that only LOCAL service ids qualify, and not every AGN
		 * value that happens to share bits with IB_SID_AGN_LOCAL.
		 */
		if (((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL) &&
		    (!(srv_bind->sb_flag & IBT_SBIND_NO_CLEANUP))) {
			ibcm_rem_stale_srec(saa_handle, &srv_rec);
		}

		/* Handle endianness for service data. */
		ibcm_swizzle_from_srv(&srv_bind->sb_data, sbindp->sbind_data);

		bcopy(sbindp->sbind_data, srv_rec.ServiceData, IB_SVC_DATA_LEN);

		/* insert srv record into the SA */
		start_sid = srv_hdl->svc_id;
		end_sid = start_sid + srv_hdl->svc_num_sids - 1;
		for (sid = start_sid; sid <= end_sid; sid++) {

			srv_rec.ServiceID = sid;

			IBCM_DUMP_SERVICE_REC(&srv_rec);

			IBTF_DPRINTF_L4(cmlog, "ibt_bind_service: "
			    "ibmf_saa_write_service_record, SvcId = %llX",
			    (longlong_t)sid);

			status = ibcm_write_service_record(saa_handle, &srv_rec,
			    IBMF_SAA_UPDATE);
			if (status != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog, "ibt_bind_service:"
				    " ibcm_write_service_record fails %d, "
				    "sid %llX", status, (longlong_t)sid);

				if (sid != start_sid) {
					/*
					 * Bind failed while bind SID other than
					 * first in the sid_range.  So we need
					 * to unbind those, which are passed.
					 *
					 * Need to increment svc count to
					 * compensate for ibt_unbind_service().
					 */
					ibcm_inc_hca_svc_cnt(hcap);
					ibcm_dec_hca_acc_cnt(hcap);

					(void) ibt_unbind_service(srv_hdl,
					    sbindp);
				} else {
					ibcm_svc_bind_t		**sbpp;

					/*
					 * Bind failed for the first SID or the
					 * only SID in question, then no need
					 * to unbind, just free memory and
					 * return error.
					 */
					mutex_enter(&ibcm_svc_info_lock);

					/* Unlink sbindp from the bind list. */
					sbpp = &srv_hdl->svc_bind_list;
					sbp = *sbpp;
					while (sbp != NULL) {
						if (sbp == sbindp) {
							*sbpp = sbp->sbind_link;
							break;
						}
						sbpp = &sbp->sbind_link;
						sbp = *sbpp;
					}
					mutex_exit(&ibcm_svc_info_lock);
					ibcm_dec_hca_acc_cnt(hcap);

					kmem_free(sbindp, sizeof (*sbindp));
				}
				return (status);
			}
		}
	}
	/* The binding now counts as a service on the HCA. */
	ibcm_inc_hca_svc_cnt(hcap);
	ibcm_dec_hca_acc_cnt(hcap);

	/* If this message isn't seen then ibt_bind_service failed */
	IBTF_DPRINTF_L2(cmlog, "ibt_bind_service: DONE (%p, %llX:%llX)",
	    srv_hdl, gid.gid_prefix, gid.gid_guid);

	if (sb_hdl_p != NULL)
		*sb_hdl_p = sbindp;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sbindp))

	return (IBT_SUCCESS);
}

/*
 * ibt_unbind_service()
 *	Undo a service binding: unlink 'sbindp' from the bind list of
 *	'srv_hdl', delete its service records from the SA (one per service
 *	id in the range, only when the binding has a non-zero P_Key) and
 *	free the binding.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		binding removed.  Failures while deleting
 *				service records are logged, not returned.
 *	IBT_INVALID_PARAM	'sbindp' is not on the bind list of 'srv_hdl'.
 */
ibt_status_t
ibt_unbind_service(ibt_srv_hdl_t srv_hdl, ibt_sbind_hdl_t sbindp)
{
	ibcm_svc_bind_t		*cur, **linkp;
	ibcm_hca_info_t		*hcap;
	ib_svc_id_t		sid, end_sid;
	ibtl_cm_hca_port_t	port;
	sa_service_record_t	srv_rec;
	ibmf_saa_handle_t	saa_handle;
	ibt_status_t		status;
	ibt_status_t		rval;

	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service(%p, %p)",
	    srv_hdl, sbindp);

	hcap = ibcm_find_hca_entry(sbindp->sbind_hcaguid);

	/* If there is a service on hca, respective hcap cannot go away */
	ASSERT(hcap != NULL);

	mutex_enter(&ibcm_svc_info_lock);

	/* Walk the bind list; unlink the binding if it is on the list. */
	for (linkp = &srv_hdl->svc_bind_list; (cur = *linkp) != NULL;
	    linkp = &cur->sbind_link) {
		if (cur == sbindp) {
			*linkp = cur->sbind_link;
			break;
		}
	}

	sid = srv_hdl->svc_id;
	end_sid = srv_hdl->svc_id + srv_hdl->svc_num_sids - 1;

	/* Wait while the binding is marked IBCM_REWRITE_BUSY. */
	if (cur != NULL) {
		while (cur->sbind_rewrite_state == IBCM_REWRITE_BUSY)
			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
	}
	mutex_exit(&ibcm_svc_info_lock);

	if (cur == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
		    "service binding not found: srv_hdl %p, srv_bind %p",
		    srv_hdl, sbindp);
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_INVALID_PARAM);
	}

	/* A zero P_Key means no service records were written. */
	if (sbindp->sbind_pkey == 0)
		goto done;

	/* get the default SGID of the port */
	status = ibtl_cm_get_hca_port(sbindp->sbind_gid, 0, &port);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
		    "ibtl_cm_get_hca_port failed: %d", status);
		/* we're done, but there may be stale service records */
		goto done;
	}

	saa_handle = ibcm_get_saa_handle(hcap, port.hp_port);
	if (saa_handle == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
		    "saa_handle is NULL");
		/* we're done, but there may be stale service records */
		goto done;
	}

	/* Rebuild the record template from what the binding remembers. */
	bzero(&srv_rec, sizeof (srv_rec));

	srv_rec.ServiceP_Key = sbindp->sbind_pkey;
	srv_rec.ServiceKey_hi = sbindp->sbind_key[0];
	srv_rec.ServiceKey_lo = sbindp->sbind_key[1];
	srv_rec.ServiceGID = sbindp->sbind_gid;
	(void) strcpy((char *)srv_rec.ServiceName, sbindp->sbind_name);

	/* Delete the record of every service id in the range. */
	for (; sid <= end_sid; sid++) {
		srv_rec.ServiceID = sid;
		IBCM_DUMP_SERVICE_REC(&srv_rec);

		rval = ibcm_write_service_record(saa_handle, &srv_rec,
		    IBMF_SAA_DELETE);

		IBTF_DPRINTF_L4(cmlog, "ibt_unbind_service: "
		    "ibcm_write_service_record rval = %d, SID %llx",
		    rval, sid);
		if (rval != IBT_SUCCESS) {
			/* this is not considered a reason to fail */
			IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: "
			    "ibcm_write_service_record fails %d, "
			    "sid %llx", rval, sid);
		}
	}

done:
	ibcm_dec_hca_svc_cnt(hcap);
	ibcm_dec_hca_acc_cnt(hcap);
	kmem_free(sbindp, sizeof (*sbindp));

	/* If this message isn't seen then ibt_unbind_service failed */
	IBTF_DPRINTF_L2(cmlog, "ibt_unbind_service: done !!");

	return (IBT_SUCCESS);
}

/*
 * Repeatedly take the binding at the head of the service's bind list and
 * unbind it, until the list is empty.  The first unbind failure is
 * returned to the caller right away.
 */
ibt_status_t
ibt_unbind_all_services(ibt_srv_hdl_t srv_hdl)
{
	ibcm_svc_bind_t	*head;
	ibcm_svc_bind_t	*last = NULL;
	ibt_status_t	rc;

	mutex_enter(&ibcm_svc_info_lock);
	head = srv_hdl->svc_bind_list;

	/*
	 * Stop as soon as the head of the list is the binding that was just
	 * processed; this compare keeps the loop from being infinite.
	 */
	while (head != last) {
		last = head;

		/* The lock is not held across the unbind call. */
		mutex_exit(&ibcm_svc_info_lock);
		rc = ibt_unbind_service(srv_hdl, last);
		if (rc != IBT_SUCCESS)
			return (rc);
		mutex_enter(&ibcm_svc_info_lock);

		head = srv_hdl->svc_bind_list;
		if (head == NULL)
			break;
	}
	mutex_exit(&ibcm_svc_info_lock);

	return (IBT_SUCCESS);
}

/*
 * ibt_deregister_service()
 *	Deregister a service with the IBCM
 *
 * INPUTS:
 *	ibt_hdl		The IBT client handle the client obtained from
 *			ibt_attach().
 *
 *	srv_hdl		The service identification handle of the service
 *			to de-register.  It is freed on success.
 * RETURN VALUES:
 *	IBT_SUCCESS		service removed.
 *	IBT_CM_SERVICE_BUSY	the service still has bindings.
 *	IBT_INVALID_PARAM	srv_hdl is not the entry found in the
 *				service AVL tree for its service id.
 */
ibt_status_t
ibt_deregister_service(ibt_clnt_hdl_t ibt_hdl, ibt_srv_hdl_t srv_hdl)
{
	ibcm_svc_lookup_t	key;
	ibcm_svc_info_t		*entry;

	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(%p (%s), %p)",
	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl), srv_hdl);

	mutex_enter(&ibcm_svc_info_lock);

	/* All bindings have to be undone before deregistering. */
	if (srv_hdl->svc_bind_list != NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service:"
		    " srv_hdl %p still has bindings", srv_hdl);
		mutex_exit(&ibcm_svc_info_lock);
		return (IBT_CM_SERVICE_BUSY);
	}

	/* Look the service up by the first service id of its range. */
	key.sid = srv_hdl->svc_id;
	key.num_sids = 1;
	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_service: SID 0x%llX, numsids %d",
	    srv_hdl->svc_id, srv_hdl->svc_num_sids);

#ifdef __lock_lint
	ibcm_svc_compare(NULL, NULL);
#endif
	entry = avl_find(&ibcm_svc_avl_tree, &key, NULL);
	if (entry != srv_hdl) {
		mutex_exit(&ibcm_svc_info_lock);
		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service(): "
		    "srv_hdl %p not found", srv_hdl);
		return (IBT_INVALID_PARAM);
	}
	avl_remove(&ibcm_svc_avl_tree, entry);

	/* wait for active REQ/SREQ handling to be done */
	entry->svc_to_delete = 1;
	while (entry->svc_ref_cnt != 0)
		cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);

	mutex_exit(&ibcm_svc_info_lock);

	/* Service ids with the LOCAL AGN are handed back to the local pool. */
	if ((srv_hdl->svc_id & IB_SID_AGN_MASK) == IB_SID_AGN_LOCAL)
		ibcm_free_local_sids(srv_hdl->svc_id, srv_hdl->svc_num_sids);

	ibtl_cm_change_service_cnt(ibt_hdl, -srv_hdl->svc_num_sids);
	kmem_free(srv_hdl, sizeof (*srv_hdl));

	/* If this message isn't seen then ibt_deregister_service failed */
	IBTF_DPRINTF_L2(cmlog, "ibt_deregister_service: done !!");

	return (IBT_SUCCESS);
}

ibcm_status_t
ibcm_ar_init(void)
{
	ib_svc_id_t	sid = IBCM_DAPL_ATS_SID;
	ibcm_svc_info_t *tmp_svcp;

	IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init()");

	/* remove this special SID from the pool of available SIDs */
	if ((tmp_svcp = ibcm_create_svc_entry(sid, 1)) == NULL) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_ar_init: "
		    "DAPL ATS SID 0x%llx already registered", (longlong_t)sid);
		return (IBCM_FAILURE);
	}
	mutex_enter(&ibcm_svc_info_lock);
	ibcm_ar_svcinfop = tmp_svcp;
	ibcm_ar_list = NULL;	/* no address records registered yet */
	mutex_exit(&ibcm_svc_info_lock);
	return (IBCM_SUCCESS);
}

/*
 * ibcm_ar_fini()
 *	Remove and free the ATS service entry created by ibcm_ar_init().
 *
 *	This succeeds only when no address records are registered and the
 *	ATS entry is the sole node left in the service AVL tree; in every
 *	other case nothing is changed and IBCM_FAILURE is returned.
 */
ibcm_status_t
ibcm_ar_fini(void)
{
	ibcm_svc_info_t	*ats_svcp;

	mutex_enter(&ibcm_svc_info_lock);

	if (ibcm_ar_list != NULL ||
	    avl_numnodes(&ibcm_svc_avl_tree) != 1 ||
	    avl_first(&ibcm_svc_avl_tree) != ibcm_ar_svcinfop) {
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}

	avl_remove(&ibcm_svc_avl_tree, ibcm_ar_svcinfop);
	ats_svcp = ibcm_ar_svcinfop;
	mutex_exit(&ibcm_svc_info_lock);

	/* the entry is freed after the lock has been dropped */
	kmem_free(ats_svcp, sizeof (*ibcm_ar_svcinfop));
	return (IBCM_SUCCESS);
}


/*
 * ibcm_search_ar()
 *	Walk ibcm_ar_list looking for a record that corresponds to *arp.
 *	The caller must hold ibcm_svc_info_lock.
 *
 * Returns:
 *	IBT_SUCCESS		A record has the same GID, P_Key and data;
 *				*arpp is set to it.
 *	IBT_INCONSISTENT_AR	A record has the same GID/P_Key but different
 *				data, or the same data but a different
 *				GID/P_Key.
 *	IBT_AR_NOT_REGISTERED	No record shares either the GID/P_Key or the
 *				data.
 *
 * *arpp is written only when IBT_SUCCESS is returned.
 */
static ibt_status_t
ibcm_search_ar(ibt_ar_t *arp, ibcm_ar_t **arpp)
{
	ibcm_ar_t	*cur;
	int		idx;
	int		same_key;
	int		same_data;

	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));

	for (cur = ibcm_ar_list; cur != NULL; cur = cur->ar_link) {
		/* idx stops at the first data byte that differs, if any */
		for (idx = 0; idx < IBCM_DAPL_ATS_NBYTES; idx++) {
			if (cur->ar.ar_data[idx] != arp->ar_data[idx])
				break;
		}
		same_data = (idx == IBCM_DAPL_ATS_NBYTES);

		same_key = (cur->ar.ar_gid.gid_prefix ==
		    arp->ar_gid.gid_prefix &&
		    cur->ar.ar_gid.gid_guid == arp->ar_gid.gid_guid &&
		    cur->ar.ar_pkey == arp->ar_pkey);

		if (same_key) {
			/* same GID/P_Key must carry the same data */
			if (!same_data)
				return (IBT_INCONSISTENT_AR);
			*arpp = cur;
			return (IBT_SUCCESS);
		}

		/* same data under a different GID/P_Key is inconsistent */
		if (same_data)
			return (IBT_INCONSISTENT_AR);

		/* neither matched: try the next address record */
	}
	return (IBT_AR_NOT_REGISTERED);
}

/*
 * ibt_register_ar()
 *	Register the address record *arp (GID, P_Key and ATS data bytes) on
 *	behalf of client ibt_hdl.
 *
 *	If an identical record is already known, the client is simply added
 *	to that record's list of referencing clients.  Otherwise a new record
 *	is put on ibcm_ar_list in the IBCM_AR_INITING state, the GID/P_Key is
 *	checked against a local HCA port, and a service record is written to
 *	the SA.  Other callers registering the same record while it is
 *	IBCM_AR_INITING wait on the record's ar_cv for the outcome.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		the record is registered for this client
 *	IBT_INVALID_PARAM	exactly one of GID and P_Key is zero, or the
 *				GID/P_Key/HCA lookup failed
 *	IBT_INCONSISTENT_AR	conflicts with an already known record
 *	IBT_HCA_PORT_NOT_ACTIVE	the port is down or has no SA access handle
 *	otherwise		the ibt_get_module_failure() value when the
 *				registration that was waited on failed, or the
 *				status of ibcm_write_service_record()
 */
ibt_status_t
ibt_register_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
{
	ibcm_ar_t		*found;
	ibcm_ar_t		*tmp;
	ibt_status_t		status;
	ibt_status_t		s1, s2;
	char			*s;
	ibcm_ar_ref_t		*hdlp;
	ibcm_ar_t		*new;
	ibcm_ar_t		**linkp;
	ibtl_cm_hca_port_t	cm_port;
	uint16_t		pkey_ix;
	ibcm_hca_info_t		*hcap;
	ibmf_saa_handle_t	saa_handle;
	sa_service_record_t	*srv_recp;
	uint64_t		gid_ored;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: PKey 0x%X GID %llX:%llX",
	    arp->ar_pkey, (longlong_t)arp->ar_gid.gid_prefix,
	    (longlong_t)arp->ar_gid.gid_guid);

	/*
	 * If P_Key is 0, but GID is not, this query is invalid.
	 * If GID is 0, but P_Key is not, this query is invalid.
	 */
	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
		    "GID/P_Key is not valid");
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Assume success, so these might be needed.  They are allocated
	 * before the lock is taken because KM_SLEEP may block.
	 */
	hdlp = kmem_alloc(sizeof (*hdlp), KM_SLEEP);
	new = kmem_zalloc(sizeof (*new), KM_SLEEP);

	mutex_enter(&ibcm_svc_info_lock);
	/* search for existing GID/pkey (there can be at most 1) */
	status = ibcm_search_ar(arp, &found);
	if (status == IBT_INCONSISTENT_AR) {
		mutex_exit(&ibcm_svc_info_lock);
		kmem_free(new, sizeof (*new));
		kmem_free(hdlp, sizeof (*hdlp));
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
		    "address record is inconsistent with a known one");
		return (IBT_INCONSISTENT_AR);
	} else if (status == IBT_SUCCESS) {
		if (found->ar_flags == IBCM_AR_INITING) {
			/*
			 * Another thread is still (re)writing this record.
			 * Wait in a loop: a wakeup that arrives while the
			 * record is still IBCM_AR_INITING must not let us
			 * attach to a record whose outcome is not known yet.
			 * ar_waiters keeps the record from being freed while
			 * we sleep.
			 */
			found->ar_waiters++;
			while (found->ar_flags == IBCM_AR_INITING)
				cv_wait(&found->ar_cv, &ibcm_svc_info_lock);
			found->ar_waiters--;
		}
		if (found->ar_flags == IBCM_AR_FAILED) {
			/* the last one to leave a failed record frees it */
			if (found->ar_waiters == 0) {
				cv_destroy(&found->ar_cv);
				kmem_free(found, sizeof (*found));
			}
			mutex_exit(&ibcm_svc_info_lock);
			kmem_free(new, sizeof (*new));
			kmem_free(hdlp, sizeof (*hdlp));
			return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
		}
		/* record is good: just add this client as a reference */
		hdlp->ar_ibt_hdl = ibt_hdl;
		hdlp->ar_ref_link = found->ar_ibt_hdl_list;
		found->ar_ibt_hdl_list = hdlp;
		mutex_exit(&ibcm_svc_info_lock);
		kmem_free(new, sizeof (*new));
		ibtl_cm_change_service_cnt(ibt_hdl, 1);
		return (IBT_SUCCESS);
	} else {
		ASSERT(status == IBT_AR_NOT_REGISTERED);
	}

	/* publish a new record in the INITING state at the list head */
	hdlp->ar_ref_link = NULL;
	hdlp->ar_ibt_hdl = ibt_hdl;
	new->ar_ibt_hdl_list = hdlp;
	new->ar = *arp;
	new->ar_flags = IBCM_AR_INITING;
	new->ar_waiters = 0;
	cv_init(&new->ar_cv, NULL, CV_DEFAULT, NULL);
	new->ar_link = ibcm_ar_list;
	ibcm_ar_list = new;

	/* verify GID/pkey is valid for a local port, etc. */
	hcap = NULL;
	if ((s1 = ibtl_cm_get_hca_port(arp->ar_gid, 0, &cm_port))
	    != IBT_SUCCESS ||
	    (s2 = ibt_pkey2index_byguid(cm_port.hp_hca_guid, cm_port.hp_port,
	    arp->ar_pkey, &pkey_ix)) != IBT_SUCCESS ||
	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL) {
		/*
		 * The lock has been held since "new" was linked in, so it
		 * is still the list head and nobody can be waiting on it.
		 */
		cv_destroy(&new->ar_cv);
		ibcm_ar_list = new->ar_link;
		mutex_exit(&ibcm_svc_info_lock);
		kmem_free(new, sizeof (*new));
		kmem_free(hdlp, sizeof (*hdlp));
		status = IBT_INVALID_PARAM;
		/* s2 is only examined when s1 succeeded, i.e. when it is set */
		if (s1 == IBT_HCA_PORT_NOT_ACTIVE) {
			s = "PORT DOWN";
			status = IBT_HCA_PORT_NOT_ACTIVE;
		} else if (s1 != IBT_SUCCESS)
			s = "GID not found";
		else if (s2 != IBT_SUCCESS)
			s = "PKEY not found";
		else
			s = "CM could not find its HCA entry";
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: %s, status = %d",
		    s, status);
		return (status);
	}
	mutex_exit(&ibcm_svc_info_lock);
	saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port);

	/* create service record */
	srv_recp = kmem_zalloc(sizeof (*srv_recp), KM_SLEEP);
	srv_recp->ServiceLease = 0xFFFFFFFF;	/* infinite */
	srv_recp->ServiceP_Key = arp->ar_pkey;
	srv_recp->ServiceKey_hi = 0xDA410000ULL;	/* DAPL */
	srv_recp->ServiceKey_lo = 0xA7500000ULL;	/* ATS */
	(void) strcpy((char *)srv_recp->ServiceName, IBCM_DAPL_ATS_NAME);
	srv_recp->ServiceGID = arp->ar_gid;
	bcopy(arp->ar_data, srv_recp->ServiceData, IBCM_DAPL_ATS_NBYTES);
	srv_recp->ServiceID = IBCM_DAPL_ATS_SID;

	/* insert service record into the SA */

	IBCM_DUMP_SERVICE_REC(srv_recp);

	if (saa_handle != NULL)
		status = ibcm_write_service_record(saa_handle, srv_recp,
		    IBMF_SAA_UPDATE);
	else
		status = IBT_HCA_PORT_NOT_ACTIVE;

	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: sa access fails %d, "
		    "sid %llX", status, (longlong_t)srv_recp->ServiceID);
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: FAILED for gid "
		    "%llX:%llX pkey 0x%X", (longlong_t)arp->ar_gid.gid_prefix,
		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);

		kmem_free(srv_recp, sizeof (*srv_recp));
		kmem_free(hdlp, sizeof (*hdlp));

		/* the lock was dropped, so "new" may no longer be the head */
		mutex_enter(&ibcm_svc_info_lock);
		linkp = &ibcm_ar_list;
		tmp = *linkp;
		while (tmp != NULL) {
			if (tmp == new) {
				*linkp = new->ar_link;
				break;
			}
			linkp = &tmp->ar_link;
			tmp = *linkp;
		}
		if (new->ar_waiters > 0) {
			/* waiters see FAILED; the last of them frees "new" */
			new->ar_flags = IBCM_AR_FAILED;
			cv_broadcast(&new->ar_cv);
			mutex_exit(&ibcm_svc_info_lock);
		} else {
			cv_destroy(&new->ar_cv);
			mutex_exit(&ibcm_svc_info_lock);
			kmem_free(new, sizeof (*new));
		}
		ibcm_dec_hca_acc_cnt(hcap);
		IBTF_DPRINTF_L2(cmlog, "ibt_register_ar: "
		    "IBMF_SAA failed to write address record");
	} else {					/* SUCCESS */
		uint8_t		*b;

		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar: SUCCESS for gid "
		    "%llx:%llx pkey %x", (longlong_t)arp->ar_gid.gid_prefix,
		    (longlong_t)arp->ar_gid.gid_guid, arp->ar_pkey);
		b = arp->ar_data;

		IBTF_DPRINTF_L3(cmlog, "ibt_register_ar:"
		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
		    b[10], b[11], b[12], b[13], b[14], b[15]);
		mutex_enter(&ibcm_svc_info_lock);
		new->ar_srv_recp = srv_recp;
		new->ar_saa_handle = saa_handle;
		new->ar_port = cm_port.hp_port;
		new->ar_hcap = hcap;
		new->ar_flags = IBCM_AR_SUCCESS;
		if (new->ar_waiters > 0)
			cv_broadcast(&new->ar_cv);
		mutex_exit(&ibcm_svc_info_lock);
		ibtl_cm_change_service_cnt(ibt_hdl, 1);
		/* do not call ibcm_dec_hca_acc_cnt(hcap) until deregister */
	}
	return (status);
}

/*
 * ibt_deregister_ar()
 *	Drop client ibt_hdl's reference on the address record matching *arp.
 *
 *	When the last reference goes away and nobody is waiting on the
 *	record, the service record is deleted from the SA and the record is
 *	freed.  If ibt_register_ar() callers started waiting on the record
 *	while the delete was in flight, the service record is written back
 *	for them instead.  Should that write fail, the record is taken off
 *	ibcm_ar_list and marked IBCM_AR_FAILED; the last waiter in
 *	ibt_register_ar() frees it.
 *
 * RETURN VALUES:
 *	IBT_INVALID_PARAM	exactly one of GID and P_Key is zero
 *	IBT_AR_NOT_REGISTERED	no matching record, or not held by ibt_hdl
 *	otherwise		IBT_SUCCESS, or the status of the last
 *				ibcm_write_service_record() call made
 */
ibt_status_t
ibt_deregister_ar(ibt_clnt_hdl_t ibt_hdl, ibt_ar_t *arp)
{
	ibcm_ar_t		*found;
	ibcm_ar_t		*tmp;
	ibcm_ar_t		**linkp;
	ibcm_ar_ref_t		*hdlp;
	ibcm_ar_ref_t		**hdlpp;
	ibt_status_t		status;
	ibmf_saa_handle_t	saa_handle;
	sa_service_record_t	*srv_recp;
	uint64_t		gid_ored;

	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: pkey %x", arp->ar_pkey);
	IBTF_DPRINTF_L3(cmlog, "ibt_deregister_ar: gid %llx:%llx",
	    (longlong_t)arp->ar_gid.gid_prefix,
	    (longlong_t)arp->ar_gid.gid_guid);

	/*
	 * If P_Key is 0, but GID is not, this query is invalid.
	 * If GID is 0, but P_Key is not, this query is invalid.
	 */
	gid_ored = arp->ar_gid.gid_guid | arp->ar_gid.gid_prefix;
	if ((arp->ar_pkey == 0 && gid_ored != 0ULL) ||
	    (arp->ar_pkey != 0 && gid_ored == 0ULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
		    "GID/P_Key is not valid");
		return (IBT_INVALID_PARAM);
	}

	mutex_enter(&ibcm_svc_info_lock);
	/* search for existing GID/pkey (there can be at most 1) */
	status = ibcm_search_ar(arp, &found);
	if (status == IBT_INCONSISTENT_AR || status == IBT_AR_NOT_REGISTERED) {
		mutex_exit(&ibcm_svc_info_lock);
		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
		    "address record not found");
		return (IBT_AR_NOT_REGISTERED);
	}
	ASSERT(status == IBT_SUCCESS);

	/* find this client's reference on the record */
	hdlpp = &found->ar_ibt_hdl_list;
	hdlp = *hdlpp;
	while (hdlp != NULL) {
		if (hdlp->ar_ibt_hdl == ibt_hdl)
			break;
		hdlpp = &hdlp->ar_ref_link;
		hdlp = *hdlpp;
	}
	if (hdlp == NULL) {	/* could not find ibt_hdl on list */
		mutex_exit(&ibcm_svc_info_lock);
		IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
		    "address record found, but not for this client");
		return (IBT_AR_NOT_REGISTERED);
	}
	*hdlpp = hdlp->ar_ref_link;	/* remove ref for this client */
	if (found->ar_ibt_hdl_list == NULL && found->ar_waiters == 0) {
		/* last entry was removed */
		found->ar_flags = IBCM_AR_INITING; /* hold off register_ar */
		saa_handle = found->ar_saa_handle;
		srv_recp = found->ar_srv_recp;

		/* wait if this service record is being rewritten */
		while (found->ar_rewrite_state == IBCM_REWRITE_BUSY)
			cv_wait(&ibcm_svc_info_cv, &ibcm_svc_info_lock);
		mutex_exit(&ibcm_svc_info_lock);

		/* remove service record */
		status = ibcm_write_service_record(saa_handle, srv_recp,
		    IBMF_SAA_DELETE);
		if (status != IBT_SUCCESS)
			IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
			    "IBMF_SAA failed to delete address record");
		mutex_enter(&ibcm_svc_info_lock);
		if (found->ar_waiters == 0) {	/* still no waiters */
			/* unlink the record; tolerate it not being listed */
			linkp = &ibcm_ar_list;
			while ((tmp = *linkp) != NULL) {
				if (tmp == found) {
					*linkp = found->ar_link;
					break;
				}
				linkp = &tmp->ar_link;
			}
			ibcm_dec_hca_acc_cnt(found->ar_hcap);
			kmem_free(srv_recp, sizeof (*srv_recp));
			cv_destroy(&found->ar_cv);
			kmem_free(found, sizeof (*found));
		} else {
			/* add service record back in for the waiters */
			mutex_exit(&ibcm_svc_info_lock);
			status = ibcm_write_service_record(saa_handle, srv_recp,
			    IBMF_SAA_UPDATE);
			mutex_enter(&ibcm_svc_info_lock);
			if (status == IBT_SUCCESS)
				found->ar_flags = IBCM_AR_SUCCESS;
			else {
				/*
				 * The waiters will see IBCM_AR_FAILED and the
				 * last of them frees the record, so it must
				 * not stay on the list.  Let any rewrite that
				 * is using srv_recp finish first, then unlink
				 * the record and release what it owns.
				 */
				while (found->ar_rewrite_state ==
				    IBCM_REWRITE_BUSY)
					cv_wait(&ibcm_svc_info_cv,
					    &ibcm_svc_info_lock);
				linkp = &ibcm_ar_list;
				while ((tmp = *linkp) != NULL) {
					if (tmp == found) {
						*linkp = found->ar_link;
						break;
					}
					linkp = &tmp->ar_link;
				}
				ibcm_dec_hca_acc_cnt(found->ar_hcap);
				kmem_free(srv_recp, sizeof (*srv_recp));
				found->ar_srv_recp = NULL;
				found->ar_flags = IBCM_AR_FAILED;
				IBTF_DPRINTF_L2(cmlog, "ibt_deregister_ar: "
				    "IBMF_SAA failed to write address record");
			}
			cv_broadcast(&found->ar_cv);
		}
	}
	mutex_exit(&ibcm_svc_info_lock);
	kmem_free(hdlp, sizeof (*hdlp));
	ibtl_cm_change_service_cnt(ibt_hdl, -1);
	return (status);
}

/*
 * ibt_query_ar()
 *	Query the SA, through the port identified by *sgid, for a DAPL ATS
 *	service record matching *queryp, and copy the first match to
 *	*resultp.
 *
 *	The GID/P_Key pair is used as a match criterion only when the P_Key
 *	is non-zero; the data bytes are used only when at least one of them
 *	is non-zero.  The service name and service id are always matched.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		exactly one record matched
 *	IBT_MULTIPLE_AR		several matched; the first is in *resultp
 *	IBT_AR_NOT_REGISTERED	nothing matched
 *	IBT_INVALID_PARAM	exactly one of GID and P_Key is zero, or
 *				sgid could not be resolved to a usable port
 *	otherwise		the failure from ibcm_contact_sa_access()
 */
ibt_status_t
ibt_query_ar(ib_gid_t *sgid, ibt_ar_t *queryp, ibt_ar_t *resultp)
{
	sa_service_record_t	svcrec_req;
	sa_service_record_t	*svcrec_resp;
	void			*results_p;
	uint64_t		component_mask = 0;
	uint64_t		gid_ored;
	size_t			length;
	int			num_rec;
	int			i;
	ibmf_saa_access_args_t	access_args;
	ibt_status_t		retval;
	ibtl_cm_hca_port_t	cm_port;
	ibcm_hca_info_t		*hcap;
	ibmf_saa_handle_t	saa_handle;

	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar(%p, %p)", queryp, resultp);
	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: sgid %llx:%llx",
	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid);
	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_pkey %x", queryp->ar_pkey);
	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: query_gid %llx:%llx",
	    (longlong_t)queryp->ar_gid.gid_prefix,
	    (longlong_t)queryp->ar_gid.gid_guid);

	/*
	 * If P_Key is 0, but GID is not, this query is invalid.
	 * If GID is 0, but P_Key is not, this query is invalid.
	 */
	gid_ored = queryp->ar_gid.gid_guid | queryp->ar_gid.gid_prefix;
	if ((queryp->ar_pkey == 0 && gid_ored != 0ULL) ||
	    (queryp->ar_pkey != 0 && gid_ored == 0ULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: GID/P_Key is not valid");
		return (IBT_INVALID_PARAM);
	}

	/* resolve sgid to an HCA entry and its SA access handle */
	hcap = NULL;
	if (ibtl_cm_get_hca_port(*sgid, 0, &cm_port) != IBT_SUCCESS ||
	    (hcap = ibcm_find_hca_entry(cm_port.hp_hca_guid)) == NULL ||
	    (saa_handle = ibcm_get_saa_handle(hcap, cm_port.hp_port)) == NULL) {
		/* hcap is non-NULL only if the last step was the failure */
		if (hcap != NULL)
			ibcm_dec_hca_acc_cnt(hcap);
		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: sgid is not valid");
		return (IBT_INVALID_PARAM);
	}

	bzero(&svcrec_req, sizeof (svcrec_req));

	/* Is GID/P_Key Specified. */
	if (queryp->ar_pkey != 0) {	/* GID is non-zero from check above */
		svcrec_req.ServiceP_Key = queryp->ar_pkey;
		component_mask |= SA_SR_COMPMASK_PKEY;
		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: P_Key %X",
		    queryp->ar_pkey);
		svcrec_req.ServiceGID = queryp->ar_gid;
		component_mask |= SA_SR_COMPMASK_GID;
		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: GID %llX:%llX",
		    (longlong_t)queryp->ar_gid.gid_prefix,
		    (longlong_t)queryp->ar_gid.gid_guid);
	}

	/* Is ServiceData Specified. */
	for (i = 0; i < IBCM_DAPL_ATS_NBYTES; i++) {
		if (queryp->ar_data[i] != 0) {
			bcopy(queryp->ar_data, svcrec_req.ServiceData,
			    IBCM_DAPL_ATS_NBYTES);
			component_mask |= 0xFFFF << 7;	/* all 16 Data8 */
							/* components */
			break;
		}
	}

	/* Service Name */
	(void) strcpy((char *)svcrec_req.ServiceName, IBCM_DAPL_ATS_NAME);
	component_mask |= SA_SR_COMPMASK_NAME;

	svcrec_req.ServiceID = IBCM_DAPL_ATS_SID;
	component_mask |= SA_SR_COMPMASK_ID;

	/* component_mask is 64 bits wide; print it as such */
	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
	    "Perform SA Access: Mask: 0x%llX", (longlong_t)component_mask);

	/*
	 * Call in SA Access retrieve routine to get Service Records.
	 *
	 * SA Access framework allocated memory for the "results_p".
	 * Make sure to deallocate once we are done with the results_p.
	 * The size of the buffer allocated will be as returned in
	 * "length" field.
	 */
	access_args.sq_attr_id = SA_SERVICERECORD_ATTRID;
	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
	access_args.sq_component_mask = component_mask;
	access_args.sq_template = &svcrec_req;
	access_args.sq_template_length = sizeof (sa_service_record_t);
	access_args.sq_callback = NULL;
	access_args.sq_callback_arg = NULL;

	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
	    &results_p);

	ibcm_dec_hca_acc_cnt(hcap);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: SA Access Failed");
		return (retval);
	}

	num_rec = length / sizeof (sa_service_record_t);

	IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
	    "Found %d Service Records.", num_rec);

	/* Validate the returned number of records. */
	if ((results_p != NULL) && (num_rec > 0)) {
		uint8_t		*b;

		/* Just return info from the first service record. */
		svcrec_resp = (sa_service_record_t *)results_p;

		/* The Service GID and Service ID */
		resultp->ar_gid = svcrec_resp->ServiceGID;
		resultp->ar_pkey = svcrec_resp->ServiceP_Key;
		bcopy(svcrec_resp->ServiceData,
		    resultp->ar_data, IBCM_DAPL_ATS_NBYTES);

		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar: "
		    "Found: pkey %x dgid %llX:%llX", resultp->ar_pkey,
		    (longlong_t)resultp->ar_gid.gid_prefix,
		    (longlong_t)resultp->ar_gid.gid_guid);
		b = resultp->ar_data;
		IBTF_DPRINTF_L3(cmlog, "ibt_query_ar:"
		    " data %d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d",
		    b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9],
		    b[10], b[11], b[12], b[13], b[14], b[15]);

		/* Deallocate the memory for results_p. */
		kmem_free(results_p, length);
		if (num_rec > 1)
			retval = IBT_MULTIPLE_AR;
		else
			retval = IBT_SUCCESS;
	} else {
		/*
		 * A buffer that is too short to hold one whole record is
		 * still ours to release.
		 */
		if (results_p != NULL && length != 0)
			kmem_free(results_p, length);
		IBTF_DPRINTF_L2(cmlog, "ibt_query_ar: "
		    "ibmf_sa_access found 0 matching records");
		retval = IBT_AR_NOT_REGISTERED;
	}
	return (retval);
}

/*
 * ibcm_mark_ar()
 *	Flag every address record on ibcm_ar_list that belongs to the given
 *	HCA GUID and port as needing its service record rewritten.  Records
 *	with no HCA entry recorded (ar_hcap == NULL) are skipped.
 *
 *	The caller must hold ibcm_svc_info_lock.
 */
static void
ibcm_mark_ar(ib_guid_t hca_guid, uint8_t port)
{
	ibcm_ar_t	*rec;

	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));

	rec = ibcm_ar_list;
	while (rec != NULL) {
		if (rec->ar_hcap != NULL &&
		    rec->ar_hcap->hca_guid == hca_guid &&
		    rec->ar_port == port) {
			/* even if it's busy, we mark it for rewrite */
			rec->ar_rewrite_state = IBCM_REWRITE_NEEDED;
		}
		rec = rec->ar_link;
	}
}

/*
 * rewrite all ATS service records
 *
 * Writes the service record of every address record on ibcm_ar_list whose
 * ar_rewrite_state is IBCM_REWRITE_NEEDED back to the SA.
 *
 * Called with ibcm_svc_info_lock held, and returns with it held, but the
 * lock is dropped around each ibcm_write_service_record() call.
 *
 * Returns non-zero if at least one record was rewritten, 0 otherwise.
 */
static int
ibcm_rewrite_ar(void)
{
	ibcm_ar_t		*tmp;
	ibmf_saa_handle_t	saa_handle;
	sa_service_record_t	*srv_recp;
	ibt_status_t		rval;
	int			did_something = 0;

	ASSERT(MUTEX_HELD(&ibcm_svc_info_lock));
check_for_work:
	for (tmp = ibcm_ar_list; tmp != NULL; tmp = tmp->ar_link) {
		if (tmp->ar_rewrite_state == IBCM_REWRITE_NEEDED) {
			/* BUSY marks a rewrite in progress for this record */
			tmp->ar_rewrite_state = IBCM_REWRITE_BUSY;
			/* snapshot what the write needs before unlocking */
			saa_handle = tmp->ar_saa_handle;
			srv_recp = tmp->ar_srv_recp;
			mutex_exit(&ibcm_svc_info_lock);
			IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_ar: "
			    "rewriting ar @ %p", tmp);
			did_something = 1;
			rval = ibcm_write_service_record(saa_handle, srv_recp,
			    IBMF_SAA_UPDATE);
			/* a failed write is only logged */
			if (rval != IBT_SUCCESS)
				IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_ar: "
				    "ibcm_write_service_record failed: "
				    "status = %d", rval);
			mutex_enter(&ibcm_svc_info_lock);
			/* if it got marked again, then we want to rewrite */
			if (tmp->ar_rewrite_state == IBCM_REWRITE_BUSY)
				tmp->ar_rewrite_state = IBCM_REWRITE_IDLE;
			/* in case there was a waiter... */
			cv_broadcast(&ibcm_svc_info_cv);
			/*
			 * The list was unlocked during the write, so restart
			 * the scan from the head rather than follow ar_link.
			 */
			goto check_for_work;
		}
	}
	return (did_something);
}

/*
 * ibcm_rewrite_svc_record()
 *	Write the service record described by binding sbindp back to the SA,
 *	once for each service id in srv_hdl's range (svc_id through
 *	svc_id + svc_num_sids - 1).
 *
 *	Nothing is written if the binding's HCA entry or SA access handle
 *	cannot be obtained.  Individual write failures are logged and the
 *	remaining service ids are still attempted.
 */
static void
ibcm_rewrite_svc_record(ibcm_svc_info_t *srv_hdl, ibcm_svc_bind_t *sbindp)
{
	sa_service_record_t	rec;
	ibmf_saa_handle_t	saa;
	ibcm_hca_info_t		*hca;
	ib_svc_id_t		first_sid, last_sid, cur_sid;
	ibt_status_t		wr_status;

	if ((hca = ibcm_find_hca_entry(sbindp->sbind_hcaguid)) == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
		    "NO HCA found for HCA GUID %llX", sbindp->sbind_hcaguid);
		return;
	}

	/* from here on every exit drops the HCA access count again */
	if ((saa = ibcm_get_saa_handle(hca, sbindp->sbind_port)) == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record: "
		    "saa_handle is NULL");
		ibcm_dec_hca_acc_cnt(hca);
		return;
	}

	IBTF_DPRINTF_L3(cmlog, "ibcm_rewrite_svc_record: "
	    "rewriting svc '%s', port_guid = %llX", sbindp->sbind_name,
	    sbindp->sbind_gid.gid_guid);

	/* build the record template from the binding */
	bzero(&rec, sizeof (rec));
	(void) strcpy((char *)rec.ServiceName, sbindp->sbind_name);
	bcopy(sbindp->sbind_data, rec.ServiceData, IB_SVC_DATA_LEN);
	rec.ServiceGID = sbindp->sbind_gid;
	rec.ServiceKey_hi = sbindp->sbind_key[0];
	rec.ServiceKey_lo = sbindp->sbind_key[1];
	rec.ServiceP_Key = sbindp->sbind_pkey;
	rec.ServiceLease = sbindp->sbind_lease;

	/* insert srv record into the SA, one write per service id */
	first_sid = srv_hdl->svc_id;
	last_sid = first_sid + srv_hdl->svc_num_sids - 1;
	cur_sid = first_sid;
	while (cur_sid <= last_sid) {
		rec.ServiceID = cur_sid;

		wr_status = ibcm_write_service_record(saa, &rec,
		    IBMF_SAA_UPDATE);

		IBTF_DPRINTF_L4(cmlog, "ibcm_rewrite_svc_record: "
		    "ibcm_write_service_record, SvcId = %llX, "
		    "rval = %d", (longlong_t)cur_sid, wr_status);
		if (wr_status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_rewrite_svc_record:"
			    " ibcm_write_service_record fails %d sid %llX",
			    wr_status, (longlong_t)cur_sid);
		}
		cur_sid++;
	}

	ibcm_dec_hca_acc_cnt(hca);
}

/*
 * Task to mark all service records as needing to be rewritten to the SM/SA.
 *
 * arg is an ibcm_port_up_t naming the HCA GUID and port; this task owns it
 * and frees it on every return path.
 *
 * All address records and service bindings for that HCA/port are marked
 * IBCM_REWRITE_NEEDED.  If another instance of this task is already doing
 * the rewriting, this one returns right after marking and leaves the work
 * to it.  Otherwise this task does not return until no record is left
 * marked as needing a rewrite.
 */
void
ibcm_service_record_rewrite_task(void *arg)
{
	ibcm_port_up_t	*pup = (ibcm_port_up_t *)arg;
	ib_guid_t	hca_guid = pup->pup_hca_guid;
	uint8_t		port = pup->pup_port;
	ibcm_svc_info_t	*svcp;
	ibcm_svc_bind_t	*sbp;
	avl_tree_t	*avl_tree = &ibcm_svc_avl_tree;
	static int	task_is_running = 0;

	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task STARTED "
	    "for hca_guid %llX, port %d", hca_guid, port);

	mutex_enter(&ibcm_svc_info_lock);

	/* phase 1: mark everything that belongs to this HCA/port */
	ibcm_mark_ar(hca_guid, port);
	for (svcp = avl_first(avl_tree); svcp != NULL;
	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
		sbp = svcp->svc_bind_list;
		while (sbp != NULL) {
			if (sbp->sbind_pkey != 0 &&
			    sbp->sbind_port == port &&
			    sbp->sbind_hcaguid == hca_guid) {
				/* even if it's busy, we mark it for rewrite */
				sbp->sbind_rewrite_state = IBCM_REWRITE_NEEDED;
			}
			sbp = sbp->sbind_link;
		}
	}
	if (task_is_running) {
		/* let the other task thread finish the work */
		mutex_exit(&ibcm_svc_info_lock);
		/* the argument is ours to free on this path as well */
		kmem_free(pup, sizeof (ibcm_port_up_t));
		return;
	}
	task_is_running = 1;	/* protected by ibcm_svc_info_lock */

	/* phase 2: rewrite whatever is marked, until nothing is left */
	(void) ibcm_rewrite_ar();

check_for_work:
	for (svcp = avl_first(avl_tree); svcp != NULL;
	    svcp = avl_walk(avl_tree, svcp, AVL_AFTER)) {
		sbp = svcp->svc_bind_list;
		while (sbp != NULL) {
			if (sbp->sbind_rewrite_state == IBCM_REWRITE_NEEDED) {
				sbp->sbind_rewrite_state = IBCM_REWRITE_BUSY;
				/* the SA write is done without the lock */
				mutex_exit(&ibcm_svc_info_lock);
				ibcm_rewrite_svc_record(svcp, sbp);
				mutex_enter(&ibcm_svc_info_lock);
				/* if it got marked again, we want to rewrite */
				if (sbp->sbind_rewrite_state ==
				    IBCM_REWRITE_BUSY)
					sbp->sbind_rewrite_state =
					    IBCM_REWRITE_IDLE;
				/* in case there was a waiter... */
				cv_broadcast(&ibcm_svc_info_cv);
				/* lock was dropped: rescan from the start */
				goto check_for_work;
			}
			sbp = sbp->sbind_link;
		}
	}
	/*
	 * If there were no service records to write, and we failed to
	 * have to rewrite any more ATS service records, then we're done.
	 */
	if (ibcm_rewrite_ar() != 0)
		goto check_for_work;
	task_is_running = 0;
	mutex_exit(&ibcm_svc_info_lock);

	IBTF_DPRINTF_L3(cmlog, "ibcm_service_record_rewrite_task DONE");
	kmem_free(pup, sizeof (ibcm_port_up_t));
}

/*
 * ibt_ofuvcm_get_req_data()
 *	Fill *req_data with values taken from the REQ message saved in the
 *	connection state identified by session_id, plus the CM's default
 *	RNR NAK time.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		*req_data has been filled in
 *	IBT_CHAN_STATE_INVALID	the connection is in neither the
 *				IBCM_STATE_REQ_RCVD nor the
 *				IBCM_STATE_MRA_SENT state
 *	IBT_INVALID_PARAM	the connection is in active mode
 */
ibt_status_t
ibt_ofuvcm_get_req_data(void *session_id, ibt_ofuvcm_req_data_t *req_data)
{
	ibcm_state_data_t	*statep = (ibcm_state_data_t *)session_id;
	ibcm_req_msg_t		*reqp;
	ibt_status_t		rv;

	IBTF_DPRINTF_L3(cmlog, "ibt_get_ofuvcm_req_data: session_id %p",
	    session_id);

	mutex_enter(&statep->state_mutex);
	if (!(statep->state == IBCM_STATE_REQ_RCVD ||
	    statep->state == IBCM_STATE_MRA_SENT)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Invalid "
		    "State %x", statep->state);
		rv = IBT_CHAN_STATE_INVALID;
	} else if (statep->mode == IBCM_ACTIVE_MODE) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_ofuvcm_req_data: Active mode "
		    "not supported");
		rv = IBT_INVALID_PARAM;
	} else {
		ASSERT(statep->req_msgp);

		/*
		 * Fill in the additional req message values required for
		 * RTR transition.
		 * Should the PSN be same as the active side??
		 */
		reqp = (ibcm_req_msg_t *)statep->req_msgp;
		req_data->req_rq_psn =
		    b2h32(reqp->req_starting_psn_plus) >> 8;
		req_data->req_path_mtu = reqp->req_mtu_plus >> 4;
		req_data->req_rnr_nak_time = ibcm_default_rnr_nak_time;
		rv = IBT_SUCCESS;
	}
	mutex_exit(&statep->state_mutex);

	return (rv);
}

/*
 * ibt_ofuvcm_proceed()
 *	Variant of ibt_cm_proceed() that accepts only IBT_CM_EVENT_REQ_RCV.
 *	It sets is_this_ofuv_chan on the connection state and then hands all
 *	of its arguments to ibt_cm_proceed().
 *
 * RETURN VALUES:
 *	IBT_INVALID_PARAM	session_id is NULL, status is IBT_CM_DEFER,
 *				or event is not IBT_CM_EVENT_REQ_RCV
 *	otherwise		whatever ibt_cm_proceed() returns
 */
ibt_status_t
ibt_ofuvcm_proceed(ibt_cm_event_type_t event, void *session_id,
    ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
    void *priv_data, ibt_priv_data_len_t priv_data_len)
{
	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
	ibt_status_t		ret;

	/* validate session_id before the traces below dereference it */
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : Invalid Args");
		return (IBT_INVALID_PARAM);
	}

	IBTF_DPRINTF_L3(cmlog, "ibt_ofuvcm_proceed chan 0x%p event %x "
	    "status %x session_id %p", statep->channel, event, status,
	    session_id);

	IBTF_DPRINTF_L5(cmlog, "ibt_ofuvcm_proceed chan 0x%p "
	    "cm_event_data %p, priv_data %p priv_data_len %x",
	    statep->channel, cm_event_data, priv_data, priv_data_len);

	/* validate status */
	if (status == IBT_CM_DEFER) {
		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : Invalid Args");
		return (IBT_INVALID_PARAM);
	}

	if (event != IBT_CM_EVENT_REQ_RCV) {
		IBTF_DPRINTF_L2(cmlog, "ibt_ofuvcm_proceed : only for REQ_RCV");
		return (IBT_INVALID_PARAM);
	}
	mutex_enter(&statep->state_mutex);
	statep->is_this_ofuv_chan = B_TRUE;
	mutex_exit(&statep->state_mutex);

	ret = ibt_cm_proceed(event, session_id, status, cm_event_data,
	    priv_data, priv_data_len);
	return (ret);
}

/*
 * Function:
 * 	ibt_cm_proceed
 *
 * Verifies the arguments and dispatches the cm state machine processing
 * via taskq
 *
 * The call blocks until the client's CM handler for the deferred event has
 * returned to the CM.  The private data and reply data are copied, so the
 * caller's buffers are not referenced after this function returns.
 *
 * RETURN VALUES:
 *	IBT_SUCCESS		processing was dispatched to the taskq
 *	IBT_INVALID_PARAM	NULL session_id, status of IBT_CM_DEFER,
 *				NULL priv_data with a non-zero length, an
 *				unsupported event, or an event that does not
 *				fit the current state or private data limit
 *	IBT_CHAN_STATE_INVALID	the client's CM handler did not return
 *				DEFER for this event
 *	IBT_APM_NOT_SUPPORTED	reserved for the LAP case (not produced by
 *				the checks in this function)
 */

ibt_status_t
ibt_cm_proceed(ibt_cm_event_type_t event, void *session_id,
    ibt_cm_status_t status, ibt_cm_proceed_reply_t *cm_event_data,
    void *priv_data, ibt_priv_data_len_t priv_data_len)
{
	ibcm_state_data_t *statep = (ibcm_state_data_t *)session_id;
	ibcm_proceed_targs_t	*proceed_targs;
	ibcm_proceed_error_t	proceed_error;

	/* validate session_id before the traces below dereference it */
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
		return (IBT_INVALID_PARAM);
	}

	IBTF_DPRINTF_L3(cmlog, "ibt_cm_proceed chan 0x%p event %x status %x "
	    "session_id %p", statep->channel, event, status, session_id);

	IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed chan 0x%p cm_event_data %p, "
	    "priv_data %p priv_data_len %x", statep->channel, cm_event_data,
	    priv_data, priv_data_len);

	/* validate status */
	if (status == IBT_CM_DEFER) {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : Invalid Args");
		return (IBT_INVALID_PARAM);
	}

	/* If priv data len specified, then priv_data cannot be NULL */
	if ((priv_data_len > 0) && (priv_data == NULL))
		return (IBT_INVALID_PARAM);

	proceed_error = IBCM_PROCEED_INVALID_NONE;

	/* each event is legal only in certain states, with its own limit */
	mutex_enter(&statep->state_mutex);
	if (event == IBT_CM_EVENT_REQ_RCV) {

		if ((statep->state != IBCM_STATE_REQ_RCVD) &&
		    (statep->state != IBCM_STATE_MRA_SENT))
			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
		else if (priv_data_len > IBT_REP_PRIV_DATA_SZ)
			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;

	} else if (event == IBT_CM_EVENT_REP_RCV) {
		if ((statep->state != IBCM_STATE_REP_RCVD) &&
		    (statep->state != IBCM_STATE_MRA_REP_SENT))
			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
		else if (priv_data_len > IBT_RTU_PRIV_DATA_SZ)
			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
	} else if (event == IBT_CM_EVENT_LAP_RCV) {
		if ((statep->ap_state != IBCM_AP_STATE_LAP_RCVD) &&
		    (statep->ap_state != IBCM_AP_STATE_MRA_LAP_SENT))
			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
		else if (priv_data_len > IBT_APR_PRIV_DATA_SZ)
			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
	} else if (event == IBT_CM_EVENT_CONN_CLOSED) {
		if (statep->state != IBCM_STATE_DREQ_RCVD)
			proceed_error = IBCM_PROCEED_INVALID_EVENT_STATE;
		else if (priv_data_len > IBT_DREP_PRIV_DATA_SZ)
			proceed_error = IBCM_PROCEED_INVALID_PRIV_SZ;
	} else {
			proceed_error = IBCM_PROCEED_INVALID_EVENT;
	}

	/* if there is an error, print an error message and return */
	if (proceed_error != IBCM_PROCEED_INVALID_NONE) {
		mutex_exit(&statep->state_mutex);
		if (proceed_error == IBCM_PROCEED_INVALID_EVENT_STATE) {
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p "
			    "Invalid Event/State combination specified",
			    statep->channel);
			return (IBT_INVALID_PARAM);
		} else if (proceed_error == IBCM_PROCEED_INVALID_PRIV_SZ) {
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p "
			    "Invalid Event/priv len combination specified",
			    statep->channel);
			return (IBT_INVALID_PARAM);
		} else if (proceed_error == IBCM_PROCEED_INVALID_EVENT) {
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p "
			    "Invalid Event specified", statep->channel);
			return (IBT_INVALID_PARAM);
		} else {
			ASSERT(proceed_error == IBCM_PROCEED_INVALID_LAP);
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p "
			    "IBT_CM_EVENT_LAP_RCV not supported",
			    statep->channel);
			/* UNTIL HCA DRIVER ENABLES AP SUPPORT, FAIL THE CALL */
			return (IBT_APM_NOT_SUPPORTED);
		}
	}


	/* wait until client's CM handler returns DEFER status back to CM */

	while (statep->clnt_proceed == IBCM_BLOCK) {
		IBTF_DPRINTF_L5(cmlog, "ibt_cm_proceed : chan 0x%p blocked for "
		    "return of client's cm handler", statep->channel);
		cv_wait(&statep->block_client_cv, &statep->state_mutex);
	}

	if (statep->clnt_proceed == IBCM_FAIL) {
		mutex_exit(&statep->state_mutex);
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_proceed : chan 0x%p Failed as "
		    "client returned non-DEFER status from cm handler",
		    statep->channel);
		return (IBT_CHAN_STATE_INVALID);
	}

	ASSERT(statep->clnt_proceed == IBCM_UNBLOCK);
	/* a second proceed call for the same event will now fail */
	statep->clnt_proceed = IBCM_FAIL;
	mutex_exit(&statep->state_mutex);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))

	/* the state machine processing is done in a separate thread */

	/* proceed_targs is freed in ibcm_proceed_via_taskq */
	proceed_targs = kmem_alloc(sizeof (ibcm_proceed_targs_t),
	    KM_SLEEP);

	proceed_targs->event  = event;
	proceed_targs->status = status;
	proceed_targs->priv_data_len = priv_data_len;

	/* priv_data may be NULL here only when priv_data_len is 0 */
	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);

	proceed_targs->tst.rc.statep = statep;

	/*
	 * Do not copy from a NULL reply pointer; hand the taskq zeroed
	 * reply data instead (the buffer came from kmem_alloc, not zalloc).
	 */
	if (cm_event_data != NULL) {
		bcopy(cm_event_data, &proceed_targs->tst.rc.rc_cm_event_data,
		    sizeof (ibt_cm_proceed_reply_t));
	} else {
		bzero(&proceed_targs->tst.rc.rc_cm_event_data,
		    sizeof (ibt_cm_proceed_reply_t));
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))

	(void) taskq_dispatch(ibcm_taskq, ibcm_proceed_via_taskq,
	    proceed_targs, TQ_SLEEP);

	return (IBT_SUCCESS);
}

/*
 * Function:
 * 	ibcm_proceed_via_taskq
 *
 * Called from taskq, dispatched by ibt_cm_proceed
 * Completes the cm state processing for ibt_cm_proceed
 *
 * targs is the ibcm_proceed_targs_t built by ibt_cm_proceed; it carries the
 * event, the client's status, private data and reply data, and is freed
 * here once the event has been handled.
 */
void
ibcm_proceed_via_taskq(void *targs)
{
	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
	ibcm_state_data_t *statep = proceed_targs->tst.rc.statep;
	ibt_cm_reason_t reject_reason;
	uint8_t arej_len;
	ibcm_status_t response;
	ibcm_clnt_reply_info_t clnt_info;

	/* present the saved client reply the way the handlers expect it */
	clnt_info.reply_event = &proceed_targs->tst.rc.rc_cm_event_data;
	clnt_info.priv_data = proceed_targs->priv_data;
	clnt_info.priv_data_len = proceed_targs->priv_data_len;

	/* targs is a pointer, so it is printed with %p */
	IBTF_DPRINTF_L4(cmlog, "ibcm_proceed_via_taskq chan 0x%p targs %p",
	    statep->channel, targs);

	if (proceed_targs->event == IBT_CM_EVENT_REQ_RCV) {
		response =
		    ibcm_process_cep_req_cm_hdlr(statep, proceed_targs->status,
		    &clnt_info, &reject_reason, &arej_len,
		    (ibcm_req_msg_t *)statep->defer_cm_msg);

		ibcm_handle_cep_req_response(statep, response, reject_reason,
		    arej_len);

	} else if (proceed_targs->event == IBT_CM_EVENT_REP_RCV) {
		response =
		    ibcm_process_cep_rep_cm_hdlr(statep, proceed_targs->status,
		    &clnt_info, &reject_reason, &arej_len,
		    (ibcm_rep_msg_t *)statep->defer_cm_msg);

		ibcm_handle_cep_rep_response(statep, response, reject_reason,
		    arej_len, (ibcm_rep_msg_t *)statep->defer_cm_msg);

	} else if (proceed_targs->event == IBT_CM_EVENT_LAP_RCV) {
		ibcm_process_cep_lap_cm_hdlr(statep, proceed_targs->status,
		    &clnt_info, (ibcm_lap_msg_t *)statep->defer_cm_msg,
		    (ibcm_apr_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg));

		ibcm_post_apr_mad(statep);

	} else {
		/* ibt_cm_proceed lets no other event through */
		ASSERT(proceed_targs->event == IBT_CM_EVENT_CONN_CLOSED);
		ibcm_handle_cep_dreq_response(statep, proceed_targs->priv_data,
		    proceed_targs->priv_data_len);
	}

	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
}

/*
 * Function:
 *	ibt_cm_ud_proceed
 *
 * Verifies the arguments and dispatches the cm state machine processing
 * via taskq
 *
 * Input:
 *	session_id	The UD state data (ibcm_ud_state_data_t) this call
 *			refers to. Must not be NULL.
 *	ud_channel	UD channel; queried for its QPN and QKey only when
 *			status is IBT_CM_ACCEPT.
 *	status		Client's decision. IBT_CM_DEFER is not allowed here.
 *	redirect_infop	Redirect information; copied only when status is
 *			IBT_CM_REDIRECT, in which case it must not be NULL.
 *	priv_data	Private data to copy into the task arguments. May be
 *			NULL only if priv_data_len is 0.
 *	priv_data_len	Length of priv_data; at most IBT_SIDR_REP_PRIV_DATA_SZ.
 *
 * Returns:
 *	IBT_SUCCESS		Processing was dispatched to the taskq.
 *	IBT_INVALID_PARAM	Bad argument, channel is not a UD channel,
 *				the state is not IBCM_STATE_SIDR_REQ_RCVD,
 *				or the client's cm handler did not return
 *				DEFER.
 */
ibt_status_t
ibt_cm_ud_proceed(void *session_id, ibt_channel_hdl_t ud_channel,
    ibt_cm_status_t status, ibt_redirect_info_t *redirect_infop,
    void *priv_data, ibt_priv_data_len_t priv_data_len)
{
	ibcm_ud_state_data_t *ud_statep = (ibcm_ud_state_data_t *)session_id;
	ibcm_proceed_targs_t	*proceed_targs;
	ibt_qp_query_attr_t	qp_attr;
	ibt_status_t		retval;

	IBTF_DPRINTF_L3(cmlog, "ibt_cm_ud_proceed session_id %p "
	    "ud_channel %p ", session_id, ud_channel);

	IBTF_DPRINTF_L4(cmlog, "ibt_cm_ud_proceed status %x priv_data %p "
	    "priv_data_len %x",  status, priv_data, priv_data_len);

	/* validate session_id and status */
	if ((ud_statep == NULL) || (status == IBT_CM_DEFER)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid Args");
		return (IBT_INVALID_PARAM);
	}

	/* If priv data len specified, then priv_data cannot be NULL */
	if ((priv_data_len > 0) && (priv_data == NULL))
		return (IBT_INVALID_PARAM);

	if (priv_data_len > IBT_SIDR_REP_PRIV_DATA_SZ)
		return (IBT_INVALID_PARAM);

	/*
	 * Redirect info is copied below when redirecting, so reject a NULL
	 * pointer now, before any state has been modified.
	 */
	if ((status == IBT_CM_REDIRECT) && (redirect_infop == NULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : redirect_infop "
		    "is NULL for IBT_CM_REDIRECT");
		return (IBT_INVALID_PARAM);
	}

	/* on accept, retrieve qpn and qkey from the ud channel */
	if (status == IBT_CM_ACCEPT) {
		retval = ibt_query_qp(ud_channel, &qp_attr);
		if ((retval != IBT_SUCCESS) ||
		    (qp_attr.qp_info.qp_trans != IBT_UD_SRV)) {
			IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed: "
			    "Failed to retrieve QPN from the channel: %d",
			    retval);
			return (IBT_INVALID_PARAM);
		}
	}

	/* validate statep's state */
	mutex_enter(&ud_statep->ud_state_mutex);

	if (ud_statep->ud_state != IBCM_STATE_SIDR_REQ_RCVD) {
		mutex_exit(&ud_statep->ud_state_mutex);
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Invalid State "
		    "specified");
		return (IBT_INVALID_PARAM);
	}

	/* wait until client's CM handler returns DEFER status back to CM */

	while (ud_statep->ud_clnt_proceed == IBCM_BLOCK) {
		IBTF_DPRINTF_L5(cmlog, "ibt_cm_ud_proceed : Blocked for return"
		    " of client's ud cm handler");
		cv_wait(&ud_statep->ud_block_client_cv,
		    &ud_statep->ud_state_mutex);
	}

	if (ud_statep->ud_clnt_proceed == IBCM_FAIL) {
		mutex_exit(&ud_statep->ud_state_mutex);
		IBTF_DPRINTF_L2(cmlog, "ibt_cm_ud_proceed : Failed as client "
		    "returned non-DEFER status from cm handler");
		return (IBT_INVALID_PARAM);
	}

	/* Mark as consumed so that a second proceed call on this state fails */
	ASSERT(ud_statep->ud_clnt_proceed == IBCM_UNBLOCK);
	ud_statep->ud_clnt_proceed = IBCM_FAIL;
	mutex_exit(&ud_statep->ud_state_mutex);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*proceed_targs))

	/* the state machine processing is done in a separate thread */

	/* proceed_targs is freed in ibcm_ud_proceed_via_taskq */
	proceed_targs = kmem_zalloc(sizeof (ibcm_proceed_targs_t),
	    KM_SLEEP);

	proceed_targs->status = status;
	proceed_targs->priv_data_len = priv_data_len;

	bcopy(priv_data, proceed_targs->priv_data, priv_data_len);

	/* qp_attr was filled in above only for IBT_CM_ACCEPT */
	if (status == IBT_CM_ACCEPT) {
		proceed_targs->tst.ud.ud_qkey =
		    qp_attr.qp_info.qp_transport.ud.ud_qkey;
		proceed_targs->tst.ud.ud_qpn = qp_attr.qp_qpn;
	}

	proceed_targs->tst.ud.ud_statep = ud_statep;

	/* copy redirect info based on status */
	if (status == IBT_CM_REDIRECT)
		bcopy(redirect_infop, &proceed_targs->tst.ud.ud_redirect_info,
		    sizeof (ibt_redirect_info_t));

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*proceed_targs))

	(void) taskq_dispatch(ibcm_taskq, ibcm_ud_proceed_via_taskq,
	    proceed_targs, TQ_SLEEP);

	return (IBT_SUCCESS);
}

/*
 * Function:
 * 	ibcm_ud_proceed_via_taskq
 *
 * Called from taskq, dispatched by ibt_cm_ud_proceed
 * Completes the cm state processing for ibt_cm_ud_proceed
 */
void
ibcm_ud_proceed_via_taskq(void *targs)
{
	ibcm_proceed_targs_t	*proceed_targs = (ibcm_proceed_targs_t *)targs;
	ibcm_ud_state_data_t	*ud_statep = proceed_targs->tst.ud.ud_statep;
	ibcm_ud_clnt_reply_info_t ud_clnt_info;
	ibt_sidr_status_t	sidr_status;

	IBTF_DPRINTF_L4(cmlog, "ibcm_ud_proceed_via_taskq(%p)", targs);

	ud_clnt_info.ud_qpn  = proceed_targs->tst.ud.ud_qpn;
	ud_clnt_info.ud_qkey  = proceed_targs->tst.ud.ud_qkey;
	ud_clnt_info.priv_data = proceed_targs->priv_data;
	ud_clnt_info.priv_data_len = proceed_targs->priv_data_len;
	ud_clnt_info.redirect_infop = &proceed_targs->tst.ud.ud_redirect_info;

	/* validate event and statep's state */
	ibcm_process_sidr_req_cm_hdlr(ud_statep, proceed_targs->status,
	    &ud_clnt_info, &sidr_status,
	    (ibcm_sidr_rep_msg_t *)IBCM_OUT_MSGP(ud_statep->ud_stored_msg));

	ibcm_post_sidr_rep_mad(ud_statep, sidr_status);

	/* decr the statep ref cnt incremented in ibcm_process_sidr_req_msg */
	mutex_enter(&ud_statep->ud_state_mutex);
	IBCM_UD_REF_CNT_DECR(ud_statep);
	mutex_exit(&ud_statep->ud_state_mutex);

	kmem_free(targs, sizeof (ibcm_proceed_targs_t));
}

/*
 * Function:
 *	ibt_set_alt_path
 * Input:
 *	channel		Channel handle returned from ibt_alloc_rc_channel(9F).
 *
 *	mode		Execute in blocking or non blocking mode.
 *
 *	alt_path	A pointer to an ibt_alt_path_info_t as returned from an
 *			ibt_get_alt_path(9F) call that specifies the new
 *			alternate path. Must not be NULL.
 *
 *	priv_data       A pointer to a buffer specified by caller for the
 *			private data in the outgoing CM Load Alternate Path
 *			(LAP) message sent to the remote host. This can be NULL
 *			if no private data is available to communicate to the
 *			remote node.
 *
 *	priv_data_len   Length of valid data in priv_data, this should be less
 *			than or equal to IBT_LAP_PRIV_DATA_SZ. Longer data is
 *			truncated to IBT_LAP_PRIV_DATA_SZ bytes.
 *
 * Output:
 *	ret_args	If called in blocking mode, points to a return argument
 *			structure of type ibt_ap_returns_t. Must be NULL in
 *			non-blocking mode.
 *
 * Returns:
 *	IBT_SUCCESS on Success else appropriate error:
 *	IBT_CHAN_HDL_INVALID		channel is not valid.
 *	IBT_CHAN_SRV_TYPE_INVALID	channel is not an RC channel.
 *	IBT_INVALID_PARAM		bad mode, ret_args, alt_path or port.
 *	IBT_CHAN_STATE_INVALID		connection is not established, or an
 *					alternate path operation is already
 *					in progress.
 *	IBT_CM_FAILURE			no CM state for the channel, or (in
 *					blocking mode) the alternate path was
 *					not loaded.
 *	Failures of ibt_get_port_state_byguid, ibcm_hca_reinit_port and
 *	ibcm_alloc_out_msg are returned unchanged.
 * Description:
 *	Load the specified alternate path. Causes the CM to send an LAP message
 *	to the remote node.
 *	Can only be called on a previously opened RC channel.
 */
ibt_status_t
ibt_set_alt_path(ibt_channel_hdl_t channel, ibt_execution_mode_t mode,
    ibt_alt_path_info_t *alt_path, void *priv_data,
    ibt_priv_data_len_t priv_data_len, ibt_ap_returns_t *ret_args)
{
	ibmf_handle_t		ibmf_hdl;
	ibt_status_t		status = IBT_SUCCESS;
	ibcm_lap_msg_t		*lap_msgp;
	ibcm_hca_info_t		*hcap;
	ibcm_state_data_t	*statep;
	uint8_t			port_no;
	ib_lid_t		alternate_slid;
	ibt_priv_data_len_t	len;
	ib_lid_t		base_lid;
	boolean_t		alt_grh;

	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path(%p, %x, %p, %p, %x, %p)",
	    channel, mode, alt_path, priv_data, priv_data_len, ret_args);

	/* validate channel */
	if (IBCM_INVALID_CHANNEL(channel)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: invalid channel");
		return (IBT_CHAN_HDL_INVALID);
	}

	if (ibtl_cm_get_chan_type(channel) != IBT_RC_SRV) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
		    "Invalid Channel type: Applicable only to RC Channel");
		return (IBT_CHAN_SRV_TYPE_INVALID);
	}

	if (mode == IBT_NONBLOCKING) {
		if (ret_args != NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
			    "ret_args should be NULL when called in "
			    "non-blocking mode");
			return (IBT_INVALID_PARAM);
		}
	} else if (mode == IBT_BLOCKING) {
		if (ret_args == NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
			    "ret_args should be Non-NULL when called in "
			    "blocking mode");
			return (IBT_INVALID_PARAM);
		}
		if (ret_args->ap_priv_data_len > IBT_APR_PRIV_DATA_SZ) {
			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
			    "expected private data length is too large");
			return (IBT_INVALID_PARAM);
		}
		if ((ret_args->ap_priv_data_len > 0) &&
		    (ret_args->ap_priv_data == NULL)) {
			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
			    "apr_priv_data_len > 0, but apr_priv_data NULL");
			return (IBT_INVALID_PARAM);
		}
	} else { /* any other mode is not valid for ibt_set_alt_path */
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
		    "invalid mode %x specified", mode);
		return (IBT_INVALID_PARAM);
	}

	/* alt_path is dereferenced throughout, so it cannot be NULL */
	if (alt_path == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: alt_path is NULL");
		return (IBT_INVALID_PARAM);
	}

	/* port numbers are 1-based; 0 is never a valid port */
	if ((port_no = alt_path->ap_alt_cep_path.cep_hca_port_num) == 0)
		return (IBT_INVALID_PARAM);

	/* get the statep */
	IBCM_GET_CHAN_PRIVATE(channel, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: statep NULL");
		return (IBT_CM_FAILURE);
	}

	/* Hold a reference on statep for the duration of this call */
	mutex_enter(&statep->state_mutex);
	IBCM_RELEASE_CHAN_PRIVATE(channel);
	IBCM_REF_CNT_INCR(statep);
	mutex_exit(&statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: statep %p", statep);

	hcap = statep->hcap;

	/*
	 * HCA must have been in active state. If not, it's a client bug.
	 * This is only logged; processing continues regardless.
	 */
	if (!IBCM_ACCESS_HCA_OK(hcap))
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: hca in error state");

	ASSERT(statep->cm_handler != NULL);

	/* Check Alternate port */
	status = ibt_get_port_state_byguid(hcap->hca_guid, port_no, NULL,
	    &base_lid);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
		    "ibt_get_port_state_byguid status %d ", status);
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (status);
	}

	/* Re-initialize the alternate port if it has no ibmf handle yet */
	if ((hcap->hca_port_info[port_no - 1].port_ibmf_hdl == NULL) &&
	    ((status = ibcm_hca_reinit_port(hcap, port_no - 1))
	    != IBT_SUCCESS)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
		    "ibmf reg or callback setup failed during re-initialize");
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return (status);
	}

	ibmf_hdl = statep->stored_reply_addr.ibmf_hdl;

	alternate_slid = base_lid +
	    alt_path->ap_alt_cep_path.cep_adds_vect.av_src_path;

	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: alternate SLID = %x",
	    h2b16(alternate_slid));

	ibcm_lapr_enter();	/* limit how many run simultaneously */

	/* Allocate MAD for LAP */
	if (statep->lapr_msg == NULL)
		if ((status = ibcm_alloc_out_msg(ibmf_hdl, &statep->lapr_msg,
		    MAD_METHOD_SEND)) != IBT_SUCCESS) {
			ibcm_lapr_exit();
			IBTF_DPRINTF_L2(cmlog, "ibt_set_alt_path: "
			    "chan 0x%p ibcm_alloc_out_msg failed", channel);
			mutex_enter(&statep->state_mutex);
			IBCM_REF_CNT_DECR(statep);
			mutex_exit(&statep->state_mutex);
			return (status);
		}

	mutex_enter(&statep->state_mutex);

	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: connection state is"
	    " %x", statep->state);

	/* Check state */
	if ((statep->state != IBCM_STATE_ESTABLISHED) ||
	    (statep->ap_state != IBCM_AP_STATE_IDLE)) {
		/*
		 * NOTE(review): statep->lapr_msg is released here after the
		 * reference on statep has been dropped, and also when
		 * ap_state shows an alternate path operation in progress --
		 * confirm this cannot race with another user of lapr_msg.
		 */
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		(void) ibcm_free_out_msg(ibmf_hdl, &statep->lapr_msg);
		ibcm_lapr_exit();
		return (IBT_CHAN_STATE_INVALID);
	} else {
		/* Set to LAP Sent state */
		statep->ap_state = IBCM_AP_STATE_LAP_SENT;
		statep->ap_done = B_FALSE;
		statep->remaining_retry_cnt = statep->max_cm_retries;
		statep->timer_stored_state = statep->state;
		statep->timer_stored_ap_state = statep->ap_state;
		IBCM_REF_CNT_INCR(statep); /* for ibcm_post_lap_complete */
	}

	mutex_exit(&statep->state_mutex);

	/* No more failure returns below */

	/* Fill in the LAP message */
	IBTF_DPRINTF_L5(cmlog, "ibt_set_alt_path:"
	    " statep's mad addr = 0x%p", IBCM_OUT_HDRP(statep->lapr_msg));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lap_msgp))

	lap_msgp = (ibcm_lap_msg_t *)IBCM_OUT_MSGP(statep->lapr_msg);

	lap_msgp->lap_alt_l_port_lid = h2b16(alternate_slid);
	lap_msgp->lap_alt_r_port_lid =
	    h2b16(alt_path->ap_alt_cep_path.cep_adds_vect.av_dlid);

	/* Fill in remote port gid */
	lap_msgp->lap_alt_r_port_gid.gid_prefix =
	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_prefix);
	lap_msgp->lap_alt_r_port_gid.gid_guid =
	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_dgid.gid_guid);

	/* Fill in local port gid */
	lap_msgp->lap_alt_l_port_gid.gid_prefix =
	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_prefix);
	lap_msgp->lap_alt_l_port_gid.gid_guid =
	    h2b64(alt_path->ap_alt_cep_path.cep_adds_vect.av_sgid.gid_guid);

	alt_grh = alt_path->ap_alt_cep_path.cep_adds_vect.av_send_grh;

	/* alternate_flow_label, and alternate srate, alternate traffic class */
	lap_msgp->lap_alt_srate_plus =
	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srate & 0x3f;
	lap_msgp->lap_alt_flow_label_plus = h2b32(((alt_grh == B_TRUE) ?
	    (alt_path->ap_alt_cep_path.cep_adds_vect.av_flow << 12) : 0) |
	    alt_path->ap_alt_cep_path.cep_adds_vect.av_tclass);

	/* Alternate hop limit, service level */
	lap_msgp->lap_alt_hop_limit = (alt_grh == B_TRUE) ?
	    alt_path->ap_alt_cep_path.cep_adds_vect.av_hop : 1;
	lap_msgp->lap_alt_sl_plus =
	    alt_path->ap_alt_cep_path.cep_adds_vect.av_srvl << 4 |
	    ((alt_grh == B_FALSE) ? 0x8 : 0);

	/*
	 * NOTE(review): this uses the rc_alt_pkt_lt already stored in statep;
	 * the value from alt_path is only stored further below. Confirm that
	 * this ordering is intended.
	 */
	lap_msgp->lap_alt_local_acktime_plus = ibt_usec2ib(
	    (2 * statep->rc_alt_pkt_lt) +
	    ibt_ib2usec(hcap->hca_ack_delay)) << 3;

	lap_msgp->lap_local_comm_id = h2b32(statep->local_comid);
	lap_msgp->lap_remote_comm_id = h2b32(statep->remote_comid);

	lap_msgp->lap_remote_qpn_eecn_plus =
	    h2b32((statep->remote_qpn << 8) |
	    ibt_usec2ib(ibcm_remote_response_time) << 3);

	/* Private data beyond IBT_LAP_PRIV_DATA_SZ is silently dropped */
	len = min(priv_data_len, IBT_LAP_PRIV_DATA_SZ);
	if ((len > 0) && priv_data) {
		bcopy(priv_data, lap_msgp->lap_private_data, len);
	}

	/* only rc_alt_pkt_lt and ap_return_data fields are initialized */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*statep))

	statep->rc_alt_pkt_lt = ibt_ib2usec(alt_path->ap_alt_pkt_lt);

	/* return_data is filled up in the state machine code */
	statep->ap_return_data = ret_args;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*statep))

	IBCM_OUT_HDRP(statep->lapr_msg)->AttributeID =
	    h2b16(IBCM_INCOMING_LAP + IBCM_ATTR_BASE_ID);

	IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID =
	    h2b64(ibcm_generate_tranid(IBCM_INCOMING_LAP, statep->local_comid,
	    0));
	IBTF_DPRINTF_L3(cmlog, "ibt_set_alt_path: statep %p, tid %llx",
	    statep, IBCM_OUT_HDRP(statep->lapr_msg)->TransactionID);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*lap_msgp))

	/* Send LAP */
	ibcm_post_rc_mad(statep, statep->lapr_msg, ibcm_post_lap_complete,
	    statep);

	mutex_enter(&statep->state_mutex);

	if (mode == IBT_BLOCKING) {
		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: blocking");

		/* wait for APR */
		while (statep->ap_done != B_TRUE) {
			cv_wait(&statep->block_client_cv,
			    &statep->state_mutex);
		}

		IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done blocking");

		/*
		 * In the case that ibt_set_alt_path fails,
		 * change retval to IBT_CM_FAILURE
		 */
		if (statep->ap_return_data->ap_status != IBT_CM_AP_LOADED)
			status = IBT_CM_FAILURE;

	}

	/* decrement the ref-count before leaving here */
	IBCM_REF_CNT_DECR(statep);

	mutex_exit(&statep->state_mutex);

	ibcm_lapr_exit();

	/* If this message isn't seen then ibt_set_alt_path failed */
	IBTF_DPRINTF_L4(cmlog, "ibt_set_alt_path: done");

	return (status);
}


#ifdef DEBUG

/*
 * ibcm_query_classport_info:
 *	Query classportinfo
 *
 * INPUTS:
 *	channel		- Channel that is associated with a statep
 *
 * RETURN VALUE: NONE
 * This function is currently used to generate a valid get method classport
 * info, and test CM functionality. There is no ibtl client interface to
 * generate a classportinfo. It is possible that CM may use classportinfo
 * from other nodes in the future, and most of the code below could be re-used.
 *
 * If the channel is invalid, has no statep, or the outgoing MAD cannot be
 * allocated, the function logs the problem and returns without posting.
 */
void
ibcm_query_classport_info(ibt_channel_hdl_t channel)
{
	ibcm_state_data_t	*statep;
	ibmf_msg_t		*msgp;
	ibt_status_t		status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p)", channel);

	/* validate channel, first */
	if (IBCM_INVALID_CHANNEL(channel)) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
		    "invalid channel (%p)", channel);
		return;
	}

	/* get the statep */
	IBCM_GET_CHAN_PRIVATE(channel, statep);

	/*
	 * This can happen, if the statep is already gone by a DREQ from
	 * the remote side
	 */
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
		    "statep NULL");
		return;
	}

	/* Hold a reference on statep while the MAD is built and posted */
	mutex_enter(&statep->state_mutex);
	IBCM_RELEASE_CHAN_PRIVATE(channel);
	IBCM_REF_CNT_INCR(statep);
	mutex_exit(&statep->state_mutex);

	/*
	 * Even for debug/test code the allocation status has to be checked:
	 * on failure msgp is not valid and must not be dereferenced.
	 */
	status = ibcm_alloc_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp,
	    MAD_METHOD_GET);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_query_classport_info: "
		    "chan 0x%p ibcm_alloc_out_msg failed: %d", channel,
		    status);
		mutex_enter(&statep->state_mutex);
		IBCM_REF_CNT_DECR(statep);
		mutex_exit(&statep->state_mutex);
		return;
	}

	IBCM_OUT_HDRP(msgp)->TransactionID = h2b64(ibcm_generate_tranid(
	    MAD_ATTR_ID_CLASSPORTINFO, statep->local_comid, 0));
	IBCM_OUT_HDRP(msgp)->AttributeID = h2b16(MAD_ATTR_ID_CLASSPORTINFO);

	/* Debug/test code, so don't care about the post status */
	(void) ibcm_post_mad(msgp, &statep->stored_reply_addr, NULL, NULL);

	IBTF_DPRINTF_L3(cmlog, "ibcm_query_classport_info(%p) "
	    "Get method MAD posted ", channel);

	(void) ibcm_free_out_msg(statep->stored_reply_addr.ibmf_hdl, &msgp);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}

/*
 * ibcm_print_reply_addr:
 *	Debug aid. Traces, at debug level 4, the received address fields
 *	and the GRH fields held in the given CM reply address.
 */
static void
ibcm_print_reply_addr(ibt_channel_hdl_t channel, ibcm_mad_addr_t *cm_reply_addr)
{
	/* LIDs of the received address */
	IBTF_DPRINTF_L4(cmlog,
	    "ibcm_print_reply_addr: chan 0x%p, SLID %x, DLID %x",
	    channel,
	    cm_reply_addr->rcvd_addr.ia_local_lid,
	    cm_reply_addr->rcvd_addr.ia_remote_lid);

	/* Keys, remote QP number and service level */
	IBTF_DPRINTF_L4(cmlog,
	    "ibcm_print_reply_addr: QKEY %x, PKEY %x, RQPN %x SL %x",
	    cm_reply_addr->rcvd_addr.ia_q_key,
	    cm_reply_addr->rcvd_addr.ia_p_key,
	    cm_reply_addr->rcvd_addr.ia_remote_qno,
	    cm_reply_addr->rcvd_addr.ia_service_level);

	/* GRH: sender gid */
	IBTF_DPRINTF_L4(cmlog,
	    "ibcm_print_reply_addr: CM SGID %llX:%llX ",
	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_prefix,
	    cm_reply_addr->grh_hdr.ig_sender_gid.gid_guid);

	/* GRH: receiver gid */
	IBTF_DPRINTF_L4(cmlog,
	    "ibcm_print_reply_addr: CM DGID %llX:%llX",
	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_prefix,
	    cm_reply_addr->grh_hdr.ig_recver_gid.gid_guid);

	/* GRH: flow label, traffic class and hop limit */
	IBTF_DPRINTF_L4(cmlog,
	    "ibcm_print_reply_addr: CM FL %x TC %x HL %x",
	    cm_reply_addr->grh_hdr.ig_flow_label,
	    cm_reply_addr->grh_hdr.ig_tclass,
	    cm_reply_addr->grh_hdr.ig_hop_limit);
}

#endif

/*
 * For MCG List search
 *
 * One entry is kept per joined multicast group. Entries are created and
 * reference counted by ibcm_add_incr_mcg_entry() and released by
 * ibcm_del_decr_mcg_entry(). Both look an entry up by MGID (prefix and
 * guid) plus the guid of the port GID.
 */
typedef struct ibcm_mcg_list_s {
	/* next entry in the singly linked list, NULL at the tail */
	struct ibcm_mcg_list_s	*ml_next;
	/* PortGID taken from the join response */
	ib_gid_t		ml_sgid;
	/* MGID taken from the join response */
	ib_gid_t		ml_mgid;
	/* P_Key taken from the join request */
	ib_pkey_t		ml_pkey;
	/* Q_Key taken from the join request */
	ib_qkey_t		ml_qkey;
	/* 1 when the entry is created, +1 for every further matching join */
	uint_t			ml_refcnt;
	/* JoinState bits of all joins OR-ed together; needed during leave */
	uint8_t			ml_jstate;
} ibcm_mcg_list_t;

/* Head of the MCG list; new entries are inserted at the head */
ibcm_mcg_list_t	*ibcm_mcglist = NULL;

_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcg_list_s))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_mcglist_lock, ibcm_mcglist))

/*
 * Arguments handed from ibt_join_mcg() to ibcm_process_join_mcg(), either
 * directly (blocking call) or through the taskq (non-blocking call).
 */
typedef struct ibcm_join_mcg_tqarg_s {
	/* request GID passed to ibt_join_mcg() */
	ib_gid_t		rgid;
	/* private copy of the caller's multicast group attributes */
	ibt_mcg_attr_t		mcg_attr;
	/* caller's ibt_mcg_info_t pointer (not a copy) */
	ibt_mcg_info_t		*mcg_infop;
	/* caller's handler; NULL for a blocking ibt_join_mcg() call */
	ibt_mcg_handler_t	func;
	/* caller's argument for 'func' */
	void			*arg;
} ibcm_join_mcg_tqarg_t;

_NOTE(READ_ONLY_DATA(ibcm_join_mcg_tqarg_s))

/*
 * ibcm_add_incr_mcg_entry
 *
 * Record a successful multicast group join in the MCG list.
 *
 * If an entry matching the MGID and port GID guid of the response already
 * exists, its reference count is incremented and the requested JoinState is
 * OR-ed into it. Otherwise a new entry with a reference count of one is
 * inserted at the head of the list.
 *
 * Input:
 *	mcg_req		The join request; supplies JoinState, Q_Key and P_Key.
 *	mcg_resp	The join response; supplies the MGID and PortGID that
 *			identify the entry.
 */
void
ibcm_add_incr_mcg_entry(sa_mcmember_record_t *mcg_req,
    sa_mcmember_record_t *mcg_resp)
{
	ibcm_mcg_list_t	*new = NULL;
	ibcm_mcg_list_t	*cur;

	IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: MGID %llX:%llX"
	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);

	mutex_enter(&ibcm_mcglist_lock);

	for (;;) {
		for (cur = ibcm_mcglist; cur != NULL; cur = cur->ml_next) {
			if ((cur->ml_mgid.gid_guid ==
			    mcg_resp->MGID.gid_guid) &&
			    (cur->ml_mgid.gid_prefix ==
			    mcg_resp->MGID.gid_prefix) &&
			    (cur->ml_sgid.gid_guid ==
			    mcg_resp->PortGID.gid_guid))
				break;
		}

		if (cur != NULL) {
			/* Increment the count */
			cur->ml_refcnt++;
			/* OR the join_state value, we need this during leave */
			cur->ml_jstate |= mcg_req->JoinState;

			IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: Entry "
			    "FOUND: refcnt %d JState %X", cur->ml_refcnt,
			    cur->ml_jstate);

			mutex_exit(&ibcm_mcglist_lock);

			/*
			 * Another thread added the entry while the lock was
			 * dropped for the allocation; discard ours.
			 */
			if (new != NULL)
				kmem_free(new, sizeof (ibcm_mcg_list_t));
			return;
		}

		/* Second pass and still no match: go insert our entry */
		if (new != NULL)
			break;

		/*
		 * The list is empty or no match was found. The allocation
		 * may sleep, so drop the lock for it and then search again:
		 * the list may have changed in the meantime, and inserting
		 * blindly could create a duplicate entry for the same group.
		 */
		mutex_exit(&ibcm_mcglist_lock);

		IBTF_DPRINTF_L3(cmlog, "ibcm_add_incr_mcg_entry: "
		    "Create NEW Entry ");

		new = kmem_zalloc(sizeof (ibcm_mcg_list_t), KM_SLEEP);

		mutex_enter(&ibcm_mcglist_lock);
	}

	/* Initialize the fields */
	new->ml_sgid = mcg_resp->PortGID;
	new->ml_mgid = mcg_resp->MGID;
	new->ml_qkey = mcg_req->Q_Key;
	new->ml_pkey = mcg_req->P_Key;
	new->ml_refcnt = 1; /* As this is the first entry */
	new->ml_jstate = mcg_req->JoinState;

	/* Insert at the head of the list */
	new->ml_next = ibcm_mcglist;
	ibcm_mcglist = new;
	mutex_exit(&ibcm_mcglist_lock);
}

/*
 * ibcm_del_decr_mcg_entry
 *
 * Drop one reference on the MCG list entry that matches the MGID and the
 * port GID guid of mcg_req.
 *
 * Return value:
 * IBCM_SUCCESS		Entry found and its ref_cnt dropped to zero. The entry
 *			has been removed and freed, so go ahead and leave the
 *			MCG group. *jstate holds the join_state value that
 *			xxx_leave_mcg() needs to use.
 * IBCM_LOOKUP_EXISTS	Entry found and ref_cnt decremented, but it is NOT
 *			zero yet. So do not leave the MCG group. *jstate is 0.
 * IBCM_LOOKUP_FAIL	Entry is NOT found. *jstate is 0.
 */
ibcm_status_t
ibcm_del_decr_mcg_entry(sa_mcmember_record_t *mcg_req, uint8_t *jstate)
{
	ibcm_mcg_list_t	**linkp;	/* link that points at 'entry' */
	ibcm_mcg_list_t	*entry;

	IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry: MGID %llX:%llX"
	    "\n SGID %llX:%llX, JState %X)", mcg_req->MGID.gid_prefix,
	    mcg_req->MGID.gid_guid, mcg_req->PortGID.gid_prefix,
	    mcg_req->PortGID.gid_guid, mcg_req->JoinState);

	*jstate = 0;

	mutex_enter(&ibcm_mcglist_lock);

	for (linkp = &ibcm_mcglist; (entry = *linkp) != NULL;
	    linkp = &entry->ml_next) {
		/* Skip everything that is not the group being left */
		if ((entry->ml_mgid.gid_guid != mcg_req->MGID.gid_guid) ||
		    (entry->ml_mgid.gid_prefix != mcg_req->MGID.gid_prefix) ||
		    (entry->ml_sgid.gid_guid != mcg_req->PortGID.gid_guid))
			continue;

		/* A JoinState that was never joined is only reported */
		if ((entry->ml_jstate & mcg_req->JoinState) == 0) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry"
			    ": JoinState mismatch %X %X)",
			    entry->ml_jstate, mcg_req->JoinState);
		}

		/* Other joiners remain: keep the entry */
		if (--entry->ml_refcnt != 0) {
			mutex_exit(&ibcm_mcglist_lock);
			return (IBCM_LOOKUP_EXISTS);
		}

		*jstate = entry->ml_jstate;

		IBTF_DPRINTF_L3(cmlog, "ibcm_del_decr_mcg_entry"
		    ": refcnt is ZERO, so delete the entry ");

		/* Unlink; works the same for the list head and the middle */
		*linkp = entry->ml_next;
		mutex_exit(&ibcm_mcglist_lock);

		kmem_free(entry, sizeof (ibcm_mcg_list_t));
		return (IBCM_SUCCESS);
	}

	mutex_exit(&ibcm_mcglist_lock);

	/*
	 * Reaching this point means there is no entry for the MCG that is
	 * being left; something went wrong.
	 */
	IBTF_DPRINTF_L2(cmlog, "ibcm_del_decr_mcg_entry: Match NOT "
	    "Found ");

	return (IBCM_LOOKUP_FAIL);
}


/*
 * Function:
 *	ibt_join_mcg
 * Input:
 *	rgid		The request GID that defines the HCA port from which a
 *			contact to SA Access is performed to add the specified
 *			endport GID (mcg_attr->mc_pgid) to a multicast group.
 *			If mcg_attr->mc_pgid is null, then this (rgid) will be
 *			treated as endport GID that is to be added to the
 *			multicast group.
 *
 *	mcg_attr	A pointer to an ibt_mcg_attr_t structure that defines
 *			the attributes of the desired multicast group to be
 *			created or joined. Must not be NULL. The structure is
 *			copied before this function returns.
 *
 *	func		NULL or a pointer to a function to call when
 *			ibt_join_mcg() completes. If 'func' is not NULL then
 *			ibt_join_mcg() will return as soon as possible after
 *			initiating the multicast group join/create process.
 *			'func' is then called when the process completes.
 *
 *	arg		Argument to the 'func'.
 *
 * Output:
 *	mcg_info_p	A pointer to the ibt_mcg_info_t structure, allocated
 *			by the caller, where the attributes of the created or
 *			joined multicast group are copied. Must not be NULL.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM
 *	IBT_MCG_RECORDS_NOT_FOUND
 *	IBT_INSUFF_RESOURCE
 *	IBT_INSUFF_KERNEL_RESOURCE	Memory for the request could not be
 *					allocated, or the taskq dispatch
 *					failed (non-blocking call only).
 * Description:
 *	Join a multicast group.  The first full member "join" causes the MCG
 *	to be created.
 */
ibt_status_t
ibt_join_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr,
    ibt_mcg_info_t *mcg_info_p, ibt_mcg_handler_t func, void *arg)
{
	ibcm_join_mcg_tqarg_t	*mcg_tq;
	/* A non-blocking call (func given) must not sleep for memory */
	int			flag = ((func == NULL) ? KM_SLEEP : KM_NOSLEEP);

	IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg(%llX:%llX, %p)", rgid.gid_prefix,
	    rgid.gid_guid, mcg_attr);

	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Request GID is required");
		return (IBT_INVALID_PARAM);
	}

	/* mcg_attr is dereferenced below, so it cannot be NULL */
	if (mcg_attr == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_attr is NULL");
		return (IBT_INVALID_PARAM);
	}

	if ((mcg_attr->mc_pkey == IB_PKEY_INVALID_LIMITED) ||
	    (mcg_attr->mc_pkey == IB_PKEY_INVALID_FULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Invalid P_Key specified");
		return (IBT_INVALID_PARAM);
	}

	if (mcg_attr->mc_join_state == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: JoinState not specified");
		return (IBT_INVALID_PARAM);
	}

	if (mcg_info_p == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: mcg_info_p is NULL");
		return (IBT_INVALID_PARAM);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mcg_tq))

	mcg_tq = kmem_alloc(sizeof (ibcm_join_mcg_tqarg_t), flag);
	if (mcg_tq == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: "
		    "Unable to allocate memory for local usage.");
		return (IBT_INSUFF_KERNEL_RESOURCE);
	}

	/* Every field is assigned below, so kmem_alloc (no zeroing) is fine */
	mcg_tq->rgid = rgid;
	bcopy(mcg_attr, &mcg_tq->mcg_attr, sizeof (ibt_mcg_attr_t));
	mcg_tq->mcg_infop = mcg_info_p;
	mcg_tq->func = func;
	mcg_tq->arg = arg;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mcg_tq))

	if (func != NULL) {	/* Non-Blocking */
		IBTF_DPRINTF_L3(cmlog, "ibt_join_mcg: Non-Blocking Call");
		if (taskq_dispatch(ibcm_taskq, ibcm_process_async_join_mcg,
		    mcg_tq, TQ_NOSLEEP) == 0) {
			IBTF_DPRINTF_L2(cmlog, "ibt_join_mcg: Failed to "
			    "Dispatch the TaskQ");
			/* The task never ran, so mcg_tq is still ours */
			kmem_free(mcg_tq, sizeof (ibcm_join_mcg_tqarg_t));
			return (IBT_INSUFF_KERNEL_RESOURCE);
		} else
			return (IBT_SUCCESS);
	} else {		/* Blocking */
		/*
		 * mcg_tq is handed over to ibcm_process_join_mcg, which
		 * presumably releases it -- TODO confirm.
		 */
		return (ibcm_process_join_mcg(mcg_tq));
	}
}

/*
 * ibcm_process_async_join_mcg
 *
 * Taskq entry point dispatched by ibt_join_mcg() for a non-blocking join.
 * tq_arg is the ibcm_join_mcg_tqarg_t built by ibt_join_mcg(). It is passed
 * on to ibcm_process_join_mcg(); the returned status is discarded because a
 * taskq function has no one to return it to.
 */
static void
ibcm_process_async_join_mcg(void *tq_arg)
{
	(void) ibcm_process_join_mcg(tq_arg);
}

static ibt_status_t
ibcm_process_join_mcg(void *taskq_arg)
{
	sa_mcmember_record_t	mcg_req;
	sa_mcmember_record_t	*mcg_resp;
	ibmf_saa_access_args_t	access_args;
	ibmf_saa_handle_t	saa_handle;
	uint64_t		component_mask = 0;
	ibt_status_t		retval;
	ibtl_cm_hca_port_t	hca_port;
	uint_t			num_records;
	size_t			length;
	ibcm_hca_info_t		*hcap;
	ibcm_join_mcg_tqarg_t	*mcg_arg = (ibcm_join_mcg_tqarg_t *)taskq_arg;
	ibt_mcg_info_t		*mcg_info_p = mcg_arg->mcg_infop;

	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg(%p)", mcg_arg);

	retval = ibtl_cm_get_hca_port(mcg_arg->rgid, 0, &hca_port);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed to get "
		    "port info from specified RGID: status = %d", retval);
		goto ibcm_join_mcg_exit1;
	}

	bzero(&mcg_req, sizeof (sa_mcmember_record_t));

	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix == 0) ||
	    (mcg_arg->mcg_attr.mc_pgid.gid_guid == 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
		    "Request GID is Port GID");
		mcg_req.PortGID = mcg_arg->rgid;
	} else {
		mcg_req.PortGID = mcg_arg->mcg_attr.mc_pgid;
	}
	component_mask |= SA_MC_COMPMASK_PORTGID;

	mcg_req.Q_Key = mcg_arg->mcg_attr.mc_qkey;
	mcg_req.P_Key = mcg_arg->mcg_attr.mc_pkey;
	mcg_req.JoinState = mcg_arg->mcg_attr.mc_join_state;
	mcg_req.TClass = mcg_arg->mcg_attr.mc_tclass;
	mcg_req.FlowLabel = mcg_arg->mcg_attr.mc_flow;
	mcg_req.SL = mcg_arg->mcg_attr.mc_sl;

	component_mask |= SA_MC_COMPMASK_QKEY | SA_MC_COMPMASK_PKEY |
	    SA_MC_COMPMASK_JOINSTATE | SA_MC_COMPMASK_TCLASS |
	    SA_MC_COMPMASK_FLOWLABEL | SA_MC_COMPMASK_SL;

	/* If client has specified MGID, use it else SA will assign one. */
	if ((mcg_arg->mcg_attr.mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
		mcg_req.MGID = mcg_arg->mcg_attr.mc_mgid;
		component_mask |= SA_MC_COMPMASK_MGID;
	}

	IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: ");
	IBTF_DPRINTF_L3(cmlog, "PGID=%016llX:%016llX, ",
	    mcg_req.PortGID.gid_prefix, mcg_req.PortGID.gid_guid);
	IBTF_DPRINTF_L3(cmlog, "MGID=%016llX:%016llX",
	    mcg_req.MGID.gid_prefix, mcg_req.MGID.gid_guid);
	IBTF_DPRINTF_L3(cmlog, "JoinState = %X",
	    mcg_arg->mcg_attr.mc_join_state);
	IBTF_DPRINTF_L5(cmlog, "QKey %lX, PKey %lX",
	    mcg_arg->mcg_attr.mc_qkey, mcg_arg->mcg_attr.mc_pkey);
	IBTF_DPRINTF_L5(cmlog, "Scope %X, MLID %X",
	    mcg_arg->mcg_attr.mc_scope, mcg_arg->mcg_attr.mc_mlid);

	/* Is MTU specified. */
	if (mcg_arg->mcg_attr.mc_mtu_req.r_mtu) {
		mcg_req.MTU = mcg_arg->mcg_attr.mc_mtu_req.r_mtu;
		mcg_req.MTUSelector = mcg_arg->mcg_attr.mc_mtu_req.r_selector;

		component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
		    SA_MC_COMPMASK_MTU;
	}

	/* Is RATE specified. */
	if (mcg_arg->mcg_attr.mc_rate_req.r_srate) {
		mcg_req.Rate = mcg_arg->mcg_attr.mc_rate_req.r_srate;
		mcg_req.RateSelector =
		    mcg_arg->mcg_attr.mc_rate_req.r_selector;

		component_mask |= SA_MC_COMPMASK_RATESELECTOR |
		    SA_MC_COMPMASK_RATE;
	}

	/* Is Packet Life Time specified. */
	if (mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt) {
		mcg_req.Rate = mcg_arg->mcg_attr.mc_pkt_lt_req.p_pkt_lt;
		mcg_req.RateSelector =
		    mcg_arg->mcg_attr.mc_pkt_lt_req.p_selector;

		component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
		    SA_MC_COMPMASK_PKTLT;
	}

	if (mcg_arg->mcg_attr.mc_hop) {
		mcg_req.HopLimit = mcg_arg->mcg_attr.mc_hop;
		component_mask |= SA_MC_COMPMASK_HOPLIMIT;
	}

	if (mcg_arg->mcg_attr.mc_scope) {
		mcg_req.Scope = mcg_arg->mcg_attr.mc_scope;
		component_mask |= SA_MC_COMPMASK_SCOPE;
	}

	if (mcg_arg->mcg_attr.mc_mlid) {
		mcg_req.MLID = mcg_arg->mcg_attr.mc_mlid;
		component_mask |= SA_MC_COMPMASK_MLID;
	}

	/* Get SA Access Handle. */
	hcap = ibcm_find_hca_entry(hca_port.hp_hca_guid);
	if (hcap == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: NO HCA found");

		retval = IBT_HCA_BUSY_DETACHING;
		goto ibcm_join_mcg_exit1;
	}

	saa_handle = ibcm_get_saa_handle(hcap, hca_port.hp_port);
	if (saa_handle == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: SA Handle NULL");

		retval = IBT_HCA_PORT_NOT_ACTIVE;
		goto ibcm_join_mcg_exit;
	}

	if ((mcg_arg->mcg_attr.mc_pgid.gid_prefix != 0) &&
	    (mcg_arg->mcg_attr.mc_pgid.gid_guid != 0)) {
		retval = ibtl_cm_get_hca_port(mcg_arg->mcg_attr.mc_pgid, 0,
		    &hca_port);
		if (retval != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: Failed "
			    "to get PortInfo of specified PGID: status = %d",
			    retval);
			goto ibcm_join_mcg_exit1;
		}
	}

	/* Contact SA Access */
	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
	access_args.sq_access_type = IBMF_SAA_UPDATE;
	access_args.sq_component_mask = component_mask;
	access_args.sq_template = &mcg_req;
	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
	access_args.sq_callback = NULL;
	access_args.sq_callback_arg = NULL;

	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
	    (void **)&mcg_resp);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_process_join_mcg: "
		    "SA Access Failed");
		goto ibcm_join_mcg_exit;
	}

	num_records = length/sizeof (sa_mcmember_record_t);

	IBTF_DPRINTF_L4(cmlog, "ibcm_process_join_mcg: "
	    "Found %d MCMember Records", num_records);

	/* Validate the returned number of records. */
	if ((mcg_resp != NULL) && (num_records > 0)) {
		/* Update the return values. */
		mcg_info_p->mc_adds_vect.av_dgid = mcg_resp->MGID;
		mcg_info_p->mc_adds_vect.av_sgid = mcg_resp->PortGID;
		mcg_info_p->mc_adds_vect.av_srate = mcg_resp->Rate;
		mcg_info_p->mc_adds_vect.av_srvl = mcg_resp->SL;
		mcg_info_p->mc_adds_vect.av_flow = mcg_resp->FlowLabel;
		mcg_info_p->mc_adds_vect.av_tclass = mcg_resp->TClass;
		mcg_info_p->mc_adds_vect.av_hop = mcg_resp->HopLimit;
		mcg_info_p->mc_adds_vect.av_send_grh = B_TRUE;
		mcg_info_p->mc_adds_vect.av_dlid = mcg_resp->MLID;
		mcg_info_p->mc_mtu = mcg_resp->MTU;
		mcg_info_p->mc_qkey = mcg_resp->Q_Key;

		retval = ibt_pkey2index_byguid(hca_port.hp_hca_guid,
		    hca_port.hp_port, mcg_resp->P_Key, &mcg_info_p->mc_pkey_ix);
		if (retval != IBT_SUCCESS) {
			IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
			    "Pkey2Index Conversion failed<%d>", retval);
			mcg_info_p->mc_pkey_ix = 0;
		}

		mcg_info_p->mc_scope = mcg_resp->Scope;
		mcg_info_p->mc_pkt_lt = mcg_resp->PacketLifeTime;

		mcg_info_p->mc_adds_vect.av_port_num = hca_port.hp_port;
		mcg_info_p->mc_adds_vect.av_sgid_ix = hca_port.hp_sgid_ix;
		mcg_info_p->mc_adds_vect.av_src_path = 0;

		/* Add or Incr the matching MCG entry. */
		ibcm_add_incr_mcg_entry(&mcg_req, mcg_resp);
		/* Deallocate the memory allocated by SA for mcg_resp. */
		kmem_free(mcg_resp, length);

		retval = IBT_SUCCESS;
	} else {
		retval = IBT_MCG_RECORDS_NOT_FOUND;
		IBTF_DPRINTF_L3(cmlog, "ibcm_process_join_mcg: "
		    "MCG RECORDS NOT FOUND");
	}

ibcm_join_mcg_exit:
	ibcm_dec_hca_acc_cnt(hcap);

ibcm_join_mcg_exit1:
	if (mcg_arg->func)
		(*(mcg_arg->func))(mcg_arg->arg, retval, mcg_info_p);

	kmem_free(mcg_arg, sizeof (ibcm_join_mcg_tqarg_t));

	return (retval);
}


/*
 * Function:
 *	ibt_leave_mcg
 * Input:
 *	rgid		The request GID that defines the HCA port upon which
 *			to send the request to the Subnet Administrator, to
 *			remove the specified port (port_gid) from the multicast
 *			group.  If 'port_gid' is the Reserved GID (i.e.
 *			port_gid.gid_prefix = 0 and port_gid.gid_guid = 0),
 *			then the end-port associated with 'rgid' is removed
 *			from the multicast group.
 *
 *	mc_gid		A multicast group GID as returned from ibt_join_mcg()
 *			call (i.e. mc_gid.gid_prefix has 0xFF in its upper 8
 *			bits to identify this as being a multicast GID).  This
 *			is optional; if not specified (i.e. gid_prefix and
 *			gid_guid are both 0), then the port is removed from
 *			all the multicast groups of which it is a member.
 *
 *	port_gid	This is optional, if not the Reserved GID (gid_prefix
 *			and gid_guid not equal to 0), then this specifies the
 *			endport GID of the multicast group member being deleted
 *			from the group. If it is the Reserved GID (gid_prefix
 *			and gid_guid equal to 0) then the member endport GID is
 *			determined from 'rgid'.
 *
 *	mc_join_state	The Join State attribute used when the group was joined
 *			using ibt_join_mcg(). This Join State component must
 *			contain at least one bit set to 1 in the same position
 *			as that used during ibt_join_mcg(), i.e. the logical
 *			AND of the two JoinState components is not all zeros.
 *			This Join State component must not have any bits set
 *			that were not set in the ibt_join_mcg() call.
 * Output:
 *	None.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM
 *	IBT_MC_MGID_INVALID
 *	IBT_MC_GROUP_INVALID
 *	IBT_HCA_BUSY_DETACHING
 *	IBT_HCA_PORT_NOT_ACTIVE
 *	IBT_INSUFF_RESOURCE
 * Description:
 *	The port associated with the port GID shall be removed from the
 *	multicast group specified by MGID (mc_gid) or from all the multicast
 *	groups of which it is a member if the MGID (mc_gid) is not specified.
 *
 *	The last full member to leave causes the destruction of the Multicast
 *	Group.
 */
ibt_status_t
ibt_leave_mcg(ib_gid_t rgid, ib_gid_t mc_gid, ib_gid_t port_gid,
    uint8_t mc_join_state)
{
	ibcm_hca_info_t		*hca;
	ibtl_cm_hca_port_t	req_port;
	ibmf_saa_handle_t	sa_hdl;
	ibmf_saa_access_args_t	sa_args;
	sa_mcmember_record_t	leave_rec;
	uint64_t		comp_mask;
	uint8_t			cached_jstate = 0;
	ibcm_status_t		cache_rc;
	ibt_status_t		status;
	int			ibmf_rc;
	size_t			resp_len;
	void			*resp_p;

	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, %llX:%llX)",
	    rgid.gid_prefix, rgid.gid_guid, mc_gid.gid_prefix, mc_gid.gid_guid);

	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg(%llX:%llX, 0x%X)",
	    port_gid.gid_prefix, port_gid.gid_guid, mc_join_state);

	/* The request GID selects the local port and cannot be omitted. */
	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: RequestGID is required");
		return (IBT_INVALID_PARAM);
	}

	bzero(&leave_rec, sizeof (leave_rec));

	/* Port GID and Join State are always part of the delete template. */
	comp_mask = SA_MC_COMPMASK_PORTGID | SA_MC_COMPMASK_JOINSTATE;

	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: MGID: %llX%llX",
	    mc_gid.gid_prefix, mc_gid.gid_guid);

	/*
	 * A multicast GID carries 0xFF in the top byte of its prefix.  An
	 * all-zero GID means "no MGID given"; anything else is rejected.
	 */
	if (((mc_gid.gid_prefix >> 56) & 0xFF) == 0xFF) {
		leave_rec.MGID = mc_gid;
		comp_mask |= SA_MC_COMPMASK_MGID;
	} else if (!((mc_gid.gid_prefix == 0) && (mc_gid.gid_guid == 0))) {
		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Invalid MGID specified");
		return (IBT_MC_MGID_INVALID);
	}

	/* A fully specified port GID makes this a proxy leave. */
	if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Performing PROXY Leave");
		leave_rec.PortGID = port_gid;
	} else {
		leave_rec.PortGID = rgid;
	}

	IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Port GID <%llX:%llX>",
	    leave_rec.PortGID.gid_prefix, leave_rec.PortGID.gid_guid);

	leave_rec.JoinState = mc_join_state;

	/* Consult the local join cache before going to the SA. */
	cache_rc = ibcm_del_decr_mcg_entry(&leave_rec, &cached_jstate);
	switch (cache_rc) {
	case IBCM_LOOKUP_EXISTS:
		IBTF_DPRINTF_L3(cmlog, "ibt_leave_mcg: Multiple JoinMCG record "
		    " still exists, we shall leave for last leave_mcg call");
		return (IBT_SUCCESS);

	case IBCM_LOOKUP_FAIL:
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: No Record found, "
		    "continue with leave_mcg call");
		break;

	case IBCM_SUCCESS:
		/*
		 * Use the cached join state when one was returned, since it
		 * is the OR of every ibt_join_mcg() call for this record.
		 */
		if (cached_jstate != 0)
			leave_rec.JoinState = cached_jstate;
		break;

	default:
		break;
	}

	status = ibtl_cm_get_hca_port(rgid, 0, &req_port);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: Failed to get port info "
		    "from specified RGID : status = %d", status);
		return (status);
	}

	/* Find the HCA, then the SA access handle for the request port. */
	hca = ibcm_find_hca_entry(req_port.hp_hca_guid);
	if (hca == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: "
		    "NO HCA found");
		return (IBT_HCA_BUSY_DETACHING);
	}

	sa_hdl = ibcm_get_saa_handle(hca, req_port.hp_port);
	if (sa_hdl == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: saa_handle is NULL");
		ibcm_dec_hca_acc_cnt(hca);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	/* Describe the MCMemberRecord delete for SA access. */
	sa_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
	sa_args.sq_access_type = IBMF_SAA_DELETE;
	sa_args.sq_component_mask = comp_mask;
	sa_args.sq_template = &leave_rec;
	sa_args.sq_template_length = sizeof (sa_mcmember_record_t);
	sa_args.sq_callback = NULL;
	sa_args.sq_callback_arg = NULL;

	ibcm_sa_access_enter();

	ibmf_rc = ibmf_sa_access(sa_hdl, &sa_args, 0, &resp_len, &resp_p);
	if (ibmf_rc != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_leave_mcg: SA access Failed: %d",
		    ibmf_rc);
		(void) ibcm_ibmf_analyze_error(ibmf_rc);
		status = IBT_MC_GROUP_INVALID;
	}

	ibcm_sa_access_exit();

	ibcm_dec_hca_acc_cnt(hca);

	return (status);
}


/*
 * Function:
 *	ibt_query_mcg
 * Input:
 *	rgid		The request GID that defines the HCA port upon which
 *			to send the request to the Subnet Administrator, to
 *			retrieve Multicast Records matching attributes as
 *			specified through 'mcg_attr' argument.
 *
 *	mcg_attr	NULL or a pointer to an ibt_mcg_attr_t structure that
 *			specifies MCG attributes that are to be matched.
 *			Attributes that are not required can be wild carded
 *			by specifying as '0'.
 *
 *	mcgs_max_num	The maximum number of matching multicast groups to
 *			return.  If zero, then all available matching multicast
 *			groups are returned.
 * Output:
 *	mcgs_info_p	The address of an ibt_mcg_info_t pointer, where
 *			multicast group information is returned. The actual
 *			number of entries filled in the array is returned in
 *			entries_p.
 *
 *	entries_p	The number of ibt_mcg_info_t entries returned.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM
 *	IBT_MCG_RECORDS_NOT_FOUND
 * Description:
 *	Request information on multicast groups that match the parameters
 *	specified in mcg_attr. Information on each multicast group is returned
 *	to the caller in the form of an array of ibt_mcg_info_t.
 *	ibt_query_mcg() allocates the memory for this array and returns a
 *	pointer to the array (mcgs_info_p) and the number of entries in the
 *	array (entries_p). This memory should be freed by the client using
 *	ibt_free_mcg_info().
 */
ibt_status_t
ibt_query_mcg(ib_gid_t rgid, ibt_mcg_attr_t *mcg_attr, uint_t mcgs_max_num,
    ibt_mcg_info_t **mcgs_info_p, uint_t *entries_p)
{
	sa_mcmember_record_t	mcg_req;
	sa_mcmember_record_t	*mcg_resp;
	ibt_mcg_info_t		*mcg_infop;
	ibmf_saa_access_args_t	access_args;
	ibmf_saa_handle_t	saa_handle;
	uint64_t		component_mask = 0;
	ibt_status_t		retval;
	ibtl_cm_hca_port_t	hport;
	uint_t			num_records;
	size_t			length;
	void			*results_p;
	ib_gid_t		port_gid;
	ibcm_hca_info_t		*hcap;

	IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg(%p, %d)", mcg_attr, mcgs_max_num);

	if ((entries_p == NULL) || (mcgs_info_p == NULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
		    "entries_p or mcgs_info_p is NULL");
		return (IBT_INVALID_PARAM);
	}

	if ((rgid.gid_prefix == 0) || (rgid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: RequestGID is required");
		return (IBT_INVALID_PARAM);
	}
	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Request GID <%llX:%llX>",
	    rgid.gid_prefix, rgid.gid_guid);

	bzero(&mcg_req, sizeof (sa_mcmember_record_t));
	port_gid.gid_prefix = port_gid.gid_guid = 0;

	/*
	 * Build the match template.  A NULL mcg_attr, or a zero value in any
	 * attribute, leaves that component out of the mask (wild card).
	 */
	if (mcg_attr != NULL) {
		port_gid = mcg_attr->mc_pgid;

		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {
			mcg_req.PortGID = mcg_attr->mc_pgid;
			component_mask |= SA_MC_COMPMASK_PORTGID;

			IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: PGID %llX:%llX",
			    port_gid.gid_prefix, port_gid.gid_guid);
		}

		/* Is Q_Key specified. */
		if (mcg_attr->mc_qkey != 0) {
			mcg_req.Q_Key = mcg_attr->mc_qkey;
			component_mask |= SA_MC_COMPMASK_QKEY;
		}

		/* Is P_Key specified. */
		if (mcg_attr->mc_pkey != 0) {
			mcg_req.P_Key = mcg_attr->mc_pkey;
			component_mask |= SA_MC_COMPMASK_PKEY;
		}

		/* Is MGID specified. */
		if ((mcg_attr->mc_mgid.gid_prefix >> 56ULL & 0xFF) == 0xFF) {
			mcg_req.MGID = mcg_attr->mc_mgid;
			component_mask |= SA_MC_COMPMASK_MGID;
		}

		/* Is MTU specified. */
		if (mcg_attr->mc_mtu_req.r_mtu) {
			mcg_req.MTU = mcg_attr->mc_mtu_req.r_mtu;
			mcg_req.MTUSelector = mcg_attr->mc_mtu_req.r_selector;

			component_mask |= SA_MC_COMPMASK_MTUSELECTOR |
			    SA_MC_COMPMASK_MTU;
		}

		if (mcg_attr->mc_tclass) {
			mcg_req.TClass = mcg_attr->mc_tclass;
			component_mask |= SA_MC_COMPMASK_TCLASS;
		}

		/* Is RATE specified. */
		if (mcg_attr->mc_rate_req.r_srate) {
			mcg_req.Rate = mcg_attr->mc_rate_req.r_srate;
			mcg_req.RateSelector = mcg_attr->mc_rate_req.r_selector;

			component_mask |= SA_MC_COMPMASK_RATESELECTOR |
			    SA_MC_COMPMASK_RATE;
		}

		/*
		 * Is Packet Life Time specified.  This must go into the packet
		 * life time fields: storing it in Rate/RateSelector would
		 * overwrite the rate request above and leave the
		 * PacketLifeTime component empty.
		 */
		if (mcg_attr->mc_pkt_lt_req.p_pkt_lt) {
			mcg_req.PacketLifeTime =
			    mcg_attr->mc_pkt_lt_req.p_pkt_lt;
			mcg_req.PacketLifeTimeSelector =
			    mcg_attr->mc_pkt_lt_req.p_selector;

			component_mask |= SA_MC_COMPMASK_PKTLTSELECTOR |
			    SA_MC_COMPMASK_PKTLT;
		}

		if (mcg_attr->mc_hop) {
			mcg_req.HopLimit = mcg_attr->mc_hop;
			component_mask |= SA_MC_COMPMASK_HOPLIMIT;
		}

		if (mcg_attr->mc_flow) {
			mcg_req.FlowLabel = mcg_attr->mc_flow;
			component_mask |= SA_MC_COMPMASK_FLOWLABEL;
		}

		if (mcg_attr->mc_sl) {
			mcg_req.SL = mcg_attr->mc_sl;
			component_mask |= SA_MC_COMPMASK_SL;
		}

		if (mcg_attr->mc_scope) {
			mcg_req.Scope = mcg_attr->mc_scope;
			component_mask |= SA_MC_COMPMASK_SCOPE;
		}

		if (mcg_attr->mc_join_state) {
			mcg_req.JoinState = mcg_attr->mc_join_state;
			component_mask |= SA_MC_COMPMASK_JOINSTATE;
		}

		if (mcg_attr->mc_mlid) {
			mcg_req.MLID = mcg_attr->mc_mlid;
			component_mask |= SA_MC_COMPMASK_MLID;
		}
	}

	/* The request GID selects the local HCA port used to reach the SA. */
	retval = ibtl_cm_get_hca_port(rgid, 0, &hport);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: Failed to get port info "
		    "from specified RGID : status = %d", retval);
		return (retval);
	}

	/* Get SA Access Handle. */
	hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
	if (hcap == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: NO HCA found");
		return (IBT_HCA_BUSY_DETACHING);
	}

	/*
	 * From here on hcap is held; every return must be preceded by
	 * ibcm_dec_hca_acc_cnt().
	 */
	saa_handle = ibcm_get_saa_handle(hcap, hport.hp_port);
	if (saa_handle == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: saa_handle is NULL");
		ibcm_dec_hca_acc_cnt(hcap);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	/* Contact SA Access */
	access_args.sq_attr_id = SA_MCMEMBERRECORD_ATTRID;
	access_args.sq_access_type = IBMF_SAA_RETRIEVE;
	access_args.sq_component_mask = component_mask;
	access_args.sq_template = &mcg_req;
	access_args.sq_template_length = sizeof (sa_mcmember_record_t);
	access_args.sq_callback = NULL;
	access_args.sq_callback_arg = NULL;

	retval = ibcm_contact_sa_access(saa_handle, &access_args, &length,
	    &results_p);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: SA access Failed");
		ibcm_dec_hca_acc_cnt(hcap);
		return (retval);
	}

	num_records = length/sizeof (sa_mcmember_record_t);

	IBTF_DPRINTF_L4(cmlog, "ibt_query_mcg: Found %d MCMember Records",
	    num_records);

	/* Validate the returned number of records. */
	if ((results_p != NULL) && (num_records > 0)) {
		uint_t	i;

		/*
		 * If mcgs_max_num is zero, then return all records else
		 * return only requested number of records
		 */
		if ((mcgs_max_num != 0) && (num_records > mcgs_max_num)) {
			/* we are interested in only mcgs_max_num records */
			num_records = mcgs_max_num;
		}

		/*
		 * The SGID returned in "mcg_info_p" buffer should be PortGID,
		 * (mcg_attr->mc_pgid), if 'mcg_attr->mc_pgid' was specified,
		 * else RequestGID (rgid) should be returned.
		 */
		if ((port_gid.gid_prefix != 0) && (port_gid.gid_guid != 0)) {

			/* Get sgid_ix and port number of 'port_gid' */
			retval = ibtl_cm_get_hca_port(port_gid, 0, &hport);
			if (retval != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog, "ibt_query_mcg: "
				    "Failed to Get Portinfo for PortGID :"
				    "status = %d", retval);
				/*
				 * Release the SA result buffer and the HCA
				 * hold before bailing out; nothing has been
				 * handed to the caller yet.
				 */
				kmem_free(results_p, length);
				ibcm_dec_hca_acc_cnt(hcap);
				return (retval);
			}
		} else {
			/*
			 * The sgid_ix and port number related to RequestGID
			 * are already obtained at the beginning.
			 */
			port_gid = rgid;
		}

		/*
		 * Allocate memory for return buffer, to be freed in
		 * ibt_free_mcg_info().
		 */
		mcg_infop = kmem_alloc((num_records * sizeof (ibt_mcg_info_t)),
		    KM_SLEEP);

		*mcgs_info_p = mcg_infop;
		*entries_p = num_records;

		/* Update the return values. */
		for (i = 0; i < num_records; i++) {

			/* results_p is a packed array of MCMemberRecords. */
			mcg_resp = (sa_mcmember_record_t *)((uchar_t *)
			    results_p + i * sizeof (sa_mcmember_record_t));

			mcg_infop[i].mc_adds_vect.av_dgid = mcg_resp->MGID;
			mcg_infop[i].mc_adds_vect.av_sgid = port_gid;
			mcg_infop[i].mc_adds_vect.av_srate = mcg_resp->Rate;
			mcg_infop[i].mc_adds_vect.av_srvl = mcg_resp->SL;
			mcg_infop[i].mc_adds_vect.av_flow = mcg_resp->FlowLabel;
			mcg_infop[i].mc_adds_vect.av_tclass = mcg_resp->TClass;
			mcg_infop[i].mc_adds_vect.av_hop = mcg_resp->HopLimit;
			mcg_infop[i].mc_adds_vect.av_port_num = hport.hp_port;
			mcg_infop[i].mc_adds_vect.av_send_grh = B_TRUE;
			mcg_infop[i].mc_adds_vect.av_dlid = mcg_resp->MLID;
			mcg_infop[i].mc_adds_vect.av_sgid_ix = hport.hp_sgid_ix;
			mcg_infop[i].mc_adds_vect.av_src_path = 0;
			mcg_infop[i].mc_mtu = mcg_resp->MTU;
			mcg_infop[i].mc_qkey = mcg_resp->Q_Key;
			mcg_infop[i].mc_scope = mcg_resp->Scope;
			mcg_infop[i].mc_pkt_lt = mcg_resp->PacketLifeTime;

			/* A failed P_Key lookup falls back to index 0. */
			if (ibt_pkey2index_byguid(hport.hp_hca_guid,
			    hport.hp_port, mcg_resp->P_Key,
			    &mcg_infop[i].mc_pkey_ix) != IBT_SUCCESS) {
				IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: "
				    "Pkey2Index Conversion failed");
				mcg_infop[i].mc_pkey_ix = 0;
			}
		}

		/*
		 * Deallocate the memory allocated by SA for results_p.
		 */
		kmem_free(results_p, length);
		retval = IBT_SUCCESS;

		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: returning %d MCGRecords",
		    num_records);

	} else {
		retval = IBT_MCG_RECORDS_NOT_FOUND;
		*entries_p = 0;

		IBTF_DPRINTF_L3(cmlog, "ibt_query_mcg: MCG RECORDS NOT FOUND");
	}

	ibcm_dec_hca_acc_cnt(hcap);

	return (retval);
}


/*
 * ibt_free_mcg_info()
 *	Free the memory allocated by successful ibt_query_mcg()
 *
 *	mcgs_info	Pointer returned by ibt_query_mcg().
 *
 *	entries		The number of ibt_mcg_info_t entries to free.
 */
void
ibt_free_mcg_info(ibt_mcg_info_t *mcgs_info, uint_t entries)
{
	IBTF_DPRINTF_L3(cmlog, "ibt_free_mcg_info: "
	    "Free <%d> entries from 0x%p", entries, mcgs_info);

	/* Nothing can be freed without both a buffer and a non-zero count. */
	if ((mcgs_info == NULL) || (entries == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_free_mcg_info: "
		    "ERROR: NULL buf pointer or length specified.");
		return;
	}

	/* The size must match the allocation: one ibt_mcg_info_t per entry. */
	kmem_free(mcgs_info, entries * sizeof (ibt_mcg_info_t));
}


/*
 * Function:
 *	ibt_gid_to_node_info()
 * Input:
 *	gid		Identifies the IB Node and port for which to obtain
 *			Node information.
 * Output:
 *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
 *			by the caller) in which to return the node information.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM
 *	IBT_NODE_RECORDS_NOT_FOUND
 *	IBT_NO_HCAS_AVAILABLE
 * Description:
 *	Retrieve Node Information for the specified GID.
 */
ibt_status_t
ibt_gid_to_node_info(ib_gid_t gid, ibt_node_info_t *node_info_p)
{
	sa_node_record_t	nr_req, *nr_resp;
	ibmf_saa_handle_t	saa_handle;
	ibt_status_t		retval;
	ibcm_hca_info_t		*hcap;
	ibtl_cm_hca_port_t	hport;
	int			i, j;
	uint_t			num_rec;
	ib_guid_t		*guid_array = NULL;
	sa_path_record_t	*path;
	size_t			len;
	uint8_t			npaths;
	uint32_t		num_hcas = 0;
	ib_lid_t		node_lid;
	boolean_t		local_node = B_FALSE;
	void			*res_p;
	uint8_t			num_ports = 0;


	IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info(%llX:%llX, %p)",
	    gid.gid_prefix, gid.gid_guid, node_info_p);

	if ((gid.gid_prefix == 0) || (gid.gid_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: GID is required");
		return (IBT_INVALID_PARAM);
	}

	if (node_info_p == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
		    "Return Buf (node_info_p) is NULL.");
		return (IBT_INVALID_PARAM);
	}

	/*
	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
	 * associated port) info via ibtl_cm_get_hca_port() call.
	 */
	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
	if (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS) {

		hcap = ibcm_find_hca_entry(hport.hp_hca_guid);
		if (hcap == NULL) {
			IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
			    "HCA(%llX) info not found", hport.hp_hca_guid);
			return (IBT_NO_HCAS_AVAILABLE);
		}
		/*
		 * Local GID: the loops below run exactly once, for this HCA
		 * and the port that owns 'gid'.
		 */
		num_ports = 1;
		num_hcas = 1;
		node_lid = hport.hp_base_lid;
		local_node = B_TRUE;
		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: Local Node: "
		    "LID = 0x%X", node_lid);
	} else {
		/* Get the number of HCAs and their GUIDs */
		num_hcas = ibt_get_hca_list(&guid_array);
		IBTF_DPRINTF_L4(cmlog, "ibt_gid_to_node_info: ibt_get_hca_list "
		    "returned %d hcas", num_hcas);

		if (num_hcas == 0) {
			IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
			    "NO HCA's Found on this system");
			return (IBT_NO_HCAS_AVAILABLE);
		}
	}

	/*
	 * Try each HCA and each of its ports until one SA query yields a
	 * NodeRecord.  "Not found" outcomes move on to the next port; success
	 * or any other error leaves through gid_to_ni_exit.
	 */
	for (i = 0; i < num_hcas; i++) {
		if (local_node == B_FALSE) {
			hcap = ibcm_find_hca_entry(guid_array[i]);
			if (hcap == NULL) {
				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
				    "HCA(%llX) info not found", guid_array[i]);
				retval = IBT_NO_HCAS_AVAILABLE;
				continue;
			}
			num_ports = hcap->hca_num_ports;
		}

		/*
		 * NOTE(review): if hca_num_ports were 0 on the first HCA
		 * examined, 'retval' would not have been assigned before it
		 * is read after the loops - confirm this cannot happen.
		 */
		for (j = 0; j < num_ports; j++) {
			uint8_t		port = 0;

			/* Port numbers are 1-based; 'j' is a 0-based index. */
			if (local_node == B_TRUE)
				port = hport.hp_port;
			else
				port = j + 1;

			/* Get SA Access Handle. */
			saa_handle = ibcm_get_saa_handle(hcap, port);
			if (saa_handle == NULL) {
				/*
				 * NOTE(review): for a remote GID 'hport' was
				 * zeroed above and the lookup into it failed,
				 * so the GUID printed here may not identify
				 * this HCA - confirm.
				 */
				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
				    "Port %d of HCA (%llX) is NOT ACTIVE",
				    port, hport.hp_hca_guid);
				retval = IBT_NODE_RECORDS_NOT_FOUND;
				continue;
			}

			if (local_node == B_FALSE) {
				ib_gid_t	sgid;
				int		sa_ret;

				/*
				 * Check whether 'gid' and this port has same
				 * subnet prefix. If not, then there is no use
				 * in searching from this port.
				 */
				sgid = hcap->hca_port_info[j].port_sgid0;
				if (gid.gid_prefix != sgid.gid_prefix) {
					IBTF_DPRINTF_L3(cmlog,
					    "ibt_gid_to_node_info:Sn_Prefix of "
					    "GID(%llX) and Port's(%llX) differ",
					    gid.gid_prefix, sgid.gid_prefix);
					retval = IBT_NODE_RECORDS_NOT_FOUND;
					continue;
				}

				/*
				 * First Get Path Records for the specified DGID
				 * from this port (SGID). From Path Records,
				 * note down DLID, then use this DLID as Input
				 * attribute to get NodeRecords from SA Access.
				 */
				npaths = 1;
				path = NULL;

				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
				    &path);
				if (sa_ret != IBMF_SUCCESS) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_gid_to_node_info: "
					    "ibmf_saa_gid_to_pathrecords() "
					    "returned error: %d ", sa_ret);
					retval =
					    ibcm_ibmf_analyze_error(sa_ret);
					continue;
				} else if ((npaths == 0) || (path == NULL)) {
					IBTF_DPRINTF_L3(cmlog,
					    "ibt_gid_to_node_info: failed (%d) "
					    "to get path records for the DGID "
					    "0x%llX from SGID 0x%llX", sa_ret,
					    gid.gid_guid, sgid.gid_guid);
					retval = IBT_NODE_RECORDS_NOT_FOUND;
					continue;
				}
				node_lid = path->DLID;	/* LID */

				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
				    "Remote Node: LID = 0x%X", node_lid);

				/* Free SA_Access memory for path record. */
				kmem_free(path, len);
			}

			/* Retrieve Node Records from SA Access. */
			bzero(&nr_req, sizeof (sa_node_record_t));

			nr_req.LID = node_lid;	/* LID */

			retval = ibcm_get_node_rec(saa_handle, &nr_req,
			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
				    "failed (%d) to get Node records", retval);
				continue;
			} else if (retval != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog, "ibt_gid_to_node_info: "
				    "failed (%d) to get Node records", retval);
				/*
				 * The goto skips the per-HCA release at the
				 * bottom of the outer loop, so do it here.
				 */
				ibcm_dec_hca_acc_cnt(hcap);
				goto gid_to_ni_exit;
			}

			num_rec = len/sizeof (sa_node_record_t);
			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;

			/* Validate the returned number of records. */
			if ((nr_resp != NULL) && (num_rec > 0)) {

				IBCM_DUMP_NODE_REC(nr_resp);

				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
				    *node_info_p))

				/* Only the first returned record is used. */
				node_info_p->n_sys_img_guid =
				    nr_resp->NodeInfo.SystemImageGUID;
				node_info_p->n_node_guid =
				    nr_resp->NodeInfo.NodeGUID;
				node_info_p->n_port_guid =
				    nr_resp->NodeInfo.PortGUID;
				node_info_p->n_dev_id =
				    nr_resp->NodeInfo.DeviceID;
				node_info_p->n_revision =
				    nr_resp->NodeInfo.Revision;
				node_info_p->n_vendor_id =
				    nr_resp->NodeInfo.VendorID;
				node_info_p->n_num_ports =
				    nr_resp->NodeInfo.NumPorts;
				node_info_p->n_port_num =
				    nr_resp->NodeInfo.LocalPortNum;
				node_info_p->n_node_type =
				    nr_resp->NodeInfo.NodeType;
				/*
				 * NOTE(review): strncpy() does not terminate
				 * the destination when the source fills all
				 * 64 bytes - confirm how consumers of
				 * n_description handle that.
				 */
				(void) strncpy(node_info_p->n_description,
				    (char *)&nr_resp->NodeDescription, 64);

				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
				    *node_info_p))

				/*
				 * Deallocate the memory allocated by SA for
				 * 'nr_resp'.
				 */
				ibcm_dec_hca_acc_cnt(hcap);
				kmem_free(nr_resp, len);
				retval = IBT_SUCCESS;

				goto gid_to_ni_exit;
			} else {
				retval = IBT_NODE_RECORDS_NOT_FOUND;
				IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: "
				    "Node Records NOT found - PortGUID %016llX",
				    gid.gid_guid);
			}
		}
		/* Done with this HCA; pairs with ibcm_find_hca_entry(). */
		ibcm_dec_hca_acc_cnt(hcap);

		if (local_node == B_TRUE)
			break;
	}

gid_to_ni_exit:
	/* guid_array is only allocated on the remote (non-local) path. */
	if (guid_array)
		ibt_free_hca_list(guid_array, num_hcas);

	IBTF_DPRINTF_L3(cmlog, "ibt_gid_to_node_info: done. Status %d", retval);

	return (retval);
}


/*
 * ibcm_get_node_rec()
 *	Retrieve the SA NodeRecords that match 'nr_req' on the components
 *	selected by 'component_mask'.
 *
 *	saa_handle	SA access handle to send the query on.
 *	nr_req		NodeRecord template to match.
 *	component_mask	Which fields of 'nr_req' are significant.
 *	result_p	Address of the caller's 'void *' that receives the
 *			returned record buffer (callers in this file pass the
 *			address of a pointer).
 *	len		Receives the length of the returned buffer; written
 *			only when the SA call itself succeeds.
 *
 *	Returns IBT_SUCCESS when a non-empty buffer was returned,
 *	IBT_NODE_RECORDS_NOT_FOUND when the SA call succeeded but returned
 *	nothing, else the error from ibcm_contact_sa_access().
 */
ibt_status_t
ibcm_get_node_rec(ibmf_saa_handle_t saa_handle, sa_node_record_t *nr_req,
    uint64_t component_mask, void *result_p, size_t *len)
{
	ibmf_saa_access_args_t  args;
	size_t			length;
	ibt_status_t		retval;
	void			**records_pp = (void **)result_p;

	args.sq_attr_id = SA_NODERECORD_ATTRID;
	args.sq_template = nr_req;
	args.sq_access_type = IBMF_SAA_RETRIEVE;
	args.sq_template_length = sizeof (sa_node_record_t);
	args.sq_component_mask = component_mask;
	args.sq_callback = NULL;
	args.sq_callback_arg = NULL;

	retval = ibcm_contact_sa_access(saa_handle, &args, &length, records_pp);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: SA Call Failed");
		return (retval);
	}

	*len = length;

	/*
	 * Validate the returned number of records.  The test must look at the
	 * buffer pointer the SA call stored through 'result_p'; 'result_p'
	 * itself is the address of the caller's variable and is never NULL.
	 */
	if ((*records_pp != NULL) && (length > 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_get_node_rec: Node Records FOUND");

		/* Got it, done!. */
		return (IBT_SUCCESS);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_get_node_rec: Node Rec NOT found");
		return (IBT_NODE_RECORDS_NOT_FOUND);
	}
}


/*
 * Function:
 *	ibt_lid_to_node_info()
 * Input:
 *	lid		Identifies the IB Node and port for which to obtain
 *			Node information.  Must be non-zero.
 * Output:
 *	node_info_p	A pointer to an ibt_node_info_t structure (allocated
 *			by the caller) in which to return the node information.
 *			It is written only when IBT_SUCCESS is returned.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM
 *	IBT_NODE_RECORDS_NOT_FOUND
 *	IBT_NO_HCAS_AVAILABLE
 *	Any other failure status reported by ibcm_get_node_rec().
 * Description:
 *	Retrieve Node Information for the specified LID.  Every port of
 *	every local HCA is asked in turn for a NodeRecord matching 'lid';
 *	the first record returned is copied out and the search stops.
 */
ibt_status_t
ibt_lid_to_node_info(ib_lid_t lid, ibt_node_info_t *node_info_p)
{
	/*
	 * Start from "not found" so that a well defined status is returned
	 * even when no port is ever queried (e.g. HCAs reporting no ports).
	 */
	ibt_status_t	retval = IBT_NODE_RECORDS_NOT_FOUND;
	ibcm_hca_info_t	*hcap;
	uint_t		i;	/* same width as num_hcas, cannot wrap */
	uint8_t		j;
	ib_guid_t	*guid_array = NULL;
	uint_t		num_hcas = 0;


	IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info(0x%X, %p)",
	    lid, node_info_p);

	if ((lid == 0) || (node_info_p == NULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
		    "Lid is zero, or node_info_p is NULL.");
		return (IBT_INVALID_PARAM);
	}

	/* Get the number of HCAs and their GUIDs */
	num_hcas = ibt_get_hca_list(&guid_array);
	IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info: ibt_get_hca_list "
	    "returned %d hcas", num_hcas);

	if (num_hcas == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
		    "NO HCA's Found on this system");
		return (IBT_NO_HCAS_AVAILABLE);
	}

	for (i = 0; i < num_hcas; i++) {
		/*
		 * A successful lookup is balanced by ibcm_dec_hca_acc_cnt()
		 * on every path that leaves this iteration.
		 */
		hcap = ibcm_find_hca_entry(guid_array[i]);
		if (hcap == NULL) {
			IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
			    "HCA(%llX) info not found", guid_array[i]);
			retval = IBT_NO_HCAS_AVAILABLE;
			continue;
		}

		for (j = 0; j < hcap->hca_num_ports; j++) {
			uint8_t			port;
			ibmf_saa_handle_t	saa_handle;
			uint_t			num_rec;
			size_t			len;
			void			*res_p;
			sa_node_record_t	nr_req, *nr_resp;

			/* Port numbers are 1-based. */
			port = j + 1;

			/* Get SA Access Handle. */
			saa_handle = ibcm_get_saa_handle(hcap, port);
			if (saa_handle == NULL) {
				IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
				    "Port %d of HCA (%llX) is NOT ACTIVE",
				    port, guid_array[i]);
				retval = IBT_NODE_RECORDS_NOT_FOUND;
				continue;
			}

			/* Retrieve Node Records from SA Access. */
			bzero(&nr_req, sizeof (sa_node_record_t));

			nr_req.LID = lid;	/* LID */

			retval = ibcm_get_node_rec(saa_handle, &nr_req,
			    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
				/* Nothing on this port; try the next one. */
				IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
				    "failed (%d) to get Node records", retval);
				continue;
			} else if (retval != IBT_SUCCESS) {
				/* Hard failure: give up on the whole search. */
				IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: "
				    "failed (%d) to get Node records", retval);
				ibcm_dec_hca_acc_cnt(hcap);
				goto lid_to_ni_exit;
			}

			num_rec = len/sizeof (sa_node_record_t);
			nr_resp = (sa_node_record_t *)(uchar_t *)res_p;

			/* Validate the returned number of records. */
			if ((nr_resp != NULL) && (num_rec > 0)) {

				IBCM_DUMP_NODE_REC(nr_resp);

				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(
				    *node_info_p))

				/* Only the first record is reported. */
				node_info_p->n_sys_img_guid =
				    nr_resp->NodeInfo.SystemImageGUID;
				node_info_p->n_node_guid =
				    nr_resp->NodeInfo.NodeGUID;
				node_info_p->n_port_guid =
				    nr_resp->NodeInfo.PortGUID;
				node_info_p->n_dev_id =
				    nr_resp->NodeInfo.DeviceID;
				node_info_p->n_revision =
				    nr_resp->NodeInfo.Revision;
				node_info_p->n_vendor_id =
				    nr_resp->NodeInfo.VendorID;
				node_info_p->n_num_ports =
				    nr_resp->NodeInfo.NumPorts;
				node_info_p->n_port_num =
				    nr_resp->NodeInfo.LocalPortNum;
				node_info_p->n_node_type =
				    nr_resp->NodeInfo.NodeType;
				(void) strncpy(node_info_p->n_description,
				    (char *)&nr_resp->NodeDescription, 64);

				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(
				    *node_info_p))

				/*
				 * Deallocate the memory allocated by SA for
				 * 'nr_resp'.
				 */
				ibcm_dec_hca_acc_cnt(hcap);
				kmem_free(nr_resp, len);
				retval = IBT_SUCCESS;

				goto lid_to_ni_exit;
			} else {
				retval = IBT_NODE_RECORDS_NOT_FOUND;
				IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: "
				    "Node Records NOT found - LID 0x%X",
				    lid);

				/*
				 * The SA call succeeded but returned less
				 * than one whole record; release its buffer
				 * so it is not leaked.
				 */
				if (nr_resp != NULL)
					kmem_free(nr_resp, len);
			}
		}
		ibcm_dec_hca_acc_cnt(hcap);
	}

lid_to_ni_exit:
	if (guid_array)
		ibt_free_hca_list(guid_array, num_hcas);

	IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: done. Status %d", retval);

	return (retval);
}

/*
 * Function:
 *	ibt_get_companion_port_gids()
 * Input:
 *	gid		GID whose companion port(s) are wanted.  May be left
 *			zero when hca_guid and/or sysimg_guid is supplied.
 *	hca_guid	Node (HCA) GUID to report the ports of, or 0.
 *	sysimg_guid	System Image GUID to report the ports of, or 0.
 * Output:
 *	gids_p		Set to an array of GIDs on success.
 *	num_gids_p	Set to the number of entries in *gids_p; it is zeroed
 *			once the arguments have been validated.
 * Returns:
 *	IBT_SUCCESS
 *	IBT_INVALID_PARAM	No usable input attribute, a NULL output
 *				pointer, or an inconsistent GID/HCA GUID or
 *				SystemImage/HCA GUID combination.
 *	IBT_NO_HCAS_AVAILABLE	ibt_get_hca_list() reported no HCAs.
 *	IBT_GIDS_NOT_FOUND
 *	Any other status returned by ibtl_cm_get_local_comp_gids(),
 *	ibcm_get_node_rec() or ibcm_ibmf_analyze_error().
 * Description:
 *	Get list of GID's available on a companion port(s) of the specified
 *	GID or list of GIDs available on a specified Node GUID/SystemImage GUID.
 *
 *	When the request resolves to a local HCA the work is handed to
 *	ibtl_cm_get_local_comp_gids().  Otherwise the SA is queried for
 *	NodeRecords through the ports of the local HCAs and the result array
 *	is kmem_zalloc()ed here, sized (*num_gids_p * sizeof (ib_gid_t)).
 *	NOTE(review): presumably the caller is expected to free that array;
 *	confirm against the interface documentation.
 */
ibt_status_t
ibt_get_companion_port_gids(ib_gid_t gid, ib_guid_t hca_guid,
    ib_guid_t sysimg_guid, ib_gid_t **gids_p, uint_t *num_gids_p)
{
	sa_node_record_t	nr_req, *nr_resp;
	void			*res_p;
	ibmf_saa_handle_t	saa_handle;
	int			sa_ret;
	ibt_status_t		retval = IBT_SUCCESS;
	ibcm_hca_info_t		*hcap;
	ibtl_cm_hca_port_t	hport;
	int			i, j;
	uint_t			num_rec;
	ib_guid_t		*guid_array = NULL;
	sa_path_record_t	*path;
	size_t			len;
	uint8_t			npaths;
	uint32_t		num_hcas = 0;
	boolean_t		local_node = B_FALSE;
	boolean_t		local_hca = B_FALSE;
	ib_guid_t		h_guid = hca_guid;
	ib_gid_t		*gidp = NULL, *t_gidp = NULL;
	int			multi_hca_loop = 0;

	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids(%llX:%llX, %llX, "
	    "%llX)", gid.gid_prefix, gid.gid_guid, hca_guid, sysimg_guid);

	/* At least one of: a complete GID, an HCA GUID, a SystemImage GUID. */
	if (((gid.gid_prefix == 0) || (gid.gid_guid == 0)) && (hca_guid == 0) &&
	    (sysimg_guid == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
		    "Null Input attribute specified.");
		return (IBT_INVALID_PARAM);
	}

	if ((num_gids_p == NULL) || (gids_p == NULL)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
		    "num_gids_p or gids_p is NULL");
		return (IBT_INVALID_PARAM);
	}

	*num_gids_p = 0;

	/* Get the number of HCAs and their GUIDs */
	if ((num_hcas = ibt_get_hca_list(&guid_array)) == 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
		    "NO HCA's Found on this system");
		return (IBT_NO_HCAS_AVAILABLE);
	}

	IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
	    "ibt_get_hca_list() returned %d hcas", num_hcas);

	/*
	 * If 'gid' is on local node, then get node lid (i.e. base lid of the
	 * associated port) info via ibtl_cm_get_hca_port() call.
	 */
	bzero(&hport, sizeof (ibtl_cm_hca_port_t));
	if ((gid.gid_prefix != 0) && (gid.gid_guid != 0) &&
	    (ibtl_cm_get_hca_port(gid, 0, &hport) == IBT_SUCCESS)) {

		/* A caller-supplied HCA GUID must agree with the GID's HCA. */
		if ((hca_guid != 0) && (hca_guid != hport.hp_hca_guid)) {
			IBTF_DPRINTF_L2(cmlog, "ibt_get_companion_port_gids: "
			    "Invalid GID<->HCAGUID combination specified.");
			retval = IBT_INVALID_PARAM;
			goto get_comp_pgid_exit;
		}
		h_guid = hport.hp_hca_guid;
		local_node = B_TRUE;

		IBTF_DPRINTF_L4(cmlog, "ibt_get_companion_port_gids: "
		    "Local Node: HCA (0x%llX)", h_guid);
	} else if (h_guid) {	/* Is specified HCA GUID - local? */
		for (i = 0; i < num_hcas; i++) {
			if (h_guid == guid_array[i]) {
				local_hca = B_TRUE;
				break;
			}
		}
	} else if (sysimg_guid) { /* Is specified SystemImage GUID - local? */
		for (i = 0; i < num_hcas; i++) {
			ibt_status_t	ret;
			ibt_hca_attr_t	hca_attr;

			ret = ibt_query_hca_byguid(guid_array[i], &hca_attr);
			if (ret != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibt_get_companion_port_gids: HCA(%llX) "
				    "info not found", guid_array[i]);
				retval = IBT_NO_HCAS_AVAILABLE;
				continue;
			}
			if (hca_attr.hca_si_guid == sysimg_guid) {
				/*
				 * NOTE(review): this branch is only reached
				 * when h_guid (initialised from hca_guid) is
				 * zero, so the hca_guid comparison below
				 * cannot be true here.
				 */
				if ((hca_guid != 0) &&
				    (hca_guid != hca_attr.hca_node_guid)) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_get_companion_port_gids: "
					    "Invalid SysImg<->HCA GUID "
					    "combination specified.");
					retval = IBT_INVALID_PARAM;
					goto get_comp_pgid_exit;
				}
				local_hca = B_TRUE;
				h_guid = hca_attr.hca_node_guid;
				break;
			}
		}
	}

	/* Local request: the whole answer comes from IBTL. */
	if ((local_node == B_TRUE) || (local_hca == B_TRUE)) {
		retval = ibtl_cm_get_local_comp_gids(h_guid, gid, gids_p,
		    num_gids_p);
		goto get_comp_pgid_exit;
	}

get_comp_for_multihca:
	/* We will be here, if request is for remote node */
	for (i = 0; i < num_hcas; i++) {
		int		multism;
		uint_t		count = 0;
		int		multi_sm_loop = 0;
		uint_t		k = 0, l;

		/*
		 * A successful lookup is balanced by ibcm_dec_hca_acc_cnt()
		 * on every path that leaves this iteration.
		 */
		hcap = ibcm_find_hca_entry(guid_array[i]);
		if (hcap == NULL) {
			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
			    "HCA(%llX) info not found", guid_array[i]);
			retval = IBT_NO_HCAS_AVAILABLE;
			continue;
		}

		/* 1 - MultiSM, 0 - Single SM */
		multism = ibtl_cm_is_multi_sm(guid_array[i]);

		for (j = 0; j < hcap->hca_num_ports; j++) {
			ib_gid_t	sgid;
			uint64_t	c_mask = 0;
			ib_guid_t	pg;
			/*
			 * 'port' is a 0-based index.  It is re-initialised
			 * from 'j' on every normal iteration, but the
			 * backward goto to get_comp_for_multism below jumps
			 * past this initialiser and so keeps the value
			 * assigned just before the jump.
			 */
			uint_t		port = j;

get_comp_for_multism:
			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
			    "Port %d, HCA %llX, MultiSM= %d, Loop=%d",
			    port + 1, h_guid, multism, multi_sm_loop);

			/* Get SA Access Handle. */
			saa_handle = ibcm_get_saa_handle(hcap, port + 1);
			if (saa_handle == NULL) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibt_get_companion_port_gids: "
				    "Port (%d)  - NOT ACTIVE", port + 1);
				retval = IBT_GIDS_NOT_FOUND;
				continue;
			}

			/*
			 * Check whether 'gid' and this port has same subnet
			 * prefix. If not, then there is no use in searching
			 * from this port.
			 */
			sgid = hcap->hca_port_info[port].port_sgid0;
			if ((h_guid == 0) && (gid.gid_prefix != 0) &&
			    (multi_sm_loop == 0) &&
			    (gid.gid_prefix != sgid.gid_prefix)) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibt_get_companion_port_gids: SnPrefix of "
				    "GID(%llX) and Port SN_Pfx(%llX) differ",
				    gid.gid_prefix, sgid.gid_prefix);
				retval = IBT_GIDS_NOT_FOUND;
				continue;
			}

			/*
			 * If HCA GUID or System Image GUID is specified, then
			 * we can achieve our goal sooner!.
			 */
			if ((h_guid == 0) && (sysimg_guid == 0)) {
				/* So only GID info is provided. */

				/*
				 * First Get Path Records for the specified DGID
				 * from this port (SGID). From Path Records,
				 * note down DLID, then use this DLID as Input
				 * attribute to get NodeRecords.
				 */
				npaths = 1;
				path = NULL;

				sa_ret = ibmf_saa_gid_to_pathrecords(saa_handle,
				    sgid, gid, 0, 0, B_TRUE, &npaths, 0, &len,
				    &path);
				if (sa_ret != IBMF_SUCCESS) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_get_companion_port_gids: "
					    "ibmf_saa_gid_to_pathrecords() "
					    "returned error: %d ", sa_ret);
					retval =
					    ibcm_ibmf_analyze_error(sa_ret);
					ibcm_dec_hca_acc_cnt(hcap);
					goto get_comp_pgid_exit;
				} else if ((npaths == 0) || (path == NULL)) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_get_companion_port_gids: "
					    "failed (%d) to get path records "
					    "for the DGID (0x%llX) from SGID "
					    "(0x%llX)", sa_ret, gid.gid_guid,
					    sgid.gid_guid);
					retval = IBT_GIDS_NOT_FOUND;
					continue;
				}

				bzero(&nr_req, sizeof (sa_node_record_t));
				nr_req.LID = path->DLID;	/* LID */

				IBTF_DPRINTF_L3(cmlog,
				    "ibt_get_companion_port_gids: "
				    "Remote Node: LID = 0x%X", nr_req.LID);

				/* Free SA_Access memory for path record. */
				kmem_free(path, len);

				IBTF_DPRINTF_L3(cmlog,
				    "ibt_get_companion_port_gids: SAA Call: "
				    "based on LID ");

				retval = ibcm_get_node_rec(saa_handle, &nr_req,
				    SA_NODEINFO_COMPMASK_NODELID, &res_p, &len);
				if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_get_companion_port_gids: "
					    "failed (%d) to get Node records",
					    retval);
					continue;
				} else if (retval != IBT_SUCCESS) {
					IBTF_DPRINTF_L2(cmlog,
					    "ibt_get_companion_port_gids: "
					    "failed (%d) to get Node records",
					    retval);
					ibcm_dec_hca_acc_cnt(hcap);
					goto get_comp_pgid_exit;
				}

				nr_resp = (sa_node_record_t *)(uchar_t *)res_p;
				/* Note down HCA GUID info. */
				h_guid = nr_resp->NodeInfo.NodeGUID;

				IBTF_DPRINTF_L3(cmlog,
				    "ibt_get_companion_port_gids: "
				    "Remote HCA GUID: 0x%llX", h_guid);

				IBCM_DUMP_NODE_REC(nr_resp);

				kmem_free(res_p, len);
			}

			/*
			 * Build the NodeRecord query from whichever of the
			 * node GUID and SystemImage GUID are now known.
			 */
			bzero(&nr_req, sizeof (sa_node_record_t));
			if (h_guid != 0) {
				nr_req.NodeInfo.NodeGUID = h_guid;
				c_mask = SA_NODEINFO_COMPMASK_NODEGUID;
			}

			if (sysimg_guid != 0) {
				nr_req.NodeInfo.SystemImageGUID = sysimg_guid;
				c_mask |= SA_NODEINFO_COMPMASK_SYSIMAGEGUID;
			}

			IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: "
			    "SAA Call: CMASK= 0x%llX", c_mask);

			retval = ibcm_get_node_rec(saa_handle, &nr_req, c_mask,
			    &res_p, &len);
			if (retval == IBT_NODE_RECORDS_NOT_FOUND) {
				IBTF_DPRINTF_L3(cmlog,
				    "ibt_get_companion_port_gids: "
				    "failed (%d) to get Node records", retval);
				continue;
			} else if (retval != IBT_SUCCESS) {
				IBTF_DPRINTF_L2(cmlog,
				    "ibt_get_companion_port_gids: Error: (%d) "
				    "while getting Node records", retval);
				ibcm_dec_hca_acc_cnt(hcap);
				goto get_comp_pgid_exit;
			}

			num_rec = len/sizeof (sa_node_record_t);

			/* We will be here, only if we found some NodeRec */
			/*
			 * With a complete input GID, count only the records
			 * for ports other than the one 'gid' names; otherwise
			 * every record counts.
			 */
			if (gid.gid_prefix && gid.gid_guid) {
				nr_resp = (sa_node_record_t *)res_p;
				for (l = 0; l < num_rec; l++, nr_resp++) {
					pg = nr_resp->NodeInfo.PortGUID;
					if (gid.gid_guid != pg)
						count++;
				}
			} else {
				count = num_rec;
			}

			if (count != 0) {
				/*
				 * NOTE(review): 'k' is only advanced inside
				 * this branch, and this branch always leaves
				 * through get_comp_pgid_exit, so 'k' is 0 and
				 * 'gidp' is NULL whenever we get here.  The
				 * merge of earlier results below therefore
				 * never copies anything.
				 */
				if (multi_sm_loop == 1) {
					count += k;
					t_gidp = kmem_zalloc(count *
					    sizeof (ib_gid_t), KM_SLEEP);

					if ((k != 0) && (gidp != NULL)) {
						bcopy(gidp, t_gidp,
						    k * sizeof (ib_gid_t));
						kmem_free(gidp,
						    k * sizeof (ib_gid_t));
					}
					gidp = t_gidp;
				} else {
					gidp = kmem_zalloc(count *
					    sizeof (ib_gid_t), KM_SLEEP);
				}
				*num_gids_p = count;
				*gids_p = gidp;

				/*
				 * Each reported GID pairs the subnet prefix
				 * of the port the query was sent from with
				 * the PortGUID taken from the NodeRecord.
				 */
				nr_resp = (sa_node_record_t *)res_p;
				for (l = 0; l < num_rec; l++, nr_resp++) {
					IBCM_DUMP_NODE_REC(nr_resp);

					pg = nr_resp->NodeInfo.PortGUID;
					IBTF_DPRINTF_L4(cmlog,
					    "ibt_get_companion_port_gids: "
					    "PortGID %llX", pg);

					if (pg != gid.gid_guid) {
						gidp[k].gid_prefix =
						    sgid.gid_prefix;
						gidp[k].gid_guid = pg;

						IBTF_DPRINTF_L3(cmlog,
						    "ibt_get_companion_pgids: "
						    "GID[%d] = %llX:%llX", k,
						    gidp[k].gid_prefix,
						    gidp[k].gid_guid);

						k++;
						/* Never write past 'count'. */
						if (k == count)
							break;
					}
				}
				retval = IBT_SUCCESS;	/* done!. */
				kmem_free(res_p, len);
				ibcm_dec_hca_acc_cnt(hcap);
				goto get_comp_pgid_exit;
			} else {
				IBTF_DPRINTF_L2(cmlog,
				    "ibt_get_companion_port_gids: "
				    "Companion PortGIDs not available");
				retval = IBT_GIDS_NOT_FOUND;
			}
			/* Deallocate the memory for 'res_p'. */
			kmem_free(res_p, len);

			/*
			 * If we are on MultiSM setup, then we need to lookout
			 * from that subnet port too.
			 */
			if (multism) {
				/* break if already searched both the subnet */
				if (multi_sm_loop == 1)
					break;

				/* Retry once from the other of ports 0/1. */
				port = (j == 0) ? 1 : 0;
				multi_sm_loop = 1;
				goto get_comp_for_multism;
			} else {
				break;
			}
		}
		ibcm_dec_hca_acc_cnt(hcap);

		/*
		 * We may be on dual HCA with dual SM configured system.  And
		 * the input attr GID was visible from second HCA. So in order
		 * to get the companion portgid we need to re-look from the
		 * first HCA ports.
		 */
		/*
		 * The goto restarts the HCA loop from i = 0;
		 * multi_hca_loop limits this to a single restart.
		 */
		if ((num_hcas > 1) && (i > 0) && (h_guid != 0) &&
		    (multi_hca_loop != 1)) {
			multi_hca_loop = 1;
			goto get_comp_for_multihca;
		}
	}
	if (*num_gids_p == 0)
		retval = IBT_GIDS_NOT_FOUND;

get_comp_pgid_exit:
	if (guid_array)
		ibt_free_hca_list(guid_array, num_hcas);

	/* Any GIDs handed back count as success, whatever failed later. */
	if ((retval != IBT_SUCCESS) && (*num_gids_p != 0)) {
		retval = IBT_SUCCESS;
	}

	IBTF_DPRINTF_L3(cmlog, "ibt_get_companion_port_gids: done. Status %d, "
	    "Found %d GIDs", retval, *num_gids_p);

	return (retval);
}

/* RDMA IP CM Support routines */
ibt_status_t
ibt_get_src_ip(ibt_srcip_attr_t *sattr, ibt_srcip_info_t **src_info_p,
    uint_t *entries_p)
{
	ibt_srcip_info_t	*s_ip;
	ibcm_arp_ip_t		*ipp;
	ibcm_arp_ibd_insts_t	ibds;
	uint8_t			i, j;
	uint_t			count;
	ibt_status_t		retval = IBT_SUCCESS;

	IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%p, %p, %p)",
	    sattr, src_info_p, entries_p);

	if (sattr == NULL || entries_p == NULL) {
		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid I/P Args.");
		return (IBT_INVALID_PARAM);
	}

	if (sattr->sip_gid.gid_prefix == 0 || sattr->sip_gid.gid_guid == 0) {
		IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID.");
		return (IBT_INVALID_PARAM);
	}

	/* TBD: Zoneid */
	retval = ibcm_arp_get_ibds(&ibds, sattr->sip_family);
	if (retval != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds "
		    "failed to get IBD Instances: ret 0x%x", retval);
		goto get_src_ip_end;
	}

	count = 0;
	for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
	    i++, ipp++) {
		if (ipp->ip_inet_family == AF_UNSPEC)
			continue;
		if (ipp->ip_port_gid.gid_prefix == sattr->sip_gid.gid_prefix &&
		    ipp->ip_port_gid.gid_guid == sattr->sip_gid.gid_guid) {
			if ((sattr->sip_pkey) &&
			    (ipp->ip_pkey != sattr->sip_pkey))
				continue;

			if ((sattr->sip_zoneid != ALL_ZONES) &&
			    (sattr->sip_zoneid != ipp->ip_zoneid))
				continue;

			count++;
			break;
		}
	}

	if (count) {
		/*
		 * Allocate memory for return buffer, to be freed by
		 * ibt_free_srcip_info().
		 */
		s_ip = kmem_alloc((count * sizeof (ibt_srcip_info_t)),
		    KM_SLEEP);

		*src_info_p = s_ip;
		*entries_p = count;

		j = 0;
		for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt;
		    i++, ipp++) {
			if (ipp->ip_inet_family == AF_UNSPEC)
				continue;
			if ((ipp->ip_port_gid.gid_prefix ==
			    sattr->sip_gid.gid_prefix) &&
			    (ipp->ip_port_gid.gid_guid ==
			    sattr->sip_gid.gid_guid)) {
				if ((sattr->sip_pkey) &&
				    (ipp->ip_pkey != sattr->sip_pkey))
					continue;

				if ((sattr->sip_zoneid != ALL_ZONES) &&
				    (sattr->sip_zoneid != ipp->ip_zoneid))
					continue;

				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_ip))
				s_ip[j].ip_addr.family = ipp->ip_inet_family;
				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_ip))
				if (s_ip[j].ip_addr.family == AF_INET) {
					bcopy(&ipp->ip_cm_sin.sin_addr,
					    &s_ip[j].ip_addr.un.ip4addr,
					    sizeof (in_addr_t));
				} else if (s_ip[j].ip_addr.family == AF_INET6) {
					bcopy(&ipp->ip_cm_sin6.sin6_addr,
					    &s_ip[j].ip_addr.un.ip6addr,
					    sizeof (in6_addr_t));
					/* TBD: scope_id */
				}
				IBCM_PRINT_IP("ibt_get_src_ip",
				    &s_ip[j].ip_addr);
				j++;
			}
		}
	} else {
		retval = IBT_SRC_IP_NOT_FOUND;
	}

get_src_ip_end:
	ibcm_arp_free_ibds(&ibds);
	return (retval);
}

/*
 * ibt_free_srcip_info()
 *	Release the buffer handed out by a successful ibt_get_src_ip().
 *
 *	src_info	Pointer returned by ibt_get_src_ip().
 *
 *	entries		The number of ibt_srcip_info_t entries in that
 *			buffer, as reported by ibt_get_src_ip().
 *
 *	A NULL pointer or a zero entry count is logged and otherwise ignored.
 */
void
ibt_free_srcip_info(ibt_srcip_info_t *src_info, uint_t entries)
{
	IBTF_DPRINTF_L3(cmlog, "ibt_free_srcip_info: "
	    "Free <%d> entries from 0x%p", entries, src_info);

	/* Nothing sensible can be freed without both a buffer and a size. */
	if ((src_info == NULL) || (entries == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_free_srcip_info: "
		    "ERROR: NULL buf pointer or ZERO length specified.");
		return;
	}

	kmem_free(src_info, entries * sizeof (ibt_srcip_info_t));
}


/*
 * ibt_get_ip_sid()
 *	Return an RDMA IP Service ID.
 *
 *	protocol_num	IP protocol number to encode, or zero to have a
 *			Service ID allocated locally.
 *
 *	dst_port	Destination port to encode; only used when
 *			protocol_num is non-zero.
 */
ib_svc_id_t
ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port)
{
	ib_svc_id_t	sid;

	/* Both arguments are promoted to int, so no 'l' length modifier. */
	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_sid(%X, %X)", protocol_num,
	    dst_port);

	/*
	 * If protocol_num is non-zero, then formulate the SID and return it.
	 * If protocol_num is zero, then we need to assign a locally generated
	 * IP SID with IB_SID_IPADDR_PREFIX.
	 */
	if (protocol_num) {
		sid = IB_SID_IPADDR_PREFIX | protocol_num << 16 | dst_port;
	} else {
		sid = ibcm_alloc_ip_sid();
	}

	IBTF_DPRINTF_L3(cmlog, "ibt_get_ip_sid: SID: 0x%016llX", sid);
	return (sid);
}

/*
 * ibt_release_ip_sid()
 *	Give back a Service ID obtained from ibt_get_ip_sid() with a zero
 *	protocol number.
 *
 * Returns IBT_INVALID_PARAM when 'ip_sid' is not an RDMA IP Service ID, or
 * when it carries a protocol number; IBT_SUCCESS otherwise.
 */
ibt_status_t
ibt_release_ip_sid(ib_svc_id_t ip_sid)
{
	ib_svc_id_t	outside_prefix = ip_sid & IB_SID_IPADDR_PREFIX_MASK;
	ib_svc_id_t	prefix_bits = ip_sid & IB_SID_IPADDR_PREFIX;
	ib_svc_id_t	ipnum_bits = ip_sid & IB_SID_IPADDR_IPNUM_MASK;

	IBTF_DPRINTF_L4(cmlog, "ibt_release_ip_sid(%llX)", ip_sid);

	/* Reject anything that does not look like an RDMA IP SID. */
	if ((outside_prefix != 0) || (prefix_bits == 0)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
		    "Called for Non-RDMA IP SID", ip_sid);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * SIDs allocated by IBTF have an all-zero protocol number field;
	 * anything else was not handed out by us and cannot be released.
	 */
	if (ipnum_bits != 0) {
		IBTF_DPRINTF_L2(cmlog, "ibt_release_ip_sid(0x%016llX): ERROR: "
		    "Called for Non-IBTF assigned RDMA IP SID", ip_sid);
		return (IBT_INVALID_PARAM);
	}

	ibcm_free_ip_sid(ip_sid);

	return (IBT_SUCCESS);
}


/*
 * ibt_get_ip_protocol_num()
 *	Extract the protocol number field from an RDMA IP Service ID.
 */
uint8_t
ibt_get_ip_protocol_num(ib_svc_id_t sid)
{
	ib_svc_id_t	ipnum_field = sid & IB_SID_IPADDR_IPNUM_MASK;

	/* The field sits 16 bits above the port number. */
	return (ipnum_field >> 16);
}

/*
 * ibt_get_ip_dst_port()
 *	Extract the destination port field from an RDMA IP Service ID.
 */
in_port_t
ibt_get_ip_dst_port(ib_svc_id_t sid)
{
	ib_svc_id_t	port_field = sid & IB_SID_IPADDR_PORTNUM_MASK;

	return (port_field);
}

_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_cm_info_t))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_ip_pvtdata_t))

/*
 * ibt_format_ip_private_data()
 *	Build the RDMA IP CM header from 'ip_cm_info' and copy its first
 *	IBT_IP_HDR_PRIV_DATA_SZ bytes into the caller's private data buffer.
 *
 *	ip_cm_info	Source port plus source/destination IP addresses.
 *			The family of src_addr selects IPv4 or IPv6.
 *
 *	priv_data_len	Size of the buffer at priv_data_p; must be at least
 *			IBT_IP_HDR_PRIV_DATA_SZ.
 *
 *	priv_data_p	Destination buffer; left untouched on failure.
 *
 * Returns IBT_SUCCESS or IBT_INVALID_PARAM.
 */
ibt_status_t
ibt_format_ip_private_data(ibt_ip_cm_info_t *ip_cm_info,
    ibt_priv_data_len_t priv_data_len, void *priv_data_p)
{
	ibcm_ip_pvtdata_t	ip_data;

	IBTF_DPRINTF_L4(cmlog, "ibt_format_ip_private_data(%p, %d, %p)",
	    ip_cm_info, priv_data_len, priv_data_p);

	if ((ip_cm_info == NULL) || (priv_data_p == NULL) ||
	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
		    "Invalid Inputs.");
		return (IBT_INVALID_PARAM);
	}

	/* Assemble the header locally; it is copied out only on success. */
	bzero(&ip_data, sizeof (ip_data));
	ip_data.ip_srcport = ip_cm_info->src_port; /* Source Port */

	IBCM_PRINT_IP("format_ip_pvt: src", &ip_cm_info->src_addr);
	IBCM_PRINT_IP("format_ip_pvt: dst", &ip_cm_info->dst_addr);

	/* IPV = 0x4, if IP-Addr are IPv4 format, else 0x6 for IPv6 */
	switch (ip_cm_info->src_addr.family) {
	case AF_INET:
		ip_data.ip_ipv = IBT_CM_IP_IPV_V4;
		ip_data.ip_srcv4 = ip_cm_info->src_addr.un.ip4addr;
		ip_data.ip_dstv4 = ip_cm_info->dst_addr.un.ip4addr;
		break;

	case AF_INET6:
		ip_data.ip_ipv = IBT_CM_IP_IPV_V6;
		bcopy(&ip_cm_info->src_addr.un.ip6addr,
		    &ip_data.ip_srcv6, sizeof (in6_addr_t));
		bcopy(&ip_cm_info->dst_addr.un.ip6addr,
		    &ip_data.ip_dstv6, sizeof (in6_addr_t));
		break;

	default:
		IBTF_DPRINTF_L2(cmlog, "ibt_format_ip_private_data: ERROR "
		    "IP Addr needs to be either AF_INET or AF_INET6 family.");
		return (IBT_INVALID_PARAM);
	}

	ip_data.ip_MajV = IBT_CM_IP_MAJ_VER;
	ip_data.ip_MinV = IBT_CM_IP_MIN_VER;

	bcopy(&ip_data, priv_data_p, IBT_IP_HDR_PRIV_DATA_SZ);

	return (IBT_SUCCESS);
}


/*
 * ibt_get_ip_data()
 *	Decode the RDMA IP CM header found at the start of a private data
 *	buffer into 'ip_cm_infop'.
 *
 *	priv_data_len	Size of the buffer at priv_data; must be at least
 *			IBT_IP_HDR_PRIV_DATA_SZ.
 *
 *	priv_data	Buffer holding the header.
 *
 *	ip_cm_infop	Receives the source port and the source/destination
 *			addresses.  The source port is stored before the IP
 *			version is checked, so it is written even when
 *			IBT_INVALID_PARAM is returned for a bad version.
 *
 * Returns IBT_SUCCESS or IBT_INVALID_PARAM.
 */
ibt_status_t
ibt_get_ip_data(ibt_priv_data_len_t priv_data_len, void *priv_data,
    ibt_ip_cm_info_t *ip_cm_infop)
{
	ibcm_ip_pvtdata_t	ip_data;

	IBTF_DPRINTF_L4(cmlog, "ibt_get_ip_data(%d, %p, %p)",
	    priv_data_len, priv_data, ip_cm_infop);

	if ((ip_cm_infop == NULL) || (priv_data == NULL) ||
	    (priv_data_len < IBT_IP_HDR_PRIV_DATA_SZ)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR Invalid Inputs");
		return (IBT_INVALID_PARAM);
	}

	/* Work on a local copy of the header. */
	bcopy(priv_data, &ip_data, IBT_IP_HDR_PRIV_DATA_SZ);
	ip_cm_infop->src_port = ip_data.ip_srcport; /* Source Port */

	/* IPV = 0x4, if IP Address are IPv4 format, else 0x6 for IPv6 */
	if ((ip_data.ip_ipv != IBT_CM_IP_IPV_V4) &&
	    (ip_data.ip_ipv != IBT_CM_IP_IPV_V6)) {
		IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_data: ERROR: IP Addr needs"
		    " to be either AF_INET or AF_INET6 family.");
		return (IBT_INVALID_PARAM);
	}

	if (ip_data.ip_ipv == IBT_CM_IP_IPV_V4) {
		/* Copy IPv4 Addr */
		ip_cm_infop->dst_addr.family = AF_INET;
		ip_cm_infop->src_addr.family = AF_INET;
		ip_cm_infop->src_addr.un.ip4addr = ip_data.ip_srcv4;
		ip_cm_infop->dst_addr.un.ip4addr = ip_data.ip_dstv4;
	} else {
		/* Copy IPv6 Addr */
		ip_cm_infop->dst_addr.family = AF_INET6;
		ip_cm_infop->src_addr.family = AF_INET6;
		bcopy(&ip_data.ip_srcv6, &ip_cm_infop->src_addr.un.ip6addr,
		    sizeof (in6_addr_t));
		bcopy(&ip_data.ip_dstv6, &ip_cm_infop->dst_addr.un.ip6addr,
		    sizeof (in6_addr_t));
	}

	IBCM_PRINT_IP("ibt_get_ip_data: src", &ip_cm_infop->src_addr);
	IBCM_PRINT_IP("ibt_get_ip_data: dst", &ip_cm_infop->dst_addr);

	return (IBT_SUCCESS);
}


/* Routines for warlock */

/*
 * ibcm_dummy_mcg_handler()
 *	Placeholder with the signature stored in ibcm_join_mcg_tqarg_t's
 *	'func' member.  It only assigns itself to that member of a local
 *	structure and traces the value; none of its arguments are used.
 *	NOTE(review): it sits under "Routines for warlock", so presumably
 *	the assignment exists for that tool's benefit - keep it as is.
 */
/* ARGSUSED */
static void
ibcm_dummy_mcg_handler(void *arg, ibt_status_t retval, ibt_mcg_info_t *minfo)
{
	ibcm_join_mcg_tqarg_t	dummy_mcg;

	dummy_mcg.func = ibcm_dummy_mcg_handler;

	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_mcg_handler: "
	    "dummy_mcg.func %p", dummy_mcg.func);
}


/*
 * ibcm_dummy_recycle_rc_handler()
 *	Placeholder with the signature stored in ibcm_taskq_recycle_arg_t's
 *	'func' member.  It only assigns itself to that member of a local
 *	structure and traces the value; none of its arguments are used.
 *	NOTE(review): it sits under "Routines for warlock", so presumably
 *	the assignment exists for that tool's benefit - keep it as is.
 */
/* ARGSUSED */
static void
ibcm_dummy_recycle_rc_handler(ibt_status_t retval, void *arg)
{
	ibcm_taskq_recycle_arg_t	dummy_rc_recycle;

	dummy_rc_recycle.func = ibcm_dummy_recycle_rc_handler;

	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_recycle_rc_handler: "
	    "dummy_rc_recycle.func %p", dummy_rc_recycle.func);
}


/*
 * ibcm_dummy_ud_handler()
 *	Placeholder UD CM handler.  It assigns itself to the handler members
 *	of a local ibcm_local_handler_t and ibcm_ud_state_data_t, traces both
 *	values and always returns IBT_CM_ACCEPT.  None of its arguments are
 *	used.
 */
/* ARGSUSED */
static ibt_cm_status_t
ibcm_dummy_ud_handler(void *priv, ibt_cm_ud_event_t *event,
    ibt_cm_ud_return_args_t *ret_args,
    void *priv_data, ibt_priv_data_len_t len)
{
	/*
	 * Let warlock see that ibcm_local_handler_s::actual_cm_handler
	 * points to this routine.
	 */
	ibcm_local_handler_t	p;
	ibcm_ud_state_data_t	dummy_ud;

	p.actual_cm_handler = ibcm_dummy_ud_handler;
	dummy_ud.ud_cm_handler = ibcm_dummy_ud_handler;

	/* The two values are separated so the trace line stays readable. */
	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_ud_handler: p.actual_cm_handler %p "
	    "dummy_ud.ud_cm_handler %p", p.actual_cm_handler,
	    dummy_ud.ud_cm_handler);
	/*
	 * Call all routines that the client's callback routine could call.
	 */

	return (IBT_CM_ACCEPT);
}

/*
 * ibcm_dummy_rc_handler()
 *	Placeholder RC CM handler.  It assigns itself to the cm_handler
 *	member of a local ibcm_state_data_t, traces the value and always
 *	returns IBT_CM_ACCEPT.  None of its arguments are used.
 */
/* ARGSUSED */
static ibt_cm_status_t
ibcm_dummy_rc_handler(void *priv, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	ibcm_state_data_t	dummy_rc;

	dummy_rc.cm_handler = ibcm_dummy_rc_handler;

	/* Name the member that is actually being printed. */
	IBTF_DPRINTF_L5(cmlog, "ibcm_dummy_rc_handler: "
	    "dummy_rc.cm_handler %p", dummy_rc.cm_handler);
	/*
	 * Call all routines that the client's callback routine could call.
	 */

	return (IBT_CM_ACCEPT);
}