changeset 929:e9eba56e751c

PSARC 2005/546 FMR Update for IBTF; 6227237 IBCM blames the client for SM's fault; 6281147 ibtl does not support fast memory registration (FMR); 6334921 tavor needs to support fast memory registration; 6337636 tavor should use MSIs when possible
author srust
date Tue, 15 Nov 2005 19:50:27 -0800
parents 36d72fe4da29
children 1b624a2ec4bc
files usr/src/uts/common/io/ib/clients/ibd/ibd.c usr/src/uts/common/io/ib/ibtl/ibtl_hca.c usr/src/uts/common/io/ib/ibtl/ibtl_impl.c usr/src/uts/common/io/ib/ibtl/ibtl_mem.c usr/src/uts/common/io/ib/inc.flg usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c usr/src/uts/common/io/ib/mgt/ibmf/ibmf_impl.c usr/src/uts/common/rpc/rpcib.c usr/src/uts/common/sys/ib/ibtl/ibci.h usr/src/uts/common/sys/ib/ibtl/ibti_common.h usr/src/uts/common/sys/ib/ibtl/ibtl_status.h usr/src/uts/common/sys/ib/ibtl/ibtl_types.h usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h usr/src/uts/sparc/ibtl/ibtl.wlcmd
diffstat 16 files changed, 356 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c	Tue Nov 15 19:50:27 2005 -0800
@@ -326,7 +326,7 @@
  *	    data).
  */
 static struct ibt_clnt_modinfo_s ibd_clnt_modinfo = {
-	IBTI_V1,
+	IBTI_V2,
 	IBT_NETWORK,
 	ibd_async_handler,
 	NULL,
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_hca.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_hca.c	Tue Nov 15 19:50:27 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -225,7 +225,8 @@
 	/* Make sure resources have been freed. */
 	if (hca_hdl->ha_qp_cnt | hca_hdl->ha_cq_cnt | hca_hdl->ha_eec_cnt |
 	    hca_hdl->ha_ah_cnt | hca_hdl->ha_mr_cnt | hca_hdl->ha_mw_cnt |
-	    hca_hdl->ha_pd_cnt) {
+	    hca_hdl->ha_pd_cnt | hca_hdl->ha_fmr_pool_cnt |
+	    hca_hdl->ha_ma_cnt) {
 		IBTF_DPRINTF_L2(ibtf_hca, "ibt_close_hca: "
 		    "some resources have not been freed by '%s': hca_hdl = %p",
 		    hca_hdl->ha_clnt_devp->clnt_modinfop->mi_clnt_name,
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c	Tue Nov 15 19:50:27 2005 -0800
@@ -236,11 +236,11 @@
 	/*
 	 * Validate the Transport API version.
 	 */
-	if (mod_infop->mi_ibt_version != IBTI_V1) {
+	if (mod_infop->mi_ibt_version != IBTI_V2) {
 		IBTF_DPRINTF_L1(ibtf, "ibt_attach: IB client '%s' has an "
 		    "invalid IB TI Version '%d'", mod_infop->mi_clnt_name,
 		    mod_infop->mi_ibt_version);
-		return (IBT_INVALID_PARAM);
+		return (IBT_NOT_SUPPORTED);
 	}
 
 	if (mod_infop->mi_async_handler == NULL) {
@@ -526,8 +526,9 @@
 	IBTF_DPRINTF_L2(ibtf, "ibc_attach(%p, %p)", ibc_hdl_p, info_p);
 
 	/* Validate the Transport API version */
-	if (info_p->hca_ci_vers != IBCI_V1) {
-		IBTF_DPRINTF_L1(ibtf, "ibc_attach: Invalid IB CI Version");
+	if (info_p->hca_ci_vers != IBCI_V2) {
+		IBTF_DPRINTF_L1(ibtf, "ibc_attach: Invalid IB CI Version '%d'",
+		    info_p->hca_ci_vers);
 		return (IBC_FAILURE);
 	}
 
@@ -1039,6 +1040,7 @@
 	case IBT_FAILURE_IBCM:
 	case IBT_FAILURE_IBDM:
 	case IBT_FAILURE_IBTL:
+	case IBT_FAILURE_IBSM:
 		ret = IBTL_ENA_POSSIBLE | (type << IBTL_TYPE_SHIFT);
 		break;
 	default:
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c	Tue Nov 15 19:50:27 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -517,16 +517,25 @@
 ibt_status_t
 ibt_map_mem_area(ibt_hca_hdl_t hca_hdl, ibt_va_attr_t *va_attrs,
     uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p, uint_t *num_paddr_p,
-    ibt_ma_hdl_t *ma_hdl_p)
+    size_t *paddr_bufsz_p, ib_memlen_t *paddr_offset_p, ibt_ma_hdl_t *ma_hdl_p)
 {
+	ibt_status_t 	status;
+
 	IBTF_DPRINTF_L3(ibtl_mem, "ibt_map_mem_area(%p, %p, %d)",
 	    hca_hdl, va_attrs, paddr_list_len);
 
-	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_map_mem_area(
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_map_mem_area(
 	    IBTL_HCA2CIHCA(hca_hdl), va_attrs,
 	    NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */
-	    paddr_list_len, paddr_list_p,
-	    num_paddr_p, ma_hdl_p));
+	    paddr_list_len, paddr_list_p, num_paddr_p,  paddr_bufsz_p,
+	    paddr_offset_p, ma_hdl_p);
+	if (status == IBT_SUCCESS) {
+		mutex_enter(&hca_hdl->ha_mutex);
+		hca_hdl->ha_ma_cnt++;
+		mutex_exit(&hca_hdl->ha_mutex);
+	}
+
+	return (status);
 }
 
 
@@ -546,11 +555,20 @@
 ibt_status_t
 ibt_unmap_mem_area(ibt_hca_hdl_t hca_hdl, ibt_ma_hdl_t ma_hdl)
 {
+	ibt_status_t 	status;
+
 	IBTF_DPRINTF_L3(ibtl_mem, "ibt_unmap_mem_area(%p, %p)",
 	    hca_hdl, ma_hdl);
 
-	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_unmap_mem_area(
+	status = (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_unmap_mem_area(
 	    IBTL_HCA2CIHCA(hca_hdl), ma_hdl));
+	if (status == IBT_SUCCESS) {
+		mutex_enter(&hca_hdl->ha_mutex);
+		hca_hdl->ha_ma_cnt--;
+		mutex_exit(&hca_hdl->ha_mutex);
+	}
+
+	return (status);
 }
 
 
@@ -577,12 +595,21 @@
     uint_t phys_buf_list_sz, ibt_mr_hdl_t *mr_hdl_p,
     ibt_pmr_desc_t *mem_desc_p)
 {
+	ibt_status_t 	status;
+
 	IBTF_DPRINTF_L3(ibtl_mem, "ibt_alloc_lkey(%p, %p, 0x%X, %d)",
 	    hca_hdl, pd, flags, phys_buf_list_sz);
 
-	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_lkey(
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_lkey(
 	    IBTL_HCA2CIHCA(hca_hdl), pd, flags, phys_buf_list_sz, mr_hdl_p,
-	    mem_desc_p));
+	    mem_desc_p);
+	if (status == IBT_SUCCESS) {
+		mutex_enter(&hca_hdl->ha_mutex);
+		hca_hdl->ha_mr_cnt++;
+		mutex_exit(&hca_hdl->ha_mutex);
+	}
+
+	return (status);
 }
 
 
@@ -606,13 +633,22 @@
     ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p,
     ibt_pmr_desc_t *mem_desc_p)
 {
+	ibt_status_t 	status;
+
 	IBTF_DPRINTF_L3(ibtl_mem, "ibt_register_phys_mr(%p, %p, %p)",
 	    hca_hdl, pd, mem_pattr);
 
-	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_mr(
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_mr(
 	    IBTL_HCA2CIHCA(hca_hdl), pd, mem_pattr,
 	    NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */
-	    mr_hdl_p, mem_desc_p));
+	    mr_hdl_p, mem_desc_p);
+	if (status == IBT_SUCCESS) {
+		mutex_enter(&hca_hdl->ha_mutex);
+		hca_hdl->ha_mr_cnt++;
+		mutex_exit(&hca_hdl->ha_mutex);
+	}
+
+	return (status);
 }
 
 
@@ -637,11 +673,148 @@
     ibt_pd_hdl_t pd, ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p,
     ibt_pmr_desc_t *mem_desc_p)
 {
+	ibt_status_t 	status;
+
 	IBTF_DPRINTF_L3(ibtl_mem, "ibt_reregister_phys_mr(%p, %p, %p, %p)",
 	    hca_hdl, mr_hdl, pd, mem_pattr);
 
-	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_reregister_physical_mr(
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_reregister_physical_mr(
 	    IBTL_HCA2CIHCA(hca_hdl), mr_hdl, pd, mem_pattr,
 	    NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */
+	    mr_hdl_p, mem_desc_p);
+
+	if (!(status == IBT_SUCCESS || status == IBT_MR_IN_USE ||
+	    status == IBT_HCA_HDL_INVALID || status == IBT_MR_HDL_INVALID)) {
+		IBTF_DPRINTF_L2(ibtl_mem, "ibt_reregister_phys_mr: "
+		    "Re-registration Mem Failed: %d", status);
+
+		/* we lost one memory region resource */
+		mutex_enter(&hca_hdl->ha_mutex);
+		hca_hdl->ha_mr_cnt--;
+		mutex_exit(&hca_hdl->ha_mutex);
+
+	}
+	return (status);
+}
+
+
+/*
+ * Fast Memory Registration (FMR).
+ *
+ * ibt_create_fmr_pool
+ *      Not fast-path.
+ *      ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates
+ *      and initializes an "FMR pool".  This pool contains state specific to
+ *      this registration, including the watermark setting to determine when
+ *      to sync, and the total number of FMR regions available within this pool.
+ *
+ */
+ibt_status_t
+ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd,
+    ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p)
+{
+	ibt_status_t 		status;
+
+	IBTF_DPRINTF_L3(ibtl_mem, "ibt_create_fmr_pool(%p, %p, %p)",
+	    hca_hdl, pd, fmr_params);
+
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_create_fmr_pool(
+	    IBTL_HCA2CIHCA(hca_hdl), pd, fmr_params, fmr_pool_p);
+	if (status != IBT_SUCCESS) {
+		*fmr_pool_p = NULL;
+		return (status);
+	}
+
+	/* Update the FMR resource count */
+	mutex_enter(&hca_hdl->ha_mutex);
+	hca_hdl->ha_fmr_pool_cnt++;
+	mutex_exit(&hca_hdl->ha_mutex);
+
+	return (status);
+}
+
+
+/*
+ * ibt_destroy_fmr_pool
+ *      ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific
+ *      pool.  All state and information regarding the pool are destroyed and
+ *      returned as free space once again.  No more use of FMR regions in this
+ *      pool is possible without a subsequent call to ibt_create_fmr_pool().
+ */
+ibt_status_t
+ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool)
+{
+	ibt_status_t 	status;
+
+	IBTF_DPRINTF_L3(ibtl_mem, "ibt_destroy_fmr_pool(%p, %p)",
+	    hca_hdl, fmr_pool);
+
+	status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_destroy_fmr_pool(
+	    IBTL_HCA2CIHCA(hca_hdl), fmr_pool);
+	if (status != IBT_SUCCESS) {
+		IBTF_DPRINTF_L2(ibtl_mem, "ibt_destroy_fmr_pool: "
+		    "CI FMR Pool destroy failed (%d)", status);
+		return (status);
+	}
+
+	mutex_enter(&hca_hdl->ha_mutex);
+	hca_hdl->ha_fmr_pool_cnt--;
+	mutex_exit(&hca_hdl->ha_mutex);
+
+	return (status);
+}
+
+/*
+ * ibt_flush_fmr_pool
+ *      ibt_flush_fmr_pool forces a flush to occur.  At the client's request,
+ *      any unmapped FMR regions (see ibt_deregister_fmr()) are returned to
+ *      a free state.  This function allows for an asynchronous cleanup of
+ *      formerly used FMR regions.  A sync operation is also performed
+ *      internally by the HCA driver when the 'watermark' setting for the
+ *      number of free FMR regions left in the "pool" is reached.
+ */
+ibt_status_t
+ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool)
+{
+	IBTF_DPRINTF_L3(ibtl_mem, "ibt_flush_fmr_pool(%p, %p)",
+	    hca_hdl, fmr_pool);
+
+	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_flush_fmr_pool(
+	    IBTL_HCA2CIHCA(hca_hdl), fmr_pool));
+}
+
+/*
+ * ibt_register_physical_fmr
+ *      ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool.
+ *      It first consults the "FMR cache" to see if this is a duplicate memory
+ *      registration to something already in use.  If not, then a free entry
+ *      in the "pool" is marked used.
+ */
+ibt_status_t
+ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool,
+    ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p,
+    ibt_pmr_desc_t *mem_desc_p)
+{
+	IBTF_DPRINTF_L3(ibtl_mem, "ibt_register_physical_fmr(%p, %p, %p, %p)",
+	    hca_hdl, fmr_pool, mem_pattr, mem_desc_p);
+
+	return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_fmr(
+	    IBTL_HCA2CIHCA(hca_hdl), fmr_pool, mem_pattr,
+	    NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */
 	    mr_hdl_p, mem_desc_p));
 }
+
+/*
+ * ibt_deregister_fmr
+ *	ibt_deregister_fmr() un-maps the resources reserved from the FMR
+ *	pool by ibt_register_physical_fmr().  ibt_deregister_fmr() will
+ *	mark the region as free in the FMR Pool.
+ */
+ibt_status_t
+ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl)
+{
+	IBTF_DPRINTF_L3(ibtl_mem, "ibt_deregister_fmr(%p, %p)", hca, mr_hdl);
+
+	return (IBTL_HCA2CIHCAOPS_P(hca)->ibc_deregister_fmr(
+	    IBTL_HCA2CIHCA(hca), mr_hdl));
+}
--- a/usr/src/uts/common/io/ib/inc.flg	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/inc.flg	Tue Nov 15 19:50:27 2005 -0800
@@ -73,7 +73,9 @@
 	usr/src/uts/intel/ibmf		\
 	usr/src/uts/intel/ibtl		\
 	usr/src/uts/intel/tavor		\
-	usr/src/uts/intel/daplt
+	usr/src/uts/intel/ibd		\
+	usr/src/uts/intel/daplt		\
+	usr/src/uts/intel/rpcib	
 
 # packaging files
 find_files "s.*"	\
@@ -120,6 +122,8 @@
 echo_file  usr/src/uts/intel/Makefile.intel
 echo_file  usr/src/uts/intel/Makefile.targ
 echo_file  usr/src/uts/intel/ia32/ml/ia32.il
+echo_file  usr/src/uts/sun4/Makefile.files
+echo_file  usr/src/uts/sun4/Makefile.rules
 echo_file  usr/src/uts/sun4u/Makefile
 echo_file  usr/src/uts/sun4u/Makefile.files
 echo_file  usr/src/uts/sun4u/Makefile.rules
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c	Tue Nov 15 19:50:27 2005 -0800
@@ -170,7 +170,7 @@
     ibcm_ud_state_data_s::ud_timeout_next))
 
 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
-	IBTI_V1,
+	IBTI_V2,
 	IBT_CM,
 	ibcm_async_handler,
 	NULL,
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c	Tue Nov 15 19:50:27 2005 -0800
@@ -2011,13 +2011,13 @@
 		IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: Path's Packet "
 		    "LifeTime too high %d, Maximum allowed %d IB Time (4 sec)",
 		    prec_resp->PacketLifeTime, ibcm_max_ib_pkt_lt);
-		return (IBT_PATH_PKT_LT_TOO_HIGH);
+		return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0));
 	}
 
 	if ((prec_resp->Mtu > IB_MTU_4K) || (prec_resp->Mtu < IB_MTU_256)) {
 		IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: MTU (%d) from "
 		    "pathrecord is invalid, reject it.", prec_resp->Mtu);
-		return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0));
+		return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0));
 	}
 
 	/* Source Node Information. */
@@ -2030,7 +2030,7 @@
 			/* Failed to get pkey_index from pkey */
 			IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: "
 			    "Pkey2Index conversion failed: %d", retval);
-			return (retval);
+			return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0));
 		}
 		cep_p->cep_adds_vect.av_sgid_ix = hport->hp_sgid_ix;
 		cep_p->cep_adds_vect.av_src_path =
@@ -2049,7 +2049,8 @@
 					IBTF_DPRINTF_L2(cmlog,
 					    "ibcm_update_cep_info: Pkey2Index "
 					    "conversion failed: %d", retval);
-					return (retval);
+					return (ibt_get_module_failure(
+					    IBT_FAILURE_IBSM, 0));
 				}
 
 				cep_p->cep_adds_vect.av_sgid_ix =
@@ -2078,7 +2079,7 @@
 	default:
 		IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: SRate (%d) from "
 		    "pathrecord is invalid, reject it.", prec_resp->Rate);
-		return (IBT_STATIC_RATE_INVALID);
+		return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0));
 	}
 	/*
 	 * If both Source and Destination GID prefix are same, then GRH is not
--- a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c	Tue Nov 15 19:50:27 2005 -0800
@@ -159,7 +159,7 @@
 };
 
 static ibt_clnt_modinfo_t ibdm_ibt_modinfo = {
-	IBTI_V1,
+	IBTI_V2,
 	IBT_DM,
 	ibdm_event_hdlr,
 	NULL,
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_impl.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_impl.c	Tue Nov 15 19:50:27 2005 -0800
@@ -168,7 +168,7 @@
 	    IBMF_TNF_TRACE, "", "ibmf_init() enter\n");
 
 	/* setup the IBT module information */
-	ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V1;
+	ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V2;
 	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_class = IBT_IBMA;
 	ibmf_statep->ibmf_ibt_modinfo.mi_async_handler
 	    = ibmf_ibt_async_handler;
--- a/usr/src/uts/common/rpc/rpcib.c	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/rpc/rpcib.c	Tue Nov 15 19:50:27 2005 -0800
@@ -291,7 +291,7 @@
  * Registration with IBTF as a consumer
  */
 static struct ibt_clnt_modinfo_s rib_modinfo = {
-	IBTI_V1,
+	IBTI_V2,
 	IBT_GENERIC,
 	rib_async_handler,	/* async event handler */
 	NULL,			/* Memory Region Handler */
--- a/usr/src/uts/common/sys/ib/ibtl/ibci.h	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/sys/ib/ibtl/ibci.h	Tue Nov 15 19:50:27 2005 -0800
@@ -66,6 +66,7 @@
 typedef struct ibc_cq_s		*ibc_cq_hdl_t;	/* Completion Queue Handle */
 typedef struct ibc_eec_s	*ibc_eec_hdl_t; /* End-to-End Context Handle */
 
+#define	ibc_fmr_pool_hdl_t	ibt_fmr_pool_hdl_t /* FMR Pool Handle */
 #define	ibc_mr_hdl_t	ibt_mr_hdl_t	/* Memory Region Handle */
 #define	ibc_mw_hdl_t	ibt_mw_hdl_t	/* Memory Window Handle */
 #define	ibc_ma_hdl_t	ibt_ma_hdl_t	/* Memory Area Handle */
@@ -149,6 +150,7 @@
 #define	hca_max_ether_qp	hca_max_ether_chan
 #define	hca_eec_max_ci_priv_sz	hca_opaque7
 #define	hca_rdd_max_ci_priv_sz	hca_opaque8
+#define	hca_max_map_per_fmr	hca_opaque9
 
 
 /*
@@ -159,7 +161,8 @@
 
 /* Channel Interface version */
 typedef enum ibc_version_e {
-	IBCI_V1		= 1
+	IBCI_V1		= 1,
+	IBCI_V2		= 2		/* FMR Support */
 } ibc_version_t;
 
 
@@ -360,7 +363,8 @@
 	ibt_status_t (*ibc_map_mem_area)(ibc_hca_hdl_t hca_hdl,
 	    ibt_va_attr_t *va_attrs, void *ibtl_reserved,
 	    uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p,
-	    uint_t *num_paddr_p, ibc_ma_hdl_t *ibc_ma_hdl_p);
+	    uint_t *num_paddr_p, size_t *paddr_bufsz_p,
+	    ib_memlen_t *paddr_offset_p, ibc_ma_hdl_t *ma_hdl_p);
 	ibt_status_t (*ibc_unmap_mem_area)(ibc_hca_hdl_t hca_hdl,
 	    ibc_ma_hdl_t ma_hdl);
 
@@ -377,6 +381,19 @@
 	    ibc_mr_hdl_t mr, ibc_pd_hdl_t pd, ibt_pmr_attr_t *mem_pattr,
 	    void *ibtl_reserved, ibc_mr_hdl_t *mr_p,
 	    ibt_pmr_desc_t *mem_desc_p);
+
+	/* Fast Memory Registration (FMR) */
+	ibt_status_t (*ibc_create_fmr_pool)(ibc_hca_hdl_t hca, ibc_pd_hdl_t pd,
+	    ibt_fmr_pool_attr_t *fmr_params, ibc_fmr_pool_hdl_t *fmr_pool_p);
+	ibt_status_t (*ibc_destroy_fmr_pool)(ibc_hca_hdl_t hca,
+	    ibc_fmr_pool_hdl_t fmr_pool);
+	ibt_status_t (*ibc_flush_fmr_pool)(ibc_hca_hdl_t hca,
+	    ibc_fmr_pool_hdl_t fmr_pool);
+	ibt_status_t (*ibc_register_physical_fmr)(ibc_hca_hdl_t hca,
+	    ibc_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr,
+	    void *ibtl_reserved, ibc_mr_hdl_t *mr_hdl_p,
+	    ibt_pmr_desc_t *mem_desc_p);
+	ibt_status_t (*ibc_deregister_fmr)(ibc_hca_hdl_t hca, ibc_mr_hdl_t mr);
 } ibc_operations_t;
 
 
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h	Tue Nov 15 19:50:27 2005 -0800
@@ -100,7 +100,8 @@
 
 /* Transport Interface version */
 typedef enum ibt_version_e {
-	IBTI_V1 = 1
+	IBTI_V1 = 1,
+	IBTI_V2 = 2		/* FMR Support */
 } ibt_version_t;
 
 /*
@@ -1322,7 +1323,7 @@
  */
 ibt_status_t ibt_map_mem_area(ibt_hca_hdl_t hca_hdl, ibt_va_attr_t *va_attrs,
     uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p, uint_t *num_paddr_p,
-    ibt_ma_hdl_t *ma_hdl_p);
+    size_t *paddr_bufsz_p, ib_memlen_t *paddr_offset_p, ibt_ma_hdl_t *ma_hdl_p);
 
 /*
  * ibt_unmap_mem_area()
@@ -1679,6 +1680,57 @@
 ibt_status_t ibt_get_port_state_byguid(ib_guid_t hca_guid, uint8_t port,
     ib_gid_t *sgid_p, ib_lid_t *base_lid_p);
 
+
+/*
+ * Fast Memory Registration (FMR).
+ *
+ * ibt_create_fmr_pool
+ *	Not fast-path.
+ *	ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates
+ *	and initializes an "FMR pool".  This pool contains state specific to
+ *	this registration, including the watermark setting to determine when
+ *	to sync, and the total number of FMR regions available within this pool.
+ *
+ * ibt_destroy_fmr_pool
+ *	ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific
+ *	pool.  All state and information regarding the pool are destroyed and
+ *	returned as free space once again.  No more use of FMR regions in this
+ *	pool is possible without a subsequent call to ibt_create_fmr_pool().
+ *
+ * ibt_flush_fmr_pool
+ *	ibt_flush_fmr_pool forces a flush to occur.  At the client's request,
+ *	any unmapped FMR regions (see ibt_deregister_fmr()) are returned to
+ *	a free state.  This function allows for an asynchronous cleanup of
+ *	formerly used FMR regions.  A sync operation is also performed
+ *	internally by the HCA driver when the 'watermark' setting for the
+ *	number of free FMR regions left in the "pool" is reached.
+ *
+ * ibt_register_physical_fmr
+ *	ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool.
+ *	It first consults the "FMR cache" to see if this is a duplicate memory
+ *	registration to something already in use.  If not, then a free entry
+ *	in the "pool" is marked used.
+ *
+ * ibt_deregister_fmr
+ *	ibt_deregister_fmr() un-maps the resources reserved from the FMR
+ *	pool by ibt_register_physical_fmr().  ibt_deregister_fmr() will
+ *	mark the region as free in the FMR Pool.
+ */
+ibt_status_t ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd,
+    ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p);
+
+ibt_status_t ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl,
+    ibt_fmr_pool_hdl_t fmr_pool);
+
+ibt_status_t ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl,
+    ibt_fmr_pool_hdl_t fmr_pool);
+
+ibt_status_t ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl,
+    ibt_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr,
+    ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p);
+
+ibt_status_t ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl);
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h	Tue Nov 15 19:50:27 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -155,9 +155,12 @@
 	IBT_HCA_CNTR_VAL_INVALID	= 205,	/* Invalid Counter value */
 	IBT_HCA_PORT_NOT_ACTIVE		= 206,	/* Port is down */
 	IBT_HCA_SRQ_NOT_SUPPORTED	= 207,	/* Shared Receive Queue */
-	IBT_HCA_RESIZE_SRQ_NOT_SUPPORTED = 208,	/* Not supported */
-	IBT_HCA_PAGE_MODE		= 209,	/* HCA operating in page mode */
-	IBT_HCA_BLOCK_MODE		= 210,	/* HCA in block mode */
+						/* not supported */
+	IBT_HCA_RESIZE_SRQ_NOT_SUPPORTED = 208,	/* SRQ Resize not supported */
+	IBT_HCA_PAGE_MODE		= 209,	/* Not opened in page mode */
+	IBT_HCA_BLOCK_MODE		= 210,	/* HCA does not support Block */
+						/* mode or Not opened in */
+						/* Block mode */
 	IBT_HCA_BMM_NOT_SUPPORTED	= 211,	/* Base Memory Management */
 						/* Extensions not supported */
 	IBT_HCA_BQM_NOT_SUPPORTED	= 212,	/* Base Queue Management */
@@ -170,6 +173,8 @@
 
 	IBT_HCA_LIF_NOT_SUPPORTED	= 216,	/* Local Invalidate Fencing */
 						/* not supported */
+	IBT_HCA_FMR_NOT_SUPPORTED	= 217,	/* Fast Memory Registration */
+						/* not supported */
 	/*
 	 * Address errors
 	 */
@@ -247,6 +252,7 @@
 	IBT_MW_HDL_INVALID 		= 809,	/* Invalid Memory Window */
 						/* Handle */
 	IBT_MW_TYPE_INVALID		= 810,
+	IBT_MA_HDL_INVALID		= 811,  /* Invalid Memory Area Hdl */
 
 	/*
 	 * Multicast errors
@@ -277,8 +283,14 @@
 	IBT_SRQ_SZ_INSUFFICIENT		= 1203,	/* The Size of the WQ is too */
 						/* small, there are more */
 						/* outstanding entries than */
-	IBT_SRQ_IN_USE			= 1204	/* SRQ Still has QPs */
+	IBT_SRQ_IN_USE			= 1204,	/* SRQ Still has QPs */
 						/* associated with it */
+	/*
+	 * FMR Errors
+	 */
+	IBT_FMR_POOL_HDL_INVALID	= 1300,	/* Invalid FMR Pool handle */
+	IBT_FMR_POOL_IN_USE		= 1301,	/* FMR Pool in use. */
+	IBT_PBL_TOO_SMALL		= 1302
 
 } ibt_status_t;
 
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h	Tue Nov 15 19:50:27 2005 -0800
@@ -56,6 +56,7 @@
 typedef	struct	ibcm_svc_info_s	*ibt_srv_hdl_t;	    /* ibt_register_service() */
 typedef	struct	ibcm_svc_bind_s	*ibt_sbind_hdl_t;   /* ibt_bind_service() */
 
+typedef	struct	ibc_fmr_pool_s	*ibt_fmr_pool_hdl_t; /* ibt_create_fmr_pool() */
 typedef	struct	ibc_ma_s	*ibt_ma_hdl_t;	    /* ibt_map_mem_area() */
 typedef	struct	ibc_pd_s	*ibt_pd_hdl_t;	    /* ibt_alloc_pd() */
 typedef	struct	ibc_sched_s	*ibt_sched_hdl_t;   /* ibt_alloc_cq_sched() */
@@ -307,7 +308,8 @@
 	IBT_HCA_BASE_QUEUE_MGT	= 1 << 24,	/* Base Queue Mgt supported? */
 	IBT_HCA_CKSUM_FULL	= 1 << 25,	/* Checksum offload supported */
 	IBT_HCA_MEM_WIN_TYPE_2B	= 1 << 26,	/* Type 2B memory windows */
-	IBT_HCA_PHYS_BUF_BLOCK	= 1 << 27	/* Block mode phys buf lists */
+	IBT_HCA_PHYS_BUF_BLOCK	= 1 << 27,	/* Block mode phys buf lists */
+	IBT_HCA_FMR		= 1 << 28	/* FMR Support */
 } ibt_hca_flags_t;
 
 /*
@@ -428,6 +430,8 @@
 	size_t		hca_block_sz_hi;	/* supported by the HCA */
 	uint_t		hca_max_cq_handlers;
 	ibt_lkey_t	hca_reserved_lkey;
+	uint_t		hca_max_fmrs;		/* Max FMR Supported */
+	uint_t		hca_opaque9;
 } ibt_hca_attr_t;
 
 /*
@@ -733,7 +737,8 @@
 	IBT_MR_REMOTE_ATOMIC		= (1 << 4),
 	IBT_MR_ZERO_BASED_VA		= (1 << 5),
 	IBT_MR_CONSUMER_OWNED_KEY	= (1 << 6),
-	IBT_MR_SHARED			= (1 << 7)
+	IBT_MR_SHARED			= (1 << 7),
+	IBT_MR_FMR			= (1 << 8)
 } ibt_mr_attr_flags_t;
 
 /* Memory region physical descriptor. */
@@ -802,9 +807,12 @@
 	ibt_lkey_t	pmr_lkey;	/* Reregister only */
 	ibt_rkey_t	pmr_rkey;	/* Reregister only */
 	uint8_t		pmr_key;	/* Key to use on new Lkey & Rkey */
-	uint_t		pmr_num_buf;	/* Num of entries in the mr_buf_list */
+	uint_t		pmr_num_buf;	/* Num of entries in the pmr_buf_list */
+	size_t		pmr_buf_sz;
 	ibt_phys_buf_t	*pmr_buf_list;	/* List of physical buffers accessed */
 					/* as an array */
+	ibt_ma_hdl_t	pmr_ma;		/* Memory handle used to obtain the */
+					/* pmr_buf_list */
 } ibt_pmr_attr_t;
 
 
@@ -873,7 +881,11 @@
  * Flags for Virtual Address to HCA Physical Address translation.
  */
 typedef enum ibt_va_flags_e {
-	IBT_VA_NO_FLAGS		= 0
+	IBT_VA_SLEEP		= 0,
+	IBT_VA_NOSLEEP		= (1 << 0),
+	IBT_VA_NONCOHERENT	= (1 << 1),
+	IBT_VA_FMR		= (1 << 2),
+	IBT_VA_BLOCK_MODE	= (1 << 3)
 } ibt_va_flags_t;
 
 
@@ -883,9 +895,33 @@
 	ib_memlen_t	va_len;		/* Length of region to register */
 	struct as	*va_as;		/* A pointer to an address space */
 					/* structure. */
+	size_t		va_phys_buf_min;
+	size_t		va_phys_buf_max;
 	ibt_va_flags_t	va_flags;
 } ibt_va_attr_t;
 
+
+/*
+ * Fast Memory Registration (FMR) support.
+ */
+
+/* FMR flush function handler. */
+typedef void (*ibt_fmr_flush_handler_t)(ibt_fmr_pool_hdl_t fmr_pool,
+    void *fmr_func_arg);
+
+/* FMR Pool create attributes. */
+typedef struct ibt_fmr_pool_attr_s {
+	uint_t			fmr_max_pages_per_fmr;
+	uint_t			fmr_pool_size;
+	uint_t			fmr_dirty_watermark;
+	size_t			fmr_page_sz;
+	boolean_t		fmr_cache;
+	ibt_mr_flags_t		fmr_flags;
+	ibt_fmr_flush_handler_t	fmr_func_hdlr;
+	void			*fmr_func_arg;
+} ibt_fmr_pool_attr_t;
+
+
 /*
  * WORK REQUEST AND WORK REQUEST COMPLETION DEFINITIONS.
  */
@@ -1286,7 +1322,8 @@
 	IBT_FAILURE_IBMF,
 	IBT_FAILURE_IBTL,
 	IBT_FAILURE_IBCM,
-	IBT_FAILURE_IBDM
+	IBT_FAILURE_IBDM,
+	IBT_FAILURE_IBSM
 } ibt_failure_type_t;
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h	Tue Nov 15 19:50:27 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -206,6 +206,8 @@
 	uint32_t		ha_srq_cnt;	/* SRQ resource counter */
 	ibtl_async_flags_t	ha_async_flags;	/* see *_async_flags above */
 	uint32_t		ha_async_cnt;	/* #asyncs in progress */
+	uint32_t		ha_fmr_pool_cnt; /* FMR Pool resource count */
+	uint32_t		ha_ma_cnt;	/* Mem Area resource count */
 } ibtl_hca_t;
 
 /* ha_flags values */
--- a/usr/src/uts/sparc/ibtl/ibtl.wlcmd	Tue Nov 15 17:06:33 2005 -0800
+++ b/usr/src/uts/sparc/ibtl/ibtl.wlcmd	Tue Nov 15 19:50:27 2005 -0800
@@ -53,8 +53,11 @@
 root	ibt_unmap_mem_area
 root	ibt_register_phys_mr
 root	ibt_reregister_phys_mr
-root	ibc_async_handler
-root	ibc_cq_handler
+root	ibt_create_fmr_pool
+root	ibt_destroy_fmr_pool
+root	ibt_flush_fmr_pool
+root	ibt_register_physical_fmr
+root	ibt_deregister_fmr
 root	ibt_enable_cq_notify
 root	ibt_set_cq_handler
 root	ibt_alloc_qp
@@ -155,6 +158,7 @@
 root	ibc_detach
 root	ibc_async_handler
 root	ibc_cq_handler
+root	ibc_fmr_flush_handler
 
 # IBTF Debug entry points
 root	ibtl_dprintf_intr
@@ -274,8 +278,14 @@
 add	ibc_operations_s::ibc_alloc_lkey		targets warlock_dummy
 add	ibc_operations_s::ibc_register_physical_mr	targets warlock_dummy
 add	ibc_operations_s::ibc_reregister_physical_mr	targets warlock_dummy
+add	ibc_operations_s::ibc_create_fmr_pool		targets warlock_dummy
+add	ibc_operations_s::ibc_destroy_fmr_pool		targets warlock_dummy
+add	ibc_operations_s::ibc_flush_fmr_pool		targets warlock_dummy
+add	ibc_operations_s::ibc_register_physical_fmr	targets warlock_dummy
+add	ibc_operations_s::ibc_deregister_fmr		targets warlock_dummy
 add	ibt_clnt_modinfo_s::mi_async_handler		targets	warlock_dummy
 add	ibtl_handlers.c:ibtl_async_client_call/async_handler	targets	warlock_dummy
 add	ibtl_handlers.c:ibtl_cq_handler_call/cq_handler	targets	warlock_dummy
+add	ibtl_handlers.c:ibtl_fmr_flush_handler_call/fmr_handler	targets	warlock_dummy
 add	ibtl_handlers.c:ibtl_sm_notice_task/sm_notice_handler	targets	warlock_dummy
 add	ibtl_mgr_s::mgr_async_handler			targets	warlock_dummy