Mercurial > illumos > illumos-gate
changeset 929:e9eba56e751c
PSARC 2005/546 FMR Update for IBTF
6227237 IBCM blames the client for SM's fault
6281147 ibtl does not support fast memory registration (FMR)
6334921 tavor needs to support fast memory registration
6337636 tavor should use MSIs when possible
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c Tue Nov 15 19:50:27 2005 -0800 @@ -326,7 +326,7 @@ * data). */ static struct ibt_clnt_modinfo_s ibd_clnt_modinfo = { - IBTI_V1, + IBTI_V2, IBT_NETWORK, ibd_async_handler, NULL,
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_hca.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_hca.c Tue Nov 15 19:50:27 2005 -0800 @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -225,7 +225,8 @@ /* Make sure resources have been freed. */ if (hca_hdl->ha_qp_cnt | hca_hdl->ha_cq_cnt | hca_hdl->ha_eec_cnt | hca_hdl->ha_ah_cnt | hca_hdl->ha_mr_cnt | hca_hdl->ha_mw_cnt | - hca_hdl->ha_pd_cnt) { + hca_hdl->ha_pd_cnt | hca_hdl->ha_fmr_pool_cnt | + hca_hdl->ha_ma_cnt) { IBTF_DPRINTF_L2(ibtf_hca, "ibt_close_hca: " "some resources have not been freed by '%s': hca_hdl = %p", hca_hdl->ha_clnt_devp->clnt_modinfop->mi_clnt_name,
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Tue Nov 15 19:50:27 2005 -0800 @@ -236,11 +236,11 @@ /* * Validate the Transport API version. */ - if (mod_infop->mi_ibt_version != IBTI_V1) { + if (mod_infop->mi_ibt_version != IBTI_V2) { IBTF_DPRINTF_L1(ibtf, "ibt_attach: IB client '%s' has an " "invalid IB TI Version '%d'", mod_infop->mi_clnt_name, mod_infop->mi_ibt_version); - return (IBT_INVALID_PARAM); + return (IBT_NOT_SUPPORTED); } if (mod_infop->mi_async_handler == NULL) { @@ -526,8 +526,9 @@ IBTF_DPRINTF_L2(ibtf, "ibc_attach(%p, %p)", ibc_hdl_p, info_p); /* Validate the Transport API version */ - if (info_p->hca_ci_vers != IBCI_V1) { - IBTF_DPRINTF_L1(ibtf, "ibc_attach: Invalid IB CI Version"); + if (info_p->hca_ci_vers != IBCI_V2) { + IBTF_DPRINTF_L1(ibtf, "ibc_attach: Invalid IB CI Version '%d'", + info_p->hca_ci_vers); return (IBC_FAILURE); } @@ -1039,6 +1040,7 @@ case IBT_FAILURE_IBCM: case IBT_FAILURE_IBDM: case IBT_FAILURE_IBTL: + case IBT_FAILURE_IBSM: ret = IBTL_ENA_POSSIBLE | (type << IBTL_TYPE_SHIFT); break; default:
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c Tue Nov 15 19:50:27 2005 -0800 @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -517,16 +517,25 @@ ibt_status_t ibt_map_mem_area(ibt_hca_hdl_t hca_hdl, ibt_va_attr_t *va_attrs, uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p, uint_t *num_paddr_p, - ibt_ma_hdl_t *ma_hdl_p) + size_t *paddr_bufsz_p, ib_memlen_t *paddr_offset_p, ibt_ma_hdl_t *ma_hdl_p) { + ibt_status_t status; + IBTF_DPRINTF_L3(ibtl_mem, "ibt_map_mem_area(%p, %p, %d)", hca_hdl, va_attrs, paddr_list_len); - return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_map_mem_area( + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_map_mem_area( IBTL_HCA2CIHCA(hca_hdl), va_attrs, NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */ - paddr_list_len, paddr_list_p, - num_paddr_p, ma_hdl_p)); + paddr_list_len, paddr_list_p, num_paddr_p, paddr_bufsz_p, + paddr_offset_p, ma_hdl_p); + if (status == IBT_SUCCESS) { + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_ma_cnt++; + mutex_exit(&hca_hdl->ha_mutex); + } + + return (status); } @@ -546,11 +555,20 @@ ibt_status_t ibt_unmap_mem_area(ibt_hca_hdl_t hca_hdl, ibt_ma_hdl_t ma_hdl) { + ibt_status_t status; + IBTF_DPRINTF_L3(ibtl_mem, "ibt_unmap_mem_area(%p, %p)", hca_hdl, ma_hdl); - return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_unmap_mem_area( + status = (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_unmap_mem_area( IBTL_HCA2CIHCA(hca_hdl), ma_hdl)); + if (status == IBT_SUCCESS) { + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_ma_cnt--; + mutex_exit(&hca_hdl->ha_mutex); + } + + return (status); } @@ -577,12 +595,21 @@ uint_t phys_buf_list_sz, ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p) { + ibt_status_t status; + IBTF_DPRINTF_L3(ibtl_mem, "ibt_alloc_lkey(%p, %p, 0x%X, %d)", hca_hdl, pd, flags, phys_buf_list_sz); - 
return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_lkey( + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_lkey( IBTL_HCA2CIHCA(hca_hdl), pd, flags, phys_buf_list_sz, mr_hdl_p, - mem_desc_p)); + mem_desc_p); + if (status == IBT_SUCCESS) { + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_mr_cnt++; + mutex_exit(&hca_hdl->ha_mutex); + } + + return (status); } @@ -606,13 +633,22 @@ ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p) { + ibt_status_t status; + IBTF_DPRINTF_L3(ibtl_mem, "ibt_register_phys_mr(%p, %p, %p)", hca_hdl, pd, mem_pattr); - return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_mr( + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_mr( IBTL_HCA2CIHCA(hca_hdl), pd, mem_pattr, NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */ - mr_hdl_p, mem_desc_p)); + mr_hdl_p, mem_desc_p); + if (status == IBT_SUCCESS) { + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_mr_cnt++; + mutex_exit(&hca_hdl->ha_mutex); + } + + return (status); } @@ -637,11 +673,148 @@ ibt_pd_hdl_t pd, ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p) { + ibt_status_t status; + IBTF_DPRINTF_L3(ibtl_mem, "ibt_reregister_phys_mr(%p, %p, %p, %p)", hca_hdl, mr_hdl, pd, mem_pattr); - return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_reregister_physical_mr( + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_reregister_physical_mr( IBTL_HCA2CIHCA(hca_hdl), mr_hdl, pd, mem_pattr, NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */ + mr_hdl_p, mem_desc_p); + + if (!(status == IBT_SUCCESS || status == IBT_MR_IN_USE || + status == IBT_HCA_HDL_INVALID || status == IBT_MR_HDL_INVALID)) { + IBTF_DPRINTF_L2(ibtl_mem, "ibt_reregister_phys_mr: " + "Re-registration Mem Failed: %d", status); + + /* we lost one memory region resource */ + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_mr_cnt--; + mutex_exit(&hca_hdl->ha_mutex); + + } + return (status); +} + + +/* + * Fast Memory Registration (FMR). + * + * ibt_create_fmr_pool + * Not fast-path. 
+ * ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates + * and initializes an "FMR pool". This pool contains state specific to + * this registration, including the watermark setting to determine when + * to sync, and the total number of FMR regions available within this pool. + * + */ +ibt_status_t +ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd, + ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p) +{ + ibt_status_t status; + + IBTF_DPRINTF_L3(ibtl_mem, "ibt_create_fmr_pool(%p, %p, %p)", + hca_hdl, pd, fmr_params); + + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_create_fmr_pool( + IBTL_HCA2CIHCA(hca_hdl), pd, fmr_params, fmr_pool_p); + if (status != IBT_SUCCESS) { + *fmr_pool_p = NULL; + return (status); + } + + /* Update the FMR resource count */ + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_fmr_pool_cnt++; + mutex_exit(&hca_hdl->ha_mutex); + + return (status); +} + + +/* + * ibt_destroy_fmr_pool + * ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific + * pool. All state and information regarding the pool are destroyed and + * returned as free space once again. No more use of FMR regions in this + * pool are possible without a subsequent call to ibt_create_fmr_pool(). + */ +ibt_status_t +ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool) +{ + ibt_status_t status; + + IBTF_DPRINTF_L3(ibtl_mem, "ibt_destroy_fmr_pool(%p, %p)", + hca_hdl, fmr_pool); + + status = IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_destroy_fmr_pool( + IBTL_HCA2CIHCA(hca_hdl), fmr_pool); + if (status != IBT_SUCCESS) { + IBTF_DPRINTF_L2(ibtl_mem, "ibt_destroy_fmr_pool: " + "CI FMR Pool destroy failed (%d)", status); + return (status); + } + + mutex_enter(&hca_hdl->ha_mutex); + hca_hdl->ha_fmr_pool_cnt--; + mutex_exit(&hca_hdl->ha_mutex); + + return (status); +} + +/* + * ibt_flush_fmr_pool + * ibt_flush_fmr_pool forces a flush to occur. 
At the client's request, + * any unmapped FMR regions (See 'ibt_deregister_mr()') are returned to + * a free state. This function allows for an asynchronous cleanup of + * formerly used FMR regions. Sync operation is also performed internally + * by HCA driver, when 'watermark' settings for the number of free FMR + * regions left in the "pool" is reached. + */ +ibt_status_t +ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool) +{ + IBTF_DPRINTF_L3(ibtl_mem, "ibt_flush_fmr_pool(%p, %p)", + hca_hdl, fmr_pool); + + return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_flush_fmr_pool( + IBTL_HCA2CIHCA(hca_hdl), fmr_pool)); +} + +/* + * ibt_register_physical_fmr + * ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool. + * It first consults the "FMR cache" to see if this is a duplicate memory + * registration to something already in use. If not, then a free entry + * in the "pool" is marked used. + */ +ibt_status_t +ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl, ibt_fmr_pool_hdl_t fmr_pool, + ibt_pmr_attr_t *mem_pattr, ibt_mr_hdl_t *mr_hdl_p, + ibt_pmr_desc_t *mem_desc_p) +{ + IBTF_DPRINTF_L3(ibtl_mem, "ibt_register_physical_fmr(%p, %p, %p, %p)", + hca_hdl, fmr_pool, mem_pattr, mem_desc_p); + + return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_register_physical_fmr( + IBTL_HCA2CIHCA(hca_hdl), fmr_pool, mem_pattr, + NULL, /* IBTL_HCA2MODI_P(hca_hdl)->mi_reserved */ mr_hdl_p, mem_desc_p)); } + +/* + * ibt_deregister_fmr + * The ibt_deregister_fmr un-maps the resources reserved from the FMR + * pool by ibt_register_physical_fmr(). The ibt_deregister_fmr() will + * mark the region as free in the FMR Pool. + */ +ibt_status_t +ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl) +{ + IBTF_DPRINTF_L3(ibtl_mem, "ibt_deregister_fmr(%p, %p)", hca, mr_hdl); + + return (IBTL_HCA2CIHCAOPS_P(hca)->ibc_deregister_fmr( + IBTL_HCA2CIHCA(hca), mr_hdl)); +}
--- a/usr/src/uts/common/io/ib/inc.flg Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/inc.flg Tue Nov 15 19:50:27 2005 -0800 @@ -73,7 +73,9 @@ usr/src/uts/intel/ibmf \ usr/src/uts/intel/ibtl \ usr/src/uts/intel/tavor \ - usr/src/uts/intel/daplt + usr/src/uts/intel/ibd \ + usr/src/uts/intel/daplt \ + usr/src/uts/intel/rpcib # packaging files find_files "s.*" \ @@ -120,6 +122,8 @@ echo_file usr/src/uts/intel/Makefile.intel echo_file usr/src/uts/intel/Makefile.targ echo_file usr/src/uts/intel/ia32/ml/ia32.il +echo_file usr/src/uts/sun4/Makefile.files +echo_file usr/src/uts/sun4/Makefile.rules echo_file usr/src/uts/sun4u/Makefile echo_file usr/src/uts/sun4u/Makefile.files echo_file usr/src/uts/sun4u/Makefile.rules
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c Tue Nov 15 19:50:27 2005 -0800 @@ -170,7 +170,7 @@ ibcm_ud_state_data_s::ud_timeout_next)) static ibt_clnt_modinfo_t ibcm_ibt_modinfo = { /* Client's modinfop */ - IBTI_V1, + IBTI_V2, IBT_CM, ibcm_async_handler, NULL,
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c Tue Nov 15 19:50:27 2005 -0800 @@ -2011,13 +2011,13 @@ IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: Path's Packet " "LifeTime too high %d, Maximum allowed %d IB Time (4 sec)", prec_resp->PacketLifeTime, ibcm_max_ib_pkt_lt); - return (IBT_PATH_PKT_LT_TOO_HIGH); + return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0)); } if ((prec_resp->Mtu > IB_MTU_4K) || (prec_resp->Mtu < IB_MTU_256)) { IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: MTU (%d) from " "pathrecord is invalid, reject it.", prec_resp->Mtu); - return (ibt_get_module_failure(IBT_FAILURE_IBCM, 0)); + return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0)); } /* Source Node Information. */ @@ -2030,7 +2030,7 @@ /* Failed to get pkey_index from pkey */ IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: " "Pkey2Index conversion failed: %d", retval); - return (retval); + return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0)); } cep_p->cep_adds_vect.av_sgid_ix = hport->hp_sgid_ix; cep_p->cep_adds_vect.av_src_path = @@ -2049,7 +2049,8 @@ IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: Pkey2Index " "conversion failed: %d", retval); - return (retval); + return (ibt_get_module_failure( + IBT_FAILURE_IBSM, 0)); } cep_p->cep_adds_vect.av_sgid_ix = @@ -2078,7 +2079,7 @@ default: IBTF_DPRINTF_L2(cmlog, "ibcm_update_cep_info: SRate (%d) from " "pathrecord is invalid, reject it.", prec_resp->Rate); - return (IBT_STATIC_RATE_INVALID); + return (ibt_get_module_failure(IBT_FAILURE_IBSM, 0)); } /* * If both Source and Destination GID prefix are same, then GRH is not
--- a/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/mgt/ibdm/ibdm.c Tue Nov 15 19:50:27 2005 -0800 @@ -159,7 +159,7 @@ }; static ibt_clnt_modinfo_t ibdm_ibt_modinfo = { - IBTI_V1, + IBTI_V2, IBT_DM, ibdm_event_hdlr, NULL,
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_impl.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_impl.c Tue Nov 15 19:50:27 2005 -0800 @@ -168,7 +168,7 @@ IBMF_TNF_TRACE, "", "ibmf_init() enter\n"); /* setup the IBT module information */ - ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V1; + ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V2; ibmf_statep->ibmf_ibt_modinfo.mi_clnt_class = IBT_IBMA; ibmf_statep->ibmf_ibt_modinfo.mi_async_handler = ibmf_ibt_async_handler;
--- a/usr/src/uts/common/rpc/rpcib.c Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/rpc/rpcib.c Tue Nov 15 19:50:27 2005 -0800 @@ -291,7 +291,7 @@ * Registration with IBTF as a consumer */ static struct ibt_clnt_modinfo_s rib_modinfo = { - IBTI_V1, + IBTI_V2, IBT_GENERIC, rib_async_handler, /* async event handler */ NULL, /* Memory Region Handler */
--- a/usr/src/uts/common/sys/ib/ibtl/ibci.h Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibci.h Tue Nov 15 19:50:27 2005 -0800 @@ -66,6 +66,7 @@ typedef struct ibc_cq_s *ibc_cq_hdl_t; /* Completion Queue Handle */ typedef struct ibc_eec_s *ibc_eec_hdl_t; /* End-to-End Context Handle */ +#define ibc_fmr_pool_hdl_t ibt_fmr_pool_hdl_t /* FMR Pool Handle */ #define ibc_mr_hdl_t ibt_mr_hdl_t /* Memory Region Handle */ #define ibc_mw_hdl_t ibt_mw_hdl_t /* Memory Window Handle */ #define ibc_ma_hdl_t ibt_ma_hdl_t /* Memory Area Handle */ @@ -149,6 +150,7 @@ #define hca_max_ether_qp hca_max_ether_chan #define hca_eec_max_ci_priv_sz hca_opaque7 #define hca_rdd_max_ci_priv_sz hca_opaque8 +#define hca_max_map_per_fmr hca_opaque9 /* @@ -159,7 +161,8 @@ /* Channel Interface version */ typedef enum ibc_version_e { - IBCI_V1 = 1 + IBCI_V1 = 1, + IBCI_V2 = 2 /* FMR Support */ } ibc_version_t; @@ -360,7 +363,8 @@ ibt_status_t (*ibc_map_mem_area)(ibc_hca_hdl_t hca_hdl, ibt_va_attr_t *va_attrs, void *ibtl_reserved, uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p, - uint_t *num_paddr_p, ibc_ma_hdl_t *ibc_ma_hdl_p); + uint_t *num_paddr_p, size_t *paddr_bufsz_p, + ib_memlen_t *paddr_offset_p, ibc_ma_hdl_t *ma_hdl_p); ibt_status_t (*ibc_unmap_mem_area)(ibc_hca_hdl_t hca_hdl, ibc_ma_hdl_t ma_hdl); @@ -377,6 +381,19 @@ ibc_mr_hdl_t mr, ibc_pd_hdl_t pd, ibt_pmr_attr_t *mem_pattr, void *ibtl_reserved, ibc_mr_hdl_t *mr_p, ibt_pmr_desc_t *mem_desc_p); + + /* Fast Memory Registration (FMR) */ + ibt_status_t (*ibc_create_fmr_pool)(ibc_hca_hdl_t hca, ibc_pd_hdl_t pd, + ibt_fmr_pool_attr_t *fmr_params, ibc_fmr_pool_hdl_t *fmr_pool_p); + ibt_status_t (*ibc_destroy_fmr_pool)(ibc_hca_hdl_t hca, + ibc_fmr_pool_hdl_t fmr_pool); + ibt_status_t (*ibc_flush_fmr_pool)(ibc_hca_hdl_t hca, + ibc_fmr_pool_hdl_t fmr_pool); + ibt_status_t (*ibc_register_physical_fmr)(ibc_hca_hdl_t hca, + ibc_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr, + void *ibtl_reserved, ibc_mr_hdl_t 
*mr_hdl_p, + ibt_pmr_desc_t *mem_desc_p); + ibt_status_t (*ibc_deregister_fmr)(ibc_hca_hdl_t hca, ibc_mr_hdl_t mr); } ibc_operations_t;
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Tue Nov 15 19:50:27 2005 -0800 @@ -100,7 +100,8 @@ /* Transport Interface version */ typedef enum ibt_version_e { - IBTI_V1 = 1 + IBTI_V1 = 1, + IBTI_V2 = 2 /* FMR Support */ } ibt_version_t; /* @@ -1322,7 +1323,7 @@ */ ibt_status_t ibt_map_mem_area(ibt_hca_hdl_t hca_hdl, ibt_va_attr_t *va_attrs, uint_t paddr_list_len, ibt_phys_buf_t *paddr_list_p, uint_t *num_paddr_p, - ibt_ma_hdl_t *ma_hdl_p); + size_t *paddr_bufsz_p, ib_memlen_t *paddr_offset_p, ibt_ma_hdl_t *ma_hdl_p); /* * ibt_unmap_mem_area() @@ -1679,6 +1680,57 @@ ibt_status_t ibt_get_port_state_byguid(ib_guid_t hca_guid, uint8_t port, ib_gid_t *sgid_p, ib_lid_t *base_lid_p); + +/* + * Fast Memory Registration (FMR). + * + * ibt_create_fmr_pool + * Not fast-path. + * ibt_create_fmr_pool() verifies that the HCA supports FMR and allocates + * and initializes an "FMR pool". This pool contains state specific to + * this registration, including the watermark setting to determine when + * to sync, and the total number of FMR regions available within this pool. + * + * ibt_destroy_fmr_pool + * ibt_destroy_fmr_pool() deallocates all of the FMR regions in a specific + * pool. All state and information regarding the pool are destroyed and + * returned as free space once again. No more use of FMR regions in this + * pool are possible without a subsequent call to ibt_create_fmr_pool(). + * + * ibt_flush_fmr_pool + * ibt_flush_fmr_pool forces a flush to occur. At the client's request, + * any unmapped FMR regions (See 'ibt_deregister_mr()') are returned to + * a free state. This function allows for an asynchronous cleanup of + * formerly used FMR regions. Sync operation is also performed internally + * by HCA driver, when 'watermark' settings for the number of free FMR + * regions left in the "pool" is reached.
+ * + * ibt_register_physical_fmr + * ibt_register_physical_fmr() assigns a "free" entry from the FMR Pool. + * It first consults the "FMR cache" to see if this is a duplicate memory + * registration to something already in use. If not, then a free entry + * in the "pool" is marked used. + * + * ibt_deregister_fmr + * The ibt_deregister_fmr un-maps the resources reserved from the FMR + * pool by ibt_register_physical_fmr(). The ibt_deregister_fmr() will + * mark the region as free in the FMR Pool. + */ +ibt_status_t ibt_create_fmr_pool(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd, + ibt_fmr_pool_attr_t *fmr_params, ibt_fmr_pool_hdl_t *fmr_pool_p); + +ibt_status_t ibt_destroy_fmr_pool(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool); + +ibt_status_t ibt_flush_fmr_pool(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool); + +ibt_status_t ibt_register_physical_fmr(ibt_hca_hdl_t hca_hdl, + ibt_fmr_pool_hdl_t fmr_pool, ibt_pmr_attr_t *mem_pattr, + ibt_mr_hdl_t *mr_hdl_p, ibt_pmr_desc_t *mem_desc_p); + +ibt_status_t ibt_deregister_fmr(ibt_hca_hdl_t hca, ibt_mr_hdl_t mr_hdl); + #ifdef __cplusplus } #endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Tue Nov 15 19:50:27 2005 -0800 @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -155,9 +155,12 @@ IBT_HCA_CNTR_VAL_INVALID = 205, /* Invalid Counter value */ IBT_HCA_PORT_NOT_ACTIVE = 206, /* Port is down */ IBT_HCA_SRQ_NOT_SUPPORTED = 207, /* Shared Receive Queue */ - IBT_HCA_RESIZE_SRQ_NOT_SUPPORTED = 208, /* Not supported */ - IBT_HCA_PAGE_MODE = 209, /* HCA operating in page mode */ - IBT_HCA_BLOCK_MODE = 210, /* HCA in block mode */ + /* not supported */ + IBT_HCA_RESIZE_SRQ_NOT_SUPPORTED = 208, /* SRQ Resize not supported */ + IBT_HCA_PAGE_MODE = 209, /* Not opened in page mode */ + IBT_HCA_BLOCK_MODE = 210, /* HCA does not support Block */ + /* mode or Not opened in */ + /* Block mode */ IBT_HCA_BMM_NOT_SUPPORTED = 211, /* Base Memory Management */ /* Extensions not supported */ IBT_HCA_BQM_NOT_SUPPORTED = 212, /* Base Queue Management */ @@ -170,6 +173,8 @@ IBT_HCA_LIF_NOT_SUPPORTED = 216, /* Local Invalidate Fencing */ /* not supported */ + IBT_HCA_FMR_NOT_SUPPORTED = 217, /* Fast Memory Registration */ + /* not supported */ /* * Address errors */ @@ -247,6 +252,7 @@ IBT_MW_HDL_INVALID = 809, /* Invalid Memory Window */ /* Handle */ IBT_MW_TYPE_INVALID = 810, + IBT_MA_HDL_INVALID = 811, /* Invalid Memory Area Hdl */ /* * Multicast errors @@ -277,8 +283,14 @@ IBT_SRQ_SZ_INSUFFICIENT = 1203, /* The Size of the WQ is too */ /* small, there are more */ /* outstanding entries than */ - IBT_SRQ_IN_USE = 1204 /* SRQ Still has QPs */ + IBT_SRQ_IN_USE = 1204, /* SRQ Still has QPs */ /* associated with it */ + /* + * FMR Errors + */ + IBT_FMR_POOL_HDL_INVALID = 1300, /* Invalid FMR Pool handle */ + IBT_FMR_POOL_IN_USE = 1301, /* FMR Pool in use. 
*/ + IBT_PBL_TOO_SMALL = 1302 } ibt_status_t;
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h Tue Nov 15 19:50:27 2005 -0800 @@ -56,6 +56,7 @@ typedef struct ibcm_svc_info_s *ibt_srv_hdl_t; /* ibt_register_service() */ typedef struct ibcm_svc_bind_s *ibt_sbind_hdl_t; /* ibt_bind_service() */ +typedef struct ibc_fmr_pool_s *ibt_fmr_pool_hdl_t; /* ibt_create_fmr_pool() */ typedef struct ibc_ma_s *ibt_ma_hdl_t; /* ibt_map_mem_area() */ typedef struct ibc_pd_s *ibt_pd_hdl_t; /* ibt_alloc_pd() */ typedef struct ibc_sched_s *ibt_sched_hdl_t; /* ibt_alloc_cq_sched() */ @@ -307,7 +308,8 @@ IBT_HCA_BASE_QUEUE_MGT = 1 << 24, /* Base Queue Mgt supported? */ IBT_HCA_CKSUM_FULL = 1 << 25, /* Checksum offload supported */ IBT_HCA_MEM_WIN_TYPE_2B = 1 << 26, /* Type 2B memory windows */ - IBT_HCA_PHYS_BUF_BLOCK = 1 << 27 /* Block mode phys buf lists */ + IBT_HCA_PHYS_BUF_BLOCK = 1 << 27, /* Block mode phys buf lists */ + IBT_HCA_FMR = 1 << 28 /* FMR Support */ } ibt_hca_flags_t; /* @@ -428,6 +430,8 @@ size_t hca_block_sz_hi; /* supported by the HCA */ uint_t hca_max_cq_handlers; ibt_lkey_t hca_reserved_lkey; + uint_t hca_max_fmrs; /* Max FMR Supported */ + uint_t hca_opaque9; } ibt_hca_attr_t; /* @@ -733,7 +737,8 @@ IBT_MR_REMOTE_ATOMIC = (1 << 4), IBT_MR_ZERO_BASED_VA = (1 << 5), IBT_MR_CONSUMER_OWNED_KEY = (1 << 6), - IBT_MR_SHARED = (1 << 7) + IBT_MR_SHARED = (1 << 7), + IBT_MR_FMR = (1 << 8) } ibt_mr_attr_flags_t; /* Memory region physical descriptor. 
*/ @@ -802,9 +807,12 @@ ibt_lkey_t pmr_lkey; /* Reregister only */ ibt_rkey_t pmr_rkey; /* Reregister only */ uint8_t pmr_key; /* Key to use on new Lkey & Rkey */ - uint_t pmr_num_buf; /* Num of entries in the mr_buf_list */ + uint_t pmr_num_buf; /* Num of entries in the pmr_buf_list */ + size_t pmr_buf_sz; ibt_phys_buf_t *pmr_buf_list; /* List of physical buffers accessed */ /* as an array */ + ibt_ma_hdl_t pmr_ma; /* Memory handle used to obtain the */ + /* pmr_buf_list */ } ibt_pmr_attr_t; @@ -873,7 +881,11 @@ * Flags for Virtual Address to HCA Physical Address translation. */ typedef enum ibt_va_flags_e { - IBT_VA_NO_FLAGS = 0 + IBT_VA_SLEEP = 0, + IBT_VA_NOSLEEP = (1 << 0), + IBT_VA_NONCOHERENT = (1 << 1), + IBT_VA_FMR = (1 << 2), + IBT_VA_BLOCK_MODE = (1 << 3) } ibt_va_flags_t; @@ -883,9 +895,33 @@ ib_memlen_t va_len; /* Length of region to register */ struct as *va_as; /* A pointer to an address space */ /* structure. */ + size_t va_phys_buf_min; + size_t va_phys_buf_max; ibt_va_flags_t va_flags; } ibt_va_attr_t; + +/* + * Fast Memory Registration (FMR) support. + */ + +/* FMR flush function handler. */ +typedef void (*ibt_fmr_flush_handler_t)(ibt_fmr_pool_hdl_t fmr_pool, + void *fmr_func_arg); + +/* FMR Pool create attributes. */ +typedef struct ibt_fmr_pool_attr_s { + uint_t fmr_max_pages_per_fmr; + uint_t fmr_pool_size; + uint_t fmr_dirty_watermark; + size_t fmr_page_sz; + boolean_t fmr_cache; + ibt_mr_flags_t fmr_flags; + ibt_fmr_flush_handler_t fmr_func_hdlr; + void *fmr_func_arg; +} ibt_fmr_pool_attr_t; + + /* * WORK REQUEST AND WORK REQUEST COMPLETION DEFINITIONS. */ @@ -1286,7 +1322,8 @@ IBT_FAILURE_IBMF, IBT_FAILURE_IBTL, IBT_FAILURE_IBCM, - IBT_FAILURE_IBDM + IBT_FAILURE_IBDM, + IBT_FAILURE_IBSM } ibt_failure_type_t; #ifdef __cplusplus
--- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h Tue Nov 15 19:50:27 2005 -0800 @@ -20,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -206,6 +206,8 @@ uint32_t ha_srq_cnt; /* SRQ resource counter */ ibtl_async_flags_t ha_async_flags; /* see *_async_flags above */ uint32_t ha_async_cnt; /* #asyncs in progress */ + uint32_t ha_fmr_pool_cnt; /* FMR Pool resource count */ + uint32_t ha_ma_cnt; /* Mem Area resource count */ } ibtl_hca_t; /* ha_flags values */
--- a/usr/src/uts/sparc/ibtl/ibtl.wlcmd Tue Nov 15 17:06:33 2005 -0800 +++ b/usr/src/uts/sparc/ibtl/ibtl.wlcmd Tue Nov 15 19:50:27 2005 -0800 @@ -53,8 +53,11 @@ root ibt_unmap_mem_area root ibt_register_phys_mr root ibt_reregister_phys_mr -root ibc_async_handler -root ibc_cq_handler +root ibt_create_fmr_pool +root ibt_destroy_fmr_pool +root ibt_flush_fmr_pool +root ibt_register_physical_fmr +root ibt_deregister_fmr root ibt_enable_cq_notify root ibt_set_cq_handler root ibt_alloc_qp @@ -155,6 +158,7 @@ root ibc_detach root ibc_async_handler root ibc_cq_handler +root ibc_fmr_flush_handler # IBTF Debug entry points root ibtl_dprintf_intr @@ -274,8 +278,14 @@ add ibc_operations_s::ibc_alloc_lkey targets warlock_dummy add ibc_operations_s::ibc_register_physical_mr targets warlock_dummy add ibc_operations_s::ibc_reregister_physical_mr targets warlock_dummy +add ibc_operations_s::ibc_create_fmr_pool targets warlock_dummy +add ibc_operations_s::ibc_destroy_fmr_pool targets warlock_dummy +add ibc_operations_s::ibc_flush_fmr_pool targets warlock_dummy +add ibc_operations_s::ibc_register_physical_fmr targets warlock_dummy +add ibc_operations_s::ibc_deregister_fmr targets warlock_dummy add ibt_clnt_modinfo_s::mi_async_handler targets warlock_dummy add ibtl_handlers.c:ibtl_async_client_call/async_handler targets warlock_dummy add ibtl_handlers.c:ibtl_cq_handler_call/cq_handler targets warlock_dummy +add ibtl_handlers.c:ibtl_fmr_flush_handler_call/fmr_handler targets warlock_dummy add ibtl_handlers.c:ibtl_sm_notice_task/sm_notice_handler targets warlock_dummy add ibtl_mgr_s::mgr_async_handler targets warlock_dummy