Mercurial > illumos > illumos-gate
changeset 12977:b65a8427f8fe
PSARC/2010/234 IBTF 2010.Q2 Enhancements
6538821 add Base Memory Management to ibtl and hermon
6893126 Add OFED ib_get_dma_mr() equivalent memory registration interface to IBTF
6937574 move FMANOTE messages to msgbuf or ibtf buf only
6954821 Expose IB interrupt handles and device info for perf optimization
6955695 IBTF V4 interfaces
line wrap: on
line diff
--- a/usr/src/uts/common/Makefile.files Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/Makefile.files Thu Jul 29 22:10:26 2010 -0700 @@ -697,7 +697,7 @@ hermon_cq.o hermon_event.o hermon_ioctl.o hermon_misc.o \ hermon_mr.o hermon_qp.o hermon_qpmod.o hermon_rsrc.o \ hermon_srq.o hermon_stats.o hermon_umap.o hermon_wr.o \ - hermon_fm.o + hermon_fcoib.o hermon_fm.o DAPLT_OBJS += daplt.o
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon.c Thu Jul 29 22:10:26 2010 -0700 @@ -47,6 +47,9 @@ #include <sys/ib/adapters/hermon/hermon.h> +/* /etc/system can tune this down, if that is desirable. */ +int hermon_msix_max = HERMON_MSIX_MAX; + /* The following works around a problem in pre-2_7_000 firmware. */ #define HERMON_FW_WORKAROUND @@ -972,6 +975,10 @@ * Context: Can be called from base context. * * Only one thread can be here for a given hermon_rsrc_type_t "type". + * + * "num_to_hdl" is set if there is a need for lookups from resource + * number/index to resource handle. This is needed for QPs/CQs/SRQs + * for the various affiliated events/errors. */ int hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type, @@ -981,6 +988,7 @@ hermon_dma_info_t *dma_info; uint8_t *bitmap; int status; + int num_to_hdl = 0; if (hermon_verbose) { IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) " @@ -1013,18 +1021,21 @@ HERMON_ICM_FREE(HERMON_CMPT_QPC); return (status); } + num_to_hdl = 1; break; case HERMON_SRQC: status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC); if (status != DDI_SUCCESS) { return (status); } + num_to_hdl = 1; break; case HERMON_CQC: status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC); if (status != DDI_SUCCESS) { return (status); } + num_to_hdl = 1; break; case HERMON_EQC: status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC); @@ -1035,16 +1046,21 @@ } /* ensure existence of bitmap and dmainfo, sets "dma_info" */ - hermon_bitmap(bitmap, dma_info, icm, index1); + hermon_bitmap(bitmap, dma_info, icm, index1, num_to_hdl); /* Set up the DMA handle for allocation and mapping */ - dma_info = icm->icm_dma[index1] + index2; + dma_info += index2; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info)) dma_info->length = icm->span << icm->log_object_size; dma_info->icmaddr = icm->icm_baseaddr + (((index1 << icm->split_shift) + (index2 << icm->span_shift)) << icm->log_object_size); + 
/* Allocate memory for the num_to_qp/cq/srq pointers */ + if (num_to_hdl) + icm->num_to_hdl[index1][index2] = + kmem_zalloc(HERMON_ICM_SPAN * sizeof (void *), KM_SLEEP); + if (hermon_verbose) { IBTF_DPRINTF_L2("hermon", "alloc DMA: " "rsrc (0x%x) index (%x, %x) " @@ -1151,6 +1167,66 @@ /* + * hermon_icm_num_to_hdl() + * Context: Can be called from base or interrupt context. + * + * Given an index of a resource, index through the sparsely allocated + * arrays to find the pointer to its software handle. Return NULL if + * any of the arrays of pointers has been freed (should never happen). + */ +void * +hermon_icm_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type, + uint32_t idx) +{ + hermon_icm_table_t *icm; + uint32_t span_offset; + uint32_t index1, index2; + void ***p1, **p2; + + icm = &state->hs_icm[type]; + hermon_index(index1, index2, idx, icm, span_offset); + p1 = icm->num_to_hdl[index1]; + if (p1 == NULL) { + IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 1" + ": rsrc_type %d, index 0x%x", type, idx); + return (NULL); + } + p2 = p1[index2]; + if (p2 == NULL) { + IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 2" + ": rsrc_type %d, index 0x%x", type, idx); + return (NULL); + } + return (p2[span_offset]); +} + +/* + * hermon_icm_set_num_to_hdl() + * Context: Can be called from base or interrupt context. + * + * Given an index of a resource, we index through the sparsely allocated + * arrays to store the software handle, used by hermon_icm_num_to_hdl(). + * This function is used to both set and reset (set to NULL) the handle. + * This table is allocated during ICM allocation for the given resource, + * so its existence is a given, and the store location does not conflict + * with any other stores to the table (no locking needed). 
+ */ +void +hermon_icm_set_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type, + uint32_t idx, void *hdl) +{ + hermon_icm_table_t *icm; + uint32_t span_offset; + uint32_t index1, index2; + + icm = &state->hs_icm[type]; + hermon_index(index1, index2, idx, icm, span_offset); + ASSERT((hdl == NULL) ^ + (icm->num_to_hdl[index1][index2][span_offset] == NULL)); + icm->num_to_hdl[index1][index2][span_offset] = hdl; +} + +/* * hermon_device_mode() * Context: Can be called from base or interrupt context. * @@ -1764,8 +1840,8 @@ return (DDI_FAILURE); } - state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq, - (4 * state->hs_devlim.num_rsvd_uar)); /* lesser of resvd's */ + state->hs_rsvd_eqs = max(state->hs_devlim.num_rsvd_eq, + (4 * state->hs_devlim.num_rsvd_uar)); /* now we have enough info to map in the UAR BAR */ /* @@ -2326,14 +2402,14 @@ * either configuration variables or successful queries of the Hermon * hardware abilities */ - state->hs_ibtfinfo.hca_ci_vers = IBCI_V3; - state->hs_ibtfinfo.hca_dip = state->hs_dip; + state->hs_ibtfinfo.hca_ci_vers = IBCI_V4; state->hs_ibtfinfo.hca_handle = (ibc_hca_hdl_t)state; state->hs_ibtfinfo.hca_ops = &hermon_ibc_ops; hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP); state->hs_ibtfinfo.hca_attr = hca_attr; + hca_attr->hca_dip = state->hs_dip; hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major; hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor; hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor; @@ -2341,9 +2417,8 @@ /* CQ interrupt moderation maximums - each limited to 16 bits */ hca_attr->hca_max_cq_mod_count = 0xFFFF; hca_attr->hca_max_cq_mod_usec = 0xFFFF; - - /* CQ relocation to other EQs - change when multiple MSI-Xs are used */ - hca_attr->hca_max_cq_handlers = 1; + hca_attr->hca_max_cq_handlers = state->hs_intrmsi_allocd; + /* * Determine HCA capabilities: @@ -2387,15 +2462,19 @@ hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey; } if (state->hs_devlim.local_inv 
&& state->hs_devlim.remote_inv && - state->hs_devlim.fast_reg_wr) { /* fw needs to be >= 2.6.636 */ - if (state->hs_fw.fw_rev_major > 2) + state->hs_devlim.fast_reg_wr) { /* fw needs to be >= 2.7.000 */ + if ((state->hs_fw.fw_rev_major > 2) || + ((state->hs_fw.fw_rev_major == 2) && + (state->hs_fw.fw_rev_minor >= 7))) caps2 |= IBT_HCA2_MEM_MGT_EXT; - else if (state->hs_fw.fw_rev_major == 2) - if (state->hs_fw.fw_rev_minor > 6) - caps2 |= IBT_HCA2_MEM_MGT_EXT; - else if (state->hs_fw.fw_rev_minor == 6) - if (state->hs_fw.fw_rev_subminor >= 636) - caps2 |= IBT_HCA2_MEM_MGT_EXT; + } + if (state->hs_devlim.log_max_rss_tbl_sz) { + hca_attr->hca_rss_max_log2_table = + state->hs_devlim.log_max_rss_tbl_sz; + if (state->hs_devlim.rss_xor) + caps2 |= IBT_HCA2_RSS_XOR_ALG; + if (state->hs_devlim.rss_toep) + caps2 |= IBT_HCA2_RSS_TPL_ALG; } if (state->hs_devlim.mps) { caps |= IBT_HCA_ZERO_BASED_VA; @@ -2406,6 +2485,7 @@ caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT | IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE | IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR); + caps2 |= IBT_HCA2_DMA_MR; if (state->hs_devlim.log_max_gso_sz) { hca_attr->hca_max_lso_size = @@ -2421,6 +2501,8 @@ hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4; hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1; hca_attr->hca_conn_rdma_sgl_overhead = 1; + hca_attr->hca_conn_rdma_write_sgl_sz = (max_send_wqe_bytes / 16) - 2; + hca_attr->hca_conn_rdma_read_sgl_sz = (512 / 16) - 2; /* see PRM */ hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16; /* We choose not to support "inline" unless it improves performance */ @@ -2429,6 +2511,13 @@ hca_attr->hca_conn_send_inline_sz = 0; hca_attr->hca_conn_rdmaw_inline_overhead = 4; + if (state->hs_devlim.fcoib && (caps2 & IBT_HCA2_MEM_MGT_EXT)) { + caps2 |= IBT_HCA2_FC; + hca_attr->hca_rfci_max_log2_qp = 7; /* 128 per port */ + hca_attr->hca_fexch_max_log2_qp = 16; /* 64K per port */ + 
hca_attr->hca_fexch_max_log2_mem = 20; /* 1MB per MPT - XXX */ + } + hca_attr->hca_flags = caps; hca_attr->hca_flags2 = caps2; @@ -2669,9 +2758,38 @@ /* Initialize the AVL tree for QP number support */ hermon_qpn_avl_init(state); + /* Initialize the cq_sched info structure */ + status = hermon_cq_sched_init(state); + if (status != DDI_SUCCESS) { + hermon_qpn_avl_fini(state); + mutex_destroy(&state->hs_info_lock); + mutex_destroy(&state->hs_fw_flashlock); + mutex_destroy(&state->hs_uar_lock); + kmem_free(hca_attr, sizeof (ibt_hca_attr_t)); + HERMON_ATTACH_MSG(state->hs_attach_buf, + "soft_state_init_cqsched_init_fail"); + return (DDI_FAILURE); + } + + /* Initialize the fcoib info structure */ + status = hermon_fcoib_init(state); + if (status != DDI_SUCCESS) { + hermon_cq_sched_fini(state); + hermon_qpn_avl_fini(state); + mutex_destroy(&state->hs_info_lock); + mutex_destroy(&state->hs_fw_flashlock); + mutex_destroy(&state->hs_uar_lock); + kmem_free(hca_attr, sizeof (ibt_hca_attr_t)); + HERMON_ATTACH_MSG(state->hs_attach_buf, + "soft_state_init_fcoibinit_fail"); + return (DDI_FAILURE); + } + /* Initialize the kstat info structure */ status = hermon_kstat_init(state); if (status != DDI_SUCCESS) { + hermon_fcoib_fini(state); + hermon_cq_sched_fini(state); hermon_qpn_avl_fini(state); mutex_destroy(&state->hs_info_lock); mutex_destroy(&state->hs_fw_flashlock); @@ -2697,6 +2815,12 @@ /* Teardown the kstat info */ hermon_kstat_fini(state); + /* Teardown the fcoib info */ + hermon_fcoib_fini(state); + + /* Teardown the cq_sched info */ + hermon_cq_sched_fini(state); + /* Teardown the AVL tree for QP number support */ hermon_qpn_avl_fini(state); @@ -3435,7 +3559,7 @@ if (val > maxval) { goto init_ports_fail; } - initport->max_guid = (uint16_t)val; + initport->max_gid = (uint16_t)val; initport->mg = 1; /* Validate max PKey table size */ @@ -4193,7 +4317,6 @@ { int status; - /* Query for the list of supported interrupt event types */ status = 
ddi_intr_get_supported_types(state->hs_dip, &state->hs_intr_types_avail); @@ -4253,6 +4376,19 @@ return (DDI_FAILURE); } +/* ARGSUSED */ +static int +hermon_intr_cb_handler(dev_info_t *dip, ddi_cb_action_t action, void *cbarg, + void *arg1, void *arg2) +{ + hermon_state_t *state = (hermon_state_t *)arg1; + + IBTF_DPRINTF_L2("hermon", "interrupt callback: instance %d, " + "action %d, cbarg %d\n", state->hs_instance, action, + (uint32_t)(uintptr_t)cbarg); + return (DDI_SUCCESS); +} + /* * hermon_add_intrs() * Context: Only called from attach() patch context @@ -4262,11 +4398,24 @@ { int status; + if (state->hs_intr_cb_hdl == NULL) { + status = ddi_cb_register(state->hs_dip, DDI_CB_FLAG_INTR, + hermon_intr_cb_handler, state, NULL, + &state->hs_intr_cb_hdl); + if (status != DDI_SUCCESS) { + cmn_err(CE_CONT, "ddi_cb_register failed: 0x%x\n", + status); + state->hs_intr_cb_hdl = NULL; + return (DDI_FAILURE); + } + } /* Get number of interrupts/MSI supported */ status = ddi_intr_get_nintrs(state->hs_dip, intr_type, &state->hs_intrmsi_count); if (status != DDI_SUCCESS) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } @@ -4274,27 +4423,41 @@ status = ddi_intr_get_navail(state->hs_dip, intr_type, &state->hs_intrmsi_avail); if (status != DDI_SUCCESS) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } /* Ensure that we have at least one (1) usable MSI or interrupt */ if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } - /* Attempt to allocate the maximum #interrupt/MSI handles */ + /* + * Allocate the #interrupt/MSI handles. + * The number we request is the minimum of these three values: + * HERMON_MSIX_MAX driver maximum (array size) + * hermon_msix_max /etc/system override to... 
+ * HERMON_MSIX_MAX + * state->hs_intrmsi_avail Maximum the ddi provides. + */ status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0], - intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail), - &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL); + intr_type, 0, min(min(HERMON_MSIX_MAX, state->hs_intrmsi_avail), + hermon_msix_max), &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL); if (status != DDI_SUCCESS) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } /* Ensure that we have allocated at least one (1) MSI or interrupt */ if (state->hs_intrmsi_allocd < 1) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } - state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* start at 0 */ /* * Extract the priority for the allocated interrupt/MSI. This @@ -4306,6 +4469,8 @@ /* Free the allocated interrupt/MSI handle */ (void) ddi_intr_free(state->hs_intrmsi_hdl[0]); + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; return (DDI_FAILURE); } @@ -4349,6 +4514,10 @@ return (DDI_FAILURE); } } + if (state->hs_intr_cb_hdl) { + (void) ddi_cb_unregister(state->hs_intr_cb_hdl); + state->hs_intr_cb_hdl = NULL; + } return (DDI_SUCCESS); }
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_cfg.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_cfg.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -352,8 +351,16 @@ cp->cp_max_mtu = port->ib_mtu; /* XXX now from query_port */ cp->cp_max_port_width = port->ib_port_wid; /* now from query_port */ cp->cp_max_vlcap = port->max_vl; + cp->cp_log_num_ah = hermon_log_num_ah; + + /* Paranoia, ensure no arrays indexed by port_num are out of bounds */ cp->cp_num_ports = devlim->num_ports; - cp->cp_log_num_ah = hermon_log_num_ah; + if (cp->cp_num_ports > HERMON_MAX_PORTS) { + cmn_err(CE_CONT, "device has more ports (%d) than are " + "supported; Using %d ports\n", + cp->cp_num_ports, HERMON_MAX_PORTS); + cp->cp_num_ports = HERMON_MAX_PORTS; + }; /* allocate variable sized arrays */ for (i = 0; i < HERMON_MAX_PORTS; i++) {
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_ci.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_ci.c Thu Jul 29 22:10:26 2010 -0700 @@ -99,8 +99,10 @@ static ibt_status_t hermon_ci_modify_cq(ibc_hca_hdl_t, ibc_cq_hdl_t, uint_t, uint_t, ibt_cq_handler_id_t); static ibt_status_t hermon_ci_alloc_cq_sched(ibc_hca_hdl_t, - ibt_cq_sched_flags_t, ibc_cq_handler_attr_t *); -static ibt_status_t hermon_ci_free_cq_sched(ibc_hca_hdl_t, ibt_cq_handler_id_t); + ibt_cq_sched_attr_t *, ibc_sched_hdl_t *); +static ibt_status_t hermon_ci_free_cq_sched(ibc_hca_hdl_t, ibc_sched_hdl_t); +static ibt_status_t hermon_ci_query_cq_handler_id(ibc_hca_hdl_t, + ibt_cq_handler_id_t, ibt_cq_handler_attr_t *); /* EE Contexts */ static ibt_status_t hermon_ci_alloc_eec(ibc_hca_hdl_t, ibc_eec_flags_t, @@ -129,6 +131,8 @@ ibc_pd_hdl_t, ibt_smr_attr_t *, struct buf *, void *, ibc_mr_hdl_t *, ibt_mr_desc_t *); static ibt_status_t hermon_ci_sync_mr(ibc_hca_hdl_t, ibt_mr_sync_t *, size_t); +static ibt_status_t hermon_ci_register_dma_mr(ibc_hca_hdl_t, ibc_pd_hdl_t, + ibt_dmr_attr_t *, void *, ibc_mr_hdl_t *, ibt_mr_desc_t *); /* Memory Windows */ static ibt_status_t hermon_ci_alloc_mw(ibc_hca_hdl_t, ibc_pd_hdl_t, @@ -212,6 +216,7 @@ ibc_mem_alloc_hdl_t *mem_alloc_hdl_p); static ibt_status_t hermon_ci_free_io_mem(ibc_hca_hdl_t hca, ibc_mem_alloc_hdl_t mem_alloc_hdl); +static ibt_status_t hermon_ci_not_supported(); /* * This ibc_operations_t structure includes pointers to all the entry points @@ -255,6 +260,7 @@ hermon_ci_modify_cq, hermon_ci_alloc_cq_sched, hermon_ci_free_cq_sched, + hermon_ci_query_cq_handler_id, /* EE Contexts */ hermon_ci_alloc_eec, @@ -321,8 +327,39 @@ /* Memory allocation */ hermon_ci_alloc_io_mem, hermon_ci_free_io_mem, + + /* XRC not yet supported */ + hermon_ci_not_supported, /* ibc_alloc_xrc_domain */ + hermon_ci_not_supported, /* ibc_free_xrc_domain */ + hermon_ci_not_supported, /* ibc_alloc_xrc_srq */ + hermon_ci_not_supported, /* 
ibc_free_xrc_srq */ + hermon_ci_not_supported, /* ibc_query_xrc_srq */ + hermon_ci_not_supported, /* ibc_modify_xrc_srq */ + hermon_ci_not_supported, /* ibc_alloc_xrc_tgt_qp */ + hermon_ci_not_supported, /* ibc_free_xrc_tgt_qp */ + hermon_ci_not_supported, /* ibc_query_xrc_tgt_qp */ + hermon_ci_not_supported, /* ibc_modify_xrc_tgt_qp */ + + /* Memory Region (physical) */ + hermon_ci_register_dma_mr, + + /* Next enhancements */ + hermon_ci_not_supported, /* ibc_enhancement1 */ + hermon_ci_not_supported, /* ibc_enhancement2 */ + hermon_ci_not_supported, /* ibc_enhancement3 */ + hermon_ci_not_supported, /* ibc_enhancement4 */ }; +/* + * Not yet implemented OPS + */ +/* ARGSUSED */ +static ibt_status_t +hermon_ci_not_supported() +{ + return (IBT_NOT_SUPPORTED); +} + /* * hermon_ci_query_hca_ports() @@ -337,11 +374,6 @@ uint_t start, end, port; int status, indx; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -381,11 +413,6 @@ hermon_state_t *state; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -427,11 +454,6 @@ ASSERT(pd_p != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -460,16 +482,6 @@ hermon_pdhdl_t pdhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and PD handle */ state = (hermon_state_t *)hca; pdhdl = (hermon_pdhdl_t)pd; @@ -532,16 +544,6 @@ hermon_pdhdl_t pdhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD 
handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and PD handle */ state = (hermon_state_t *)hca; pdhdl = (hermon_pdhdl_t)pd; @@ -571,16 +573,6 @@ hermon_ahhdl_t ahhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid address handle pointer */ - if (ah == NULL) { - return (IBT_AH_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and AH handle */ state = (hermon_state_t *)hca; ahhdl = (hermon_ahhdl_t)ah; @@ -606,16 +598,6 @@ hermon_pdhdl_t pdhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid address handle pointer */ - if (ah == NULL) { - return (IBT_AH_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and AH handle */ state = (hermon_state_t *)hca; ahhdl = (hermon_ahhdl_t)ah; @@ -645,16 +627,6 @@ hermon_ahhdl_t ahhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid address handle pointer */ - if (ah == NULL) { - return (IBT_AH_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and AH handle */ state = (hermon_state_t *)hca; ahhdl = (hermon_ahhdl_t)ah; @@ -677,17 +649,12 @@ ibt_chan_sizes_t *queue_sizes_p, ib_qpn_t *qpn, ibc_qp_hdl_t *qp_p) { hermon_state_t *state; - hermon_qp_info_t qpinfo; + hermon_qp_info_t qpinfo; int status; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*queue_sizes_p)) - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -721,17 +688,12 @@ ibc_qp_hdl_t *qp_p) { hermon_state_t *state; - hermon_qp_info_t qpinfo; + hermon_qp_info_t qpinfo; int status; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*queue_sizes_p)) - /* Check for valid HCA handle 
*/ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -751,15 +713,37 @@ return (IBT_SUCCESS); } +/* + * hermon_ci_alloc_qp_range() + * Free a Queue Pair + * Context: Can be called only from user or kernel context. + */ /* ARGSUSED */ static ibt_status_t hermon_ci_alloc_qp_range(ibc_hca_hdl_t hca, uint_t log2, - ibtl_qp_hdl_t *ibtl_qp_p, ibt_qp_type_t type, + ibtl_qp_hdl_t *ibtl_qp, ibt_qp_type_t type, ibt_qp_alloc_attr_t *attr_p, ibt_chan_sizes_t *queue_sizes_p, - ibc_cq_hdl_t *send_cq_p, ibc_cq_hdl_t *recv_cq_p, - ib_qpn_t *qpn_p, ibc_qp_hdl_t *qp_p) + ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq, + ib_qpn_t *qpn, ibc_qp_hdl_t *qp_p) { - return (IBT_NOT_SUPPORTED); + hermon_state_t *state; + hermon_qp_info_t qpinfo; + int status; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*queue_sizes_p)) + + /* Grab the Hermon softstate pointer */ + state = (hermon_state_t *)hca; + + /* Allocate the QP */ + qpinfo.qpi_attrp = attr_p; + qpinfo.qpi_type = type; + qpinfo.qpi_queueszp = queue_sizes_p; + qpinfo.qpi_qpn = qpn; + status = hermon_qp_alloc_range(state, log2, &qpinfo, ibtl_qp, + send_cq, recv_cq, (hermon_qphdl_t *)qp_p, HERMON_NOSLEEP); + return (status); } /* @@ -775,16 +759,6 @@ hermon_qphdl_t qphdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -808,16 +782,6 @@ hermon_state_t *state; hermon_qpn_entry_t *entry; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qpnh == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = 
(hermon_state_t *)hca; entry = (hermon_qpn_entry_t *)qpnh; @@ -842,16 +806,6 @@ hermon_qphdl_t qphdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -876,16 +830,6 @@ hermon_qphdl_t qphdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -910,14 +854,8 @@ hermon_cqhdl_t cqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; - /* Allocate the CQ */ status = hermon_cq_alloc(state, ibt_cqhdl, attr_p, actual_size, &cqhdl, HERMON_NOSLEEP); @@ -944,16 +882,6 @@ hermon_cqhdl_t cqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq == NULL) { - return (IBT_CQ_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and CQ handle */ state = (hermon_state_t *)hca; cqhdl = (hermon_cqhdl_t)cq; @@ -974,26 +902,19 @@ hermon_ci_query_cq(ibc_hca_hdl_t hca, ibc_cq_hdl_t cq, uint_t *entries_p, uint_t *count_p, uint_t *usec_p, ibt_cq_handler_id_t *hid_p) { + hermon_state_t *state; hermon_cqhdl_t cqhdl; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq == NULL) { - return (IBT_CQ_HDL_INVALID); - } - /* Grab the CQ handle */ + state = (hermon_state_t *)hca; cqhdl = (hermon_cqhdl_t)cq; /* Query the current CQ size */ *entries_p = 
cqhdl->cq_bufsz; *count_p = cqhdl->cq_intmod_count; *usec_p = cqhdl->cq_intmod_usec; - *hid_p = 0; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cqhdl)) + *hid_p = HERMON_EQNUM_TO_HID(state, cqhdl->cq_eqnum); return (IBT_SUCCESS); } @@ -1012,16 +933,6 @@ hermon_cqhdl_t cqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq == NULL) { - return (IBT_CQ_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and CQ handle */ state = (hermon_state_t *)hca; cqhdl = (hermon_cqhdl_t)cq; @@ -1048,16 +959,6 @@ hermon_cqhdl_t cqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq == NULL) { - return (IBT_CQ_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and CQ handle */ state = (hermon_state_t *)hca; cqhdl = (hermon_cqhdl_t)cq; @@ -1076,24 +977,14 @@ */ /* ARGSUSED */ static ibt_status_t -hermon_ci_alloc_cq_sched(ibc_hca_hdl_t hca, ibt_cq_sched_flags_t flags, - ibc_cq_handler_attr_t *handler_attr_p) +hermon_ci_alloc_cq_sched(ibc_hca_hdl_t hca, ibt_cq_sched_attr_t *attr, + ibc_sched_hdl_t *sched_hdl_p) { - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* - * This is an unsupported interface for the Hermon driver. Hermon - * does not support CQ scheduling classes. - */ - - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handler_attr_p)) - handler_attr_p->h_id = NULL; - handler_attr_p->h_pri = 0; - handler_attr_p->h_bind = NULL; - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handler_attr_p)) - return (IBT_SUCCESS); + int status; + + status = hermon_cq_sched_alloc((hermon_state_t *)hca, attr, + (hermon_cq_sched_t **)sched_hdl_p); + return (status); } @@ -1102,27 +993,34 @@ * Free a CQ scheduling class resource * Context: Can be called only from user or kernel context. 
*/ +/* ARGSUSED */ static ibt_status_t -hermon_ci_free_cq_sched(ibc_hca_hdl_t hca, ibt_cq_handler_id_t handler_id) +hermon_ci_free_cq_sched(ibc_hca_hdl_t hca, ibc_sched_hdl_t sched_hdl) { - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* - * This is an unsupported interface for the Hermon driver. Hermon - * does not support CQ scheduling classes. Returning a NULL - * hint is the way to treat this as unsupported. We check for - * the expected NULL, but do not fail in any case. - */ - if (handler_id != NULL) { - cmn_err(CE_NOTE, "hermon_ci_free_cq_sched: unexpected " - "non-NULL handler_id\n"); - } + int status; + + status = hermon_cq_sched_free((hermon_state_t *)hca, + (hermon_cq_sched_t *)sched_hdl); + return (status); +} + +static ibt_status_t +hermon_ci_query_cq_handler_id(ibc_hca_hdl_t hca, + ibt_cq_handler_id_t hid, ibt_cq_handler_attr_t *attrs) +{ + hermon_state_t *state; + + state = (hermon_state_t *)hca; + if (!HERMON_HID_VALID(state, hid)) + return (IBT_CQ_HID_INVALID); + if (attrs == NULL) + return (IBT_INVALID_PARAM); + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attrs)) + attrs->cha_ih = state->hs_intrmsi_hdl[hid - 1]; + attrs->cha_dip = state->hs_dip; return (IBT_SUCCESS); } - /* * hermon_ci_alloc_eec() * Allocate an End-to-End context @@ -1221,16 +1119,6 @@ ASSERT(mr_p != NULL); ASSERT(mr_desc != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - /* * Validate the access flags. Both Remote Write and Remote Atomic * require the Local Write flag to be set @@ -1303,16 +1191,6 @@ ASSERT(mr_p != NULL); ASSERT(mr_desc != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - /* * Validate the access flags. 
Both Remote Write and Remote Atomic * require the Local Write flag to be set @@ -1374,16 +1252,6 @@ hermon_mrhdl_t mrhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid memory region handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; mrhdl = (hermon_mrhdl_t)mr; @@ -1412,16 +1280,6 @@ ASSERT(mr_attr != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for MemRegion handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and MR handle */ state = (hermon_state_t *)hca; mrhdl = (hermon_mrhdl_t)mr; @@ -1454,20 +1312,6 @@ ASSERT(mr_p != NULL); ASSERT(mr_desc != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - - /* Check for valid memory region handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } /* * Validate the access flags. 
Both Remote Write and Remote Atomic * require the Local Write flag to be set @@ -1538,16 +1382,6 @@ ASSERT(mr_new != NULL); ASSERT(mr_desc != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid memory region handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } - /* Grab the Hermon softstate pointer, mrhdl, and pdhdl */ state = (hermon_state_t *)hca; mrhdl = (hermon_mrhdl_t)mr; @@ -1609,16 +1443,6 @@ ASSERT(mr_new != NULL); ASSERT(mr_desc != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid memory region handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } - /* Grab the Hermon softstate pointer, mrhdl, and pdhdl */ state = (hermon_state_t *)hca; mrhdl = (hermon_mrhdl_t)mr; @@ -1669,11 +1493,6 @@ ASSERT(mr_segs != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -1700,16 +1519,6 @@ ASSERT(mw_p != NULL); ASSERT(rkey_p != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and PD handle */ state = (hermon_state_t *)hca; pdhdl = (hermon_pdhdl_t)pd; @@ -1741,16 +1550,6 @@ hermon_mwhdl_t mwhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid MW handle */ - if (mw == NULL) { - return (IBT_MW_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and MW handle */ state = (hermon_state_t *)hca; mwhdl = (hermon_mwhdl_t)mw; @@ -1766,6 +1565,7 @@ * Return the attributes of the specified Memory Window * Context: Can be called from interrupt or base context. 
*/ +/* ARGSUSED */ static ibt_status_t hermon_ci_query_mw(ibc_hca_hdl_t hca, ibc_mw_hdl_t mw, ibt_mw_query_attr_t *mw_attr_p) @@ -1774,16 +1574,6 @@ ASSERT(mw_attr_p != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid MemWin handle */ - if (mw == NULL) { - return (IBT_MW_HDL_INVALID); - } - /* Query the memory window pointer and fill in the return values */ mwhdl = (hermon_mwhdl_t)mw; mutex_enter(&mwhdl->mr_lock); @@ -1796,6 +1586,71 @@ /* + * hermon_ci_register_dma_mr() + * Allocate a memory region that maps physical addresses. + * Context: Can be called only from user or kernel context. + */ +/* ARGSUSED */ +static ibt_status_t +hermon_ci_register_dma_mr(ibc_hca_hdl_t hca, ibc_pd_hdl_t pd, + ibt_dmr_attr_t *mr_attr, void *ibtl_reserved, ibc_mr_hdl_t *mr_p, + ibt_mr_desc_t *mr_desc) +{ + hermon_state_t *state; + hermon_pdhdl_t pdhdl; + hermon_mrhdl_t mrhdl; + int status; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_desc)) + + ASSERT(mr_attr != NULL); + ASSERT(mr_p != NULL); + ASSERT(mr_desc != NULL); + + /* + * Validate the access flags. 
Both Remote Write and Remote Atomic + * require the Local Write flag to be set + */ + if (((mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) && + !(mr_attr->dmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)) { + return (IBT_MR_ACCESS_REQ_INVALID); + } + + /* Grab the Hermon softstate pointer and PD handle */ + state = (hermon_state_t *)hca; + pdhdl = (hermon_pdhdl_t)pd; + + status = hermon_dma_mr_register(state, pdhdl, mr_attr, &mrhdl); + if (status != DDI_SUCCESS) { + return (status); + } + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mrhdl)) + + /* Fill in the mr_desc structure */ + mr_desc->md_vaddr = mr_attr->dmr_paddr; + mr_desc->md_lkey = mrhdl->mr_lkey; + /* Only set RKey if remote access was requested */ + if ((mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_READ)) { + mr_desc->md_rkey = mrhdl->mr_rkey; + } + + /* + * If region is mapped for streaming (i.e. noncoherent), then set + * sync is required + */ + mr_desc->md_sync_required = B_FALSE; + + /* Return the Hermon MR handle */ + *mr_p = (ibc_mr_hdl_t)mrhdl; + + return (IBT_SUCCESS); +} + + +/* * hermon_ci_attach_mcg() * Attach a Queue Pair to a Multicast Group * Context: Can be called only from user or kernel context. 
@@ -1808,16 +1663,6 @@ hermon_qphdl_t qphdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handles */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -1841,16 +1686,6 @@ hermon_qphdl_t qphdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -1877,16 +1712,6 @@ ASSERT(wr_p != NULL); ASSERT(num_wr != 0); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qp == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and QP handle */ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; @@ -1916,15 +1741,6 @@ state = (hermon_state_t *)hca; qphdl = (hermon_qphdl_t)qp; - if (state == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid QP handle pointer */ - if (qphdl == NULL) { - return (IBT_QP_HDL_INVALID); - } - /* Post the receive WQEs */ status = hermon_post_recv(state, qphdl, wr_p, num_wr, num_posted_p); return (status); @@ -1946,16 +1762,6 @@ ASSERT(wc_p != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq == NULL) { - return (IBT_CQ_HDL_INVALID); - } - /* Check for valid num_wc field */ if (num_wc == 0) { return (IBT_INVALID_PARAM); @@ -1984,16 +1790,6 @@ hermon_cqhdl_t cqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid CQ handle pointer */ - if (cq_hdl == NULL) { - 
return (IBT_CQ_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and CQ handle */ state = (hermon_state_t *)hca; cqhdl = (hermon_cqhdl_t)cq_hdl; @@ -2016,11 +1812,6 @@ hermon_state_t *state; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -2043,11 +1834,6 @@ hermon_state_t *state; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; @@ -2074,18 +1860,7 @@ hermon_srq_info_t srqinfo; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; - - /* Check for valid PD handle pointer */ - if (pd == NULL) { - return (IBT_PD_HDL_INVALID); - } - pdhdl = (hermon_pdhdl_t)pd; srqinfo.srqi_ibt_srqhdl = ibt_srq; @@ -2117,11 +1892,6 @@ hermon_srqhdl_t srqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; /* Check for valid SRQ handle pointer */ @@ -2141,22 +1911,13 @@ * Query properties of a Shared Receive Queue (SRQ) * Context: Can be called from interrupt or base context. 
*/ +/* ARGSUSED */ static ibt_status_t hermon_ci_query_srq(ibc_hca_hdl_t hca, ibc_srq_hdl_t srq, ibc_pd_hdl_t *pd_p, ibt_srq_sizes_t *sizes_p, uint_t *limit_p) { hermon_srqhdl_t srqhdl; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid SRQ handle pointer */ - if (srq == NULL) { - return (IBT_SRQ_HDL_INVALID); - } - srqhdl = (hermon_srqhdl_t)srq; mutex_enter(&srqhdl->srq_lock); @@ -2189,18 +1950,7 @@ uint_t resize_supported, cur_srq_size; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; - - /* Check for valid SRQ handle pointer */ - if (srq == NULL) { - return (IBT_SRQ_HDL_INVALID); - } - srqhdl = (hermon_srqhdl_t)srq; /* @@ -2282,18 +2032,7 @@ hermon_srqhdl_t srqhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; - - /* Check for valid SRQ handle pointer */ - if (srq == NULL) { - return (IBT_SRQ_HDL_INVALID); - } - srqhdl = (hermon_srqhdl_t)srq; status = hermon_post_srq(state, srqhdl, wr, num_wr, num_posted_p); @@ -2436,7 +2175,7 @@ /* * hermon_ci_map_mem_area() - * Context: Can be called from interrupt or base context. + * Context: Can be called from user or base context. * * Creates the memory mapping suitable for a subsequent posting of an * FRWR work request. 
All the info about the memory area for the @@ -2476,6 +2215,8 @@ /* FRWR */ state = (hermon_state_t *)hca; + if (!(state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_MEM_MGT_EXT)) + return (IBT_NOT_SUPPORTED); hermon_dma_attr_init(state, &dma_attr); #ifdef __sparc if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) @@ -2561,7 +2302,8 @@ len = 0; pagesize = PAGESIZE; kaddr = (uint64_t *)(void *)ma_hdl->h_ma_kaddr; - kcookie_paddr = kcookie.dmac_laddress + HERMON_PAGEMASK; + kcookie.dmac_size += kcookie.dmac_laddress & HERMON_PAGEOFFSET; + kcookie_paddr = kcookie.dmac_laddress & HERMON_PAGEMASK; khdl = ma_hdl->h_ma_list_hdl; while (cookie_cnt-- > 0) { addr = dmacookie.dmac_laddress; @@ -2704,13 +2446,6 @@ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wr)) - if (mi_hdl_p == NULL) - return (IBT_MI_HDL_INVALID); - - /* Check for valid HCA handle */ - if (hca == NULL) - return (IBT_HCA_HDL_INVALID); - state = (hermon_state_t *)hca; hermon_dma_attr_init(state, &dma_attr); #ifdef __sparc @@ -2726,7 +2461,8 @@ if (iov_attr->iov_lso_hdr_sz) max_nds -= (iov_attr->iov_lso_hdr_sz + sizeof (uint32_t) + 0xf) >> 4; /* 0xf is for rounding up to a multiple of 16 */ - rsvd_lkey = state->hs_devlim.rsv_lkey; + rsvd_lkey = (iov_attr->iov_flags & IBT_IOV_ALT_LKEY) ? + iov_attr->iov_alt_lkey : state->hs_devlim.rsv_lkey; if ((iov_attr->iov_flags & IBT_IOV_NOSLEEP) == 0) { kmflag = KM_SLEEP; callback = DDI_DMA_SLEEP; @@ -2875,9 +2611,10 @@ return (IBT_SUCCESS); } -/* Allocate L_Key */ /* * hermon_ci_alloc_lkey() + * Allocate an empty memory region for use with FRWR. + * Context: Can be called from user or base context. 
*/ /* ARGSUSED */ static ibt_status_t @@ -2885,7 +2622,41 @@ ibt_lkey_flags_t flags, uint_t list_sz, ibc_mr_hdl_t *mr_p, ibt_pmr_desc_t *mem_desc_p) { - return (IBT_NOT_SUPPORTED); + hermon_state_t *state; + hermon_pdhdl_t pdhdl; + hermon_mrhdl_t mrhdl; + int status; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mem_desc_p)) + + ASSERT(mr_p != NULL); + ASSERT(mem_desc_p != NULL); + + state = (hermon_state_t *)hca; + pdhdl = (hermon_pdhdl_t)pd; + + if (!(state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_MEM_MGT_EXT)) + return (IBT_NOT_SUPPORTED); + + status = hermon_mr_alloc_lkey(state, pdhdl, flags, list_sz, &mrhdl); + if (status != DDI_SUCCESS) { + return (status); + } + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mrhdl)) + + /* Fill in the mem_desc_p structure */ + mem_desc_p->pmd_iova = 0; + mem_desc_p->pmd_phys_buf_list_sz = list_sz; + mem_desc_p->pmd_lkey = mrhdl->mr_lkey; + /* Only set RKey if remote access was requested */ + if (flags & IBT_KEY_REMOTE) { + mem_desc_p->pmd_rkey = mrhdl->mr_rkey; + } + mem_desc_p->pmd_sync_required = B_FALSE; + + /* Return the Hermon MR handle */ + *mr_p = (ibc_mr_hdl_t)mrhdl; + return (IBT_SUCCESS); } /* Physical Register Memory Region */ @@ -2928,11 +2699,6 @@ hermon_fmrhdl_t fmrpoolhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; /* Check for valid PD handle pointer */ @@ -2975,18 +2741,7 @@ hermon_fmrhdl_t fmrpoolhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; - - /* Check for valid FMR Pool handle */ - if (fmr_pool == NULL) { - return (IBT_FMR_POOL_HDL_INVALID); - } - fmrpoolhdl = (hermon_fmrhdl_t)fmr_pool; status = hermon_destroy_fmr_pool(state, fmrpoolhdl); @@ -3005,20 +2760,9 @@ hermon_fmrhdl_t fmrpoolhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - state = (hermon_state_t *)hca; - 
/* Check for valid FMR Pool handle */ - if (fmr_pool == NULL) { - return (IBT_FMR_POOL_HDL_INVALID); - } - fmrpoolhdl = (hermon_fmrhdl_t)fmr_pool; - status = hermon_flush_fmr_pool(state, fmrpoolhdl); return (status); } @@ -3044,19 +2788,9 @@ ASSERT(mr_p != NULL); ASSERT(mem_desc_p != NULL); - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; - /* Check for valid FMR Pool handle */ - if (fmr_pool == NULL) { - return (IBT_FMR_POOL_HDL_INVALID); - } - fmrpoolhdl = (hermon_fmrhdl_t)fmr_pool; status = hermon_register_physical_fmr(state, fmrpoolhdl, mem_pattr, @@ -3097,16 +2831,6 @@ hermon_mrhdl_t mrhdl; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid memory region handle */ - if (mr == NULL) { - return (IBT_MR_HDL_INVALID); - } - /* Grab the Hermon softstate pointer */ state = (hermon_state_t *)hca; mrhdl = (hermon_mrhdl_t)mr; @@ -3179,16 +2903,6 @@ hermon_state_t *state; int status; - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid mem_alloc_hdl_p handle pointer */ - if (mem_alloc_hdl_p == NULL) { - return (IBT_MEM_ALLOC_HDL_INVALID); - } - /* Grab the Hermon softstate pointer and mem handle */ state = (hermon_state_t *)hca; @@ -3210,19 +2924,10 @@ * hermon_ci_free_io_mem() * Unbind handl and free the memory */ +/* ARGSUSED */ static ibt_status_t hermon_ci_free_io_mem(ibc_hca_hdl_t hca, ibc_mem_alloc_hdl_t mem_alloc_hdl) { - /* Check for valid HCA handle */ - if (hca == NULL) { - return (IBT_HCA_HDL_INVALID); - } - - /* Check for valid mem_alloc_hdl handle pointer */ - if (mem_alloc_hdl == NULL) { - return (IBT_MEM_ALLOC_HDL_INVALID); - } - /* Unbind the handles and free the memory */ (void) ddi_dma_unbind_handle(mem_alloc_hdl->ibc_dma_hdl); ddi_dma_mem_free(&mem_alloc_hdl->ibc_acc_hdl);
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_cmd.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_cmd.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -399,22 +398,12 @@ * hermon_cmd_complete_handler() * Context: Called only from interrupt context. */ +/* ARGSUSED */ int hermon_cmd_complete_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) { hermon_cmd_t *cmdp; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_COMMAND_INTF_COMP || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* * Find the outstanding command pointer based on value returned @@ -1596,7 +1585,7 @@ /* * hermon_map_cmd_post() - * Context: Can be called only from attach() path + * Context: Can be called only from user or kernel context * * Generic routine to map FW, ICMA, and ICM. */ @@ -2954,6 +2943,7 @@ * Note: This common function should be used only with the following * opcodes: QUERY_DEV_LIM, QUERY_FW, QUERY_DDR, QUERY_ADAPTER, QUERY_PORT * QUERY_HCA, QUERY_MPT, QUERY_EQ, QUERY_CQ, and QUERY_QP. + * With support of FCoIB, this also supports QUERY_FC. 
*/ int hermon_cmn_query_cmd_post(hermon_state_t *state, uint_t opcode, uint_t opmod, @@ -3185,6 +3175,36 @@ /* + * hermon_get_heart_beat_rq_cmd_post() + * Context: Can be called only from kernel or interrupt context + */ +int +hermon_get_heart_beat_rq_cmd_post(hermon_state_t *state, uint_t qpindx, + uint64_t *outparm) +{ + hermon_cmd_post_t cmd; + int status; + + bzero((void *)&cmd, sizeof (hermon_cmd_post_t)); + + /* Setup and post the Hermon "HEART_BEAT_RQ" command */ + cmd.cp_inparm = 0; + cmd.cp_outparm = 0; + cmd.cp_inmod = qpindx; + cmd.cp_opcode = HEART_BEAT_RQ; + cmd.cp_opmod = 0; + cmd.cp_flags = HERMON_CMD_NOSLEEP_SPIN; + status = hermon_cmd_post(state, &cmd); + + /* + * Return immediate out param through argument pointer. + */ + *outparm = cmd.cp_outparm; + return (status); +} + + +/* * hermon_mgid_hash_cmd_post() * Context: Can be called from interrupt or base context. */ @@ -3455,6 +3475,135 @@ } +/* + * hermon_config_fc_cmd_post() + * Context: Can be called from user or kernel context. + * This can do either a basic config passing in + * *hermon_hw_config_fc_basic_s, or config the N_Port table. 
+ * passing in pointer to an array of 32-bit id's + * Note that either one needs to be cast to void * + */ +int +hermon_config_fc_cmd_post(hermon_state_t *state, void *cfginfo, int enable, + int selector, int n_ports, int portnum, uint_t sleepflag) +{ + hermon_mbox_info_t mbox_info; + hermon_cmd_post_t cmd; + uint64_t data; + uint32_t portid; + uint_t size; + int status, i; + + bzero((void *)&cmd, sizeof (hermon_cmd_post_t)); + + /* Get an "In" mailbox for the command */ + mbox_info.mbi_alloc_flags = HERMON_ALLOC_INMBOX; + status = hermon_mbox_alloc(state, &mbox_info, sleepflag); + if (status != HERMON_CMD_SUCCESS) { + return (status); + } + + /* Copy the appropriate info into mailbox */ + if (selector == HERMON_HW_FC_CONF_BASIC) { /* basic info */ + size = sizeof (hermon_hw_config_fc_basic_t); + for (i = 0; i < (size >> 3); i++) { + data = ((uint64_t *)cfginfo)[i]; + ddi_put64(mbox_info.mbi_in->mb_acchdl, + ((uint64_t *)mbox_info.mbi_in->mb_addr + i), data); + } + } else { /* NPort config */ + ASSERT(selector == HERMON_HW_FC_CONF_NPORT); + size = n_ports * sizeof (uint32_t); + /* + * n_ports must == number queried from card + * + * passed in is an array but for little endian needs to + * be rearranged in the mbox + */ + for (i = 0; i < (size >> 3); i++) { + portid = ((uint32_t *)cfginfo)[i * 2]; + data = (uint64_t)portid << 32; + if (i * 2 < n_ports) { + portid = ((uint32_t *)cfginfo)[i * 2 + 1]; + data |= portid; + } + ddi_put64(mbox_info.mbi_in->mb_acchdl, + ((uint64_t *)mbox_info.mbi_in->mb_addr + i), data); + } + } + + /* Sync the mailbox for the device to read */ + hermon_mbox_sync(mbox_info.mbi_in, 0, size, DDI_DMA_SYNC_FORDEV); + + /* Setup and post Hermon "CONFIG_FC" command */ + cmd.cp_inparm = mbox_info.mbi_in->mb_mapaddr; + cmd.cp_outparm = 0; + cmd.cp_inmod = (uint32_t)(selector | portnum); + cmd.cp_opcode = CONFIG_FC; + cmd.cp_opmod = (uint16_t)enable; + cmd.cp_flags = sleepflag; + status = hermon_cmd_post(state, &cmd); + + /* Free the mailbox */ + 
hermon_mbox_free(state, &mbox_info);
+	return (status);
+}
+
+/*
+ * hermon_sense_port_post() - used to sense the protocol running on a port
+ *    Context: Can be called from interrupt or base context
+ */
+
+int
+hermon_sense_port_post(hermon_state_t *state, uint_t portnum,
+    uint32_t *protocol)
+{
+	hermon_cmd_post_t cmd;
+	int status;
+
+	bzero((void *)&cmd, sizeof (hermon_cmd_post_t));
+
+	/* Setup and post Hermon "SENSE_PORT" command */
+	cmd.cp_inparm = 0;
+	cmd.cp_outparm = 0;
+	cmd.cp_inmod = (uint32_t)portnum;
+	cmd.cp_opcode = SENSE_PORT;
+	cmd.cp_opmod = 0;
+	cmd.cp_flags = HERMON_CMD_NOSLEEP_SPIN;
+	status = hermon_cmd_post(state, &cmd);
+	/* Return the sensed protocol only if the command succeeded */
+	if (status == HERMON_CMD_SUCCESS) {
+		*protocol = (uint32_t)cmd.cp_outparm;
+	}
+	return (status);
+}
+
+
+/*
+ * hermon_config_int_mod() - used to configure INTERRUPT moderation
+ *    Context: Can be called from interrupt or base context
+ */
+int
+hermon_config_int_mod(hermon_state_t *state, uint_t min_delay, uint_t vector)
+{
+	hermon_cmd_post_t cmd;
+	int status;
+	uint64_t inparm = 0;
+
+	bzero((void *)&cmd, sizeof (hermon_cmd_post_t));
+
+	/*
+	 * Setup and post Hermon "CONFIG_INT_MOD" command.  Pack min_delay
+	 * into bits 63:48 and the interrupt vector into bits 47:32 of the
+	 * immediate input parameter.  This must be a bitwise OR ("|"); a
+	 * logical OR ("||") would collapse the value to 0 or 1.
+	 */
+	inparm = (((uint64_t)min_delay & 0xFFFF) << 48) |
+	    (((uint64_t)vector & 0xFFFF) << 32);
+
+	cmd.cp_inparm = inparm;
+	cmd.cp_outparm = 0;
+	cmd.cp_inmod = 0;
+	cmd.cp_opcode = CONFIG_INT_MOD;
+	cmd.cp_opmod = 0;
+	cmd.cp_flags = HERMON_CMD_NOSLEEP_SPIN;
+	status = hermon_cmd_post(state, &cmd);
+	return (status);
+}
+
+
 int
 hermon_nop_post(hermon_state_t *state, uint_t interval, uint_t sleep)
 {
@@ -3476,6 +3625,26 @@
 }
 
 int
+hermon_hw_health_check(hermon_state_t *state, int *health)
+{
+	hermon_cmd_post_t cmd;
+	int status;
+
+	bzero((void *)&cmd, sizeof (hermon_cmd_post_t));
+
+	/* Setup and post Hermon "HW_HEALTH_CHECK" command */
+	cmd.cp_inparm = 0;
+	cmd.cp_outparm = 0;
+	cmd.cp_inmod = 0;
+	cmd.cp_opcode = HW_HEALTH_CHECK;
+	cmd.cp_opmod = 0;
+	cmd.cp_flags = HERMON_CMD_NOSLEEP_SPIN;
+	status = hermon_cmd_post(state, &cmd);
+	/* If the command failed, *health is invalid/undefined */
+	*health = (int)cmd.cp_outparm;
+
return (status); +} + +int hermon_setdebug_post(hermon_state_t *state) { hermon_cmd_post_t cmd;
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_cq.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_cq.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -59,8 +58,6 @@ hermon_hw_cqe_t *cqe, ibt_wc_t *wc); static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe, ibt_wc_t *wc); -static void hermon_cqe_sync(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe, - uint_t flag); /* @@ -85,6 +82,7 @@ uint32_t log_cq_size, uarpg; uint_t cq_is_umap; uint32_t status, flag; + hermon_cq_sched_t *cq_schedp; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr)) @@ -143,6 +141,8 @@ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq)) cq->cq_is_umap = cq_is_umap; cq->cq_cqnum = cqc->hr_indx; /* just use index, implicit in Hermon */ + cq->cq_intmod_count = 0; + cq->cq_intmod_usec = 0; /* * If this will be a user-mappable CQ, then allocate an entry for @@ -252,9 +252,40 @@ } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) - /* Sync entire CQ for use by the hardware. 
*/ - (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, - cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); + cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state); + if (cq_attr->cq_flags & IBT_CQ_HID) { + if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) { + IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x", + cq_attr->cq_hid); + status = IBT_INVALID_PARAM; + goto cqalloc_fail5; + } + cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid); + IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum); + } else { + cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched; + if (cq_schedp == NULL) { + cq_schedp = &state->hs_cq_sched_default; + } else if (cq_schedp != &state->hs_cq_sched_default) { + int i; + hermon_cq_sched_t *tmp; + + tmp = state->hs_cq_sched_array; + for (i = 0; i < state->hs_cq_sched_array_size; i++) + if (cq_schedp == &tmp[i]) + break; /* found it */ + if (i >= state->hs_cq_sched_array_size) { + cmn_err(CE_CONT, "!Invalid cq_sched argument: " + "ignored\n"); + cq_schedp = &state->hs_cq_sched_default; + } + } + cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, + HERMON_CQSCHED_NEXT_HID(cq_schedp)); + IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, " + "eqn %d", cq_schedp->cqs_start_hid - 1, + cq_schedp->cqs_len, cq->cq_eqnum); + } /* * Fill in the CQC entry. This is the final step before passing @@ -266,9 +297,6 @@ */ bzero(&cqc_entry, sizeof (hermon_hw_cqc_t)); - cq->cq_eqnum = HERMON_CQ_EQNUM_GET(state); - cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state); - cqc_entry.state = HERMON_CQ_DISARMED; cqc_entry.pg_offs = cq->cq_cqinfo.qa_pgoffs >> 5; cqc_entry.log_cq_sz = log_cq_size; @@ -327,8 +355,7 @@ * Put CQ handle in Hermon CQNum-to-CQHdl list. 
Then fill in the * "actual_size" and "cqhdl" and return success */ - ASSERT(state->hs_cqhdl[cqc->hr_indx] == NULL); - state->hs_cqhdl[cqc->hr_indx] = cq; + hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq); /* * If this is a user-mappable CQ, then we need to insert the previously @@ -458,7 +485,7 @@ * in-progress events to detect that the CQ corresponding to this * number has been freed. */ - state->hs_cqhdl[cqc->hr_indx] = NULL; + hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL); mutex_exit(&cq->cq_lock); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq)) @@ -654,9 +681,6 @@ } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) - (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, - new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); - /* * Now we grab the CQ lock. Since we will be updating the actual * CQ location and the producer/consumer indexes, we should hold @@ -733,10 +757,6 @@ bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo), sizeof (struct hermon_qalloc_info_s)); - /* sync the new buffer for use by the device */ - (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, - new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); - /* now, save the address in the cq_handle */ cq->cq_resize_hdl = resize_hdl; @@ -763,8 +783,6 @@ /* * hermon_cq_modify() * Context: Can be called base context. - * - * XXX - still need to implement use of the 'hid' argument. 
*/ /* ARGSUSED */ int @@ -784,8 +802,8 @@ cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag); if (status != HERMON_CMD_SUCCESS) { mutex_exit(&cq->cq_lock); - cmn_err(CE_CONT, "Hermon: MODIFY_CQ command failed: " - "%08x\n", status); + cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ " + "command failed: %08x\n", status); if (status == HERMON_CMD_INVALID_STATUS) { hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); @@ -795,6 +813,23 @@ cq->cq_intmod_count = count; cq->cq_intmod_usec = usec; } + if (hid && (hid - 1 != cq->cq_eqnum)) { + bzero(&cqc_entry, sizeof (hermon_hw_cqc_t)); + cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid); + status = hermon_modify_cq_cmd_post(state, &cqc_entry, + cq->cq_cqnum, MODIFY_EQN, sleepflag); + if (status != HERMON_CMD_SUCCESS) { + mutex_exit(&cq->cq_lock); + cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: " + "%08x\n", status); + if (status == HERMON_CMD_INVALID_STATUS) { + hermon_fm_ereport(state, HCA_SYS_ERR, + HCA_ERR_SRV_LOST); + } + return (ibc_get_ci_failure(0)); + } + cq->cq_eqnum = hid - 1; + } mutex_exit(&cq->cq_lock); return (DDI_SUCCESS); } @@ -834,7 +869,7 @@ { hermon_hw_cqe_t *cqe; uint_t opcode; - uint32_t cons_indx, wrap_around_mask; + uint32_t cons_indx, wrap_around_mask, shift, mask; uint32_t polled_cnt, spec_op = 0; int status; @@ -851,6 +886,8 @@ /* Get the consumer index */ cons_indx = cq->cq_consindx; + shift = cq->cq_log_cqsz; + mask = cq->cq_bufsz; /* * Calculate the wrap around mask. Note: This operation only works @@ -861,9 +898,6 @@ /* Calculate the pointer to the first CQ entry */ cqe = &cq->cq_buf[cons_indx & wrap_around_mask]; - /* Sync the current CQE to read */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU); - /* * Keep pulling entries from the CQ until we find an entry owned by * the hardware. As long as there the CQE's owned by SW, process @@ -875,7 +909,7 @@ * completion). 
*/ polled_cnt = 0; - while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx)) { + while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) { if (cq->cq_resize_hdl != 0) { /* in midst of resize */ /* peek at the opcode */ opcode = HERMON_CQE_OPCODE_GET(cq, cqe); @@ -891,9 +925,6 @@ /* Update the pointer to the next CQ entry */ cqe = &cq->cq_buf[cons_indx & wrap_around_mask]; - /* Sync the next CQE to read */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU); - continue; } } /* in resizing CQ */ @@ -904,18 +935,12 @@ */ hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]); - /* Sync the current CQE for device */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORDEV); - /* Increment the consumer index */ cons_indx = (cons_indx + 1); /* Update the pointer to the next CQ entry */ cqe = &cq->cq_buf[cons_indx & wrap_around_mask]; - /* Sync the next CQE to read */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU); - /* * If we have run out of space to store work completions, * then stop and return the ones we have pulled of the CQ. @@ -927,9 +952,7 @@ /* * Now we only ring the doorbell (to update the consumer index) if - * we've actually consumed a CQ entry. If we have, for example, - * pulled from a CQE that we are still in the process of "recycling" - * for error purposes, then we would not update the consumer index. + * we've actually consumed a CQ entry. 
*/ if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) { /* @@ -1029,23 +1052,13 @@ * hermon_cq_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ int hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) { hermon_cqhdl_t cq; uint_t cqnum; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_COMPLETION || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the CQ handle from CQ number in event descriptor */ cqnum = HERMON_EQE_CQNUM_GET(eq, eqe); @@ -1085,6 +1098,7 @@ * hermon_cq_err_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ int hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1093,18 +1107,6 @@ uint_t cqnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - - ASSERT(eqe_evttype == HERMON_EVT_CQ_ERRORS || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } HERMON_FMANOTE(state, HERMON_FMA_OVERRUN); /* Get the CQ handle from CQ number in event descriptor */ @@ -1278,7 +1280,7 @@ /* Calculate the CQ table index from the cqnum */ cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1; cqindx = cqnum & cqmask; - return (state->hs_cqhdl[cqindx]); + return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx)); } /* @@ -1318,20 +1320,18 @@ * be associated with it (e.g. whether immediate data is present). 
*/ flags = IBT_WC_NO_FLAGS; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running)) if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) { /* Send CQE */ switch (opcode) { case HERMON_CQE_SND_RDMAWR_IMM: - flags |= IBT_WC_IMMED_DATA_PRESENT; - /* FALLTHROUGH */ case HERMON_CQE_SND_RDMAWR: type = IBT_WRC_RDMAW; break; + case HERMON_CQE_SND_SEND_INV: case HERMON_CQE_SND_SEND_IMM: - flags |= IBT_WC_IMMED_DATA_PRESENT; - /* FALLTHROUGH */ case HERMON_CQE_SND_SEND: type = IBT_WRC_SEND; break; @@ -1356,16 +1356,70 @@ type = IBT_WRC_BIND; break; + case HERMON_CQE_SND_FRWR: + type = IBT_WRC_FAST_REG_PMR; + break; + + case HERMON_CQE_SND_LCL_INV: + type = IBT_WRC_LOCAL_INVALIDATE; + break; + default: HERMON_WARNING(state, "unknown send CQE type"); wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR; return; } + } else if ((state->hs_fcoib_may_be_running == B_TRUE) && + hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) { + type = IBT_WRC_RECV; + if (HERMON_CQE_FEXCH_DIFE(cq, cqe)) + flags |= IBT_WC_DIF_ERROR; + wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe); + wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe); + wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe); + wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe); + wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe); + wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) & + IBT_WC_DETAIL_FC_MATCH_MASK; + wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe); + flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED; } else { + /* + * Parse the remaining contents of the CQE into the work + * completion. This means filling in SL, QP number, SLID, + * immediate data, etc. + * + * Note: Not all of these fields are valid in a given + * completion. Many of them depend on the actual type of + * completion. So we fill in all of the fields and leave + * it up to the IBTF and consumer to sort out which are + * valid based on their context. 
+ */ + wc->wc_sl = HERMON_CQE_SL_GET(cq, cqe); + wc->wc_qpn = HERMON_CQE_DQPN_GET(cq, cqe); + wc->wc_slid = HERMON_CQE_DLID_GET(cq, cqe); + wc->wc_immed_data = + HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe); + wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF); + wc->wc_pkey_ix = (wc->wc_immed_data & + ((1 << state->hs_queryport.log_max_pkey) - 1)); + /* + * Fill in "bytes transferred" as appropriate. Also, + * if necessary, fill in the "path bits" field. + */ + wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe); + wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe); + + /* + * Check for GRH, update the flags, then fill in "wc_flags" + * field in the work completion + */ + if (HERMON_CQE_GRH_GET(cq, cqe) != 0) { + flags |= IBT_WC_GRH_PRESENT; + } /* Receive CQE */ - switch (opcode & 0x1F) { - /* for sendonly w/imm or sendlast w/imm */ + switch (opcode) { case HERMON_CQE_RCV_SEND_IMM: /* * Note: According to the PRM, all QP1 recv @@ -1380,7 +1434,7 @@ flags |= IBT_WC_IMMED_DATA_PRESENT; } /* FALLTHROUGH */ - /* for sendonly or sendlast */ + case HERMON_CQE_RCV_SEND: type = IBT_WRC_RECV; if (HERMON_CQE_IS_IPOK(cq, cqe)) { @@ -1390,14 +1444,19 @@ HERMON_CQE_IPOIB_STATUS(cq, cqe); } break; - /* for RDMAwrite only or RDMAwrite last w/imm */ + + case HERMON_CQE_RCV_SEND_INV: + type = IBT_WRC_RECV; + flags |= IBT_WC_RKEY_INVALIDATED; + wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */ + break; + case HERMON_CQE_RCV_RDMAWR_IMM: flags |= IBT_WC_IMMED_DATA_PRESENT; type = IBT_WRC_RECV_RDMAWI; break; default: - /* still don't support send/invalidate, need to add later */ HERMON_WARNING(state, "unknown recv CQE type"); wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR; @@ -1405,47 +1464,8 @@ } } wc->wc_type = type; - - /* - * Check for GRH, update the flags, then fill in "wc_flags" field - * in the work completion - */ - if (HERMON_CQE_GRH_GET(cq, cqe) != 0) { - flags |= IBT_WC_GRH_PRESENT; - } wc->wc_flags = flags; - - /* If we got here, completion status must be success */ 
wc->wc_status = IBT_WC_SUCCESS; - - /* - * Parse the remaining contents of the CQE into the work completion. - * This means filling in SL, QP number, SLID, immediate data, etc. - * Note: Not all of these fields are valid in a given completion. - * Many of them depend on the actual type of completion. So we fill - * in all of the fields and leave it up to the IBTF and consumer to - * sort out which are valid based on their context. - */ - wc->wc_sl = HERMON_CQE_SL_GET(cq, cqe); - wc->wc_immed_data = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe); - wc->wc_qpn = HERMON_CQE_DQPN_GET(cq, cqe); - wc->wc_slid = HERMON_CQE_DLID_GET(cq, cqe); - wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF); - wc->wc_pkey_ix = (wc->wc_immed_data & - ((1 << state->hs_queryport.log_max_pkey) - 1)); - /* - * Depending on whether the completion was a receive or a send - * completion, fill in "bytes transferred" as appropriate. Also, - * if necessary, fill in the "path bits" field. - */ - if (HERMON_CQE_SENDRECV_GET(cq, cqe) == HERMON_COMPLETION_RECV) { - wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe); - wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe); - - } else if ((wc->wc_type == IBT_WRC_RDMAR) || - (wc->wc_type == IBT_WRC_CSWAP) || (wc->wc_type == IBT_WRC_FADD)) { - wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe); - } } /* @@ -1474,26 +1494,26 @@ imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe); status = imm_eth_pkey_cred; if (status != HERMON_CQE_WR_FLUSHED_ERR) - IBTF_DPRINTF_L2("errcqe", "cqe %p indx %x status 0x%x " - "vendor syndrome %x", cqe, HERMON_CQE_WQECNTR_GET(cq, cqe), - status, ((uint8_t *)cqe)[26]); - + IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x " + "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe), + HERMON_CQE_WQECNTR_GET(cq, cqe), status, + HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe)); switch (status) { case HERMON_CQE_LOC_LEN_ERR: - HERMON_FMANOTE(state, HERMON_FMA_LOCLEN); + HERMON_WARNING(state, HERMON_FMA_LOCLEN); 
ibt_status = IBT_WC_LOCAL_LEN_ERR; break; case HERMON_CQE_LOC_OP_ERR: - HERMON_FMANOTE(state, HERMON_FMA_LOCQPOP); + HERMON_WARNING(state, HERMON_FMA_LOCQPOP); ibt_status = IBT_WC_LOCAL_QP_OP_ERR; break; case HERMON_CQE_LOC_PROT_ERR: - HERMON_FMANOTE(state, HERMON_FMA_LOCPROT); + HERMON_WARNING(state, HERMON_FMA_LOCPROT); ibt_status = IBT_WC_LOCAL_PROTECT_ERR; + IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe); if (hermon_should_panic) { - IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe); cmn_err(CE_PANIC, "Hermon intentional PANIC - " "Local Protection Error\n"); } @@ -1504,42 +1524,42 @@ break; case HERMON_CQE_MW_BIND_ERR: - HERMON_FMANOTE(state, HERMON_FMA_MWBIND); + HERMON_WARNING(state, HERMON_FMA_MWBIND); ibt_status = IBT_WC_MEM_WIN_BIND_ERR; break; case HERMON_CQE_BAD_RESPONSE_ERR: - HERMON_FMANOTE(state, HERMON_FMA_RESP); + HERMON_WARNING(state, HERMON_FMA_RESP); ibt_status = IBT_WC_BAD_RESPONSE_ERR; break; case HERMON_CQE_LOCAL_ACCESS_ERR: - HERMON_FMANOTE(state, HERMON_FMA_LOCACC); + HERMON_WARNING(state, HERMON_FMA_LOCACC); ibt_status = IBT_WC_LOCAL_ACCESS_ERR; break; case HERMON_CQE_REM_INV_REQ_ERR: - HERMON_FMANOTE(state, HERMON_FMA_REMREQ); + HERMON_WARNING(state, HERMON_FMA_REMREQ); ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR; break; case HERMON_CQE_REM_ACC_ERR: - HERMON_FMANOTE(state, HERMON_FMA_REMACC); + HERMON_WARNING(state, HERMON_FMA_REMACC); ibt_status = IBT_WC_REMOTE_ACCESS_ERR; break; case HERMON_CQE_REM_OP_ERR: - HERMON_FMANOTE(state, HERMON_FMA_REMOP); + HERMON_WARNING(state, HERMON_FMA_REMOP); ibt_status = IBT_WC_REMOTE_OP_ERR; break; case HERMON_CQE_TRANS_TO_ERR: - HERMON_FMANOTE(state, HERMON_FMA_XPORTCNT); + HERMON_WARNING(state, HERMON_FMA_XPORTCNT); ibt_status = IBT_WC_TRANS_TIMEOUT_ERR; break; case HERMON_CQE_RNRNAK_TO_ERR: - HERMON_FMANOTE(state, HERMON_FMA_RNRCNT); + HERMON_WARNING(state, HERMON_FMA_RNRCNT); ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR; break; @@ -1566,29 +1586,6 @@ /* - * hermon_cqe_sync() - * Context: Can be called from interrupt 
or base context. - */ -static void -hermon_cqe_sync(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe, uint_t flag) -{ - ddi_dma_handle_t dmahdl; - off_t offset; - int status; - - /* Get the DMA handle from CQ context */ - dmahdl = cq->cq_mrhdl->mr_bindinfo.bi_dmahdl; - - /* Calculate offset of next CQE */ - offset = (off_t)((uintptr_t)cqe - (uintptr_t)&cq->cq_buf[0]); - status = ddi_dma_sync(dmahdl, offset, sizeof (hermon_hw_cqe_t), flag); - if (status != DDI_SUCCESS) { - return; - } -} - - -/* * hermon_cq_resize_helper() * Context: Can be called only from user or kernel context. */ @@ -1655,6 +1652,7 @@ hermon_workq_hdr_t *wq; uint32_t cons_indx, tail_cons_indx, wrap_around_mask; uint32_t new_indx, check_indx, qpnum; + uint32_t shift, mask; int outstanding_cqes; qpnum = qp->qp_qpnum; @@ -1664,17 +1662,22 @@ wq = NULL; cq = qp->qp_rq_cqhdl; + if (cq == NULL) { + cq = qp->qp_sq_cqhdl; + } + do_send_cq: /* loop back to here if send_cq is not the same as recv_cq */ + if (cq == NULL) + return; cons_indx = cq->cq_consindx; - wrap_around_mask = (cq->cq_bufsz - 1); + shift = cq->cq_log_cqsz; + mask = cq->cq_bufsz; + wrap_around_mask = mask - 1; /* Calculate the pointer to the first CQ entry */ cqe = &cq->cq_buf[cons_indx & wrap_around_mask]; - /* Sync the current CQE to read */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU); - /* * Loop through the CQ looking for entries owned by software. 
If an * entry is owned by software then we increment an 'outstanding_cqes' @@ -1684,7 +1687,7 @@ */ outstanding_cqes = 0; tail_cons_indx = cons_indx; - while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx)) { + while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) { /* increment total cqes count */ outstanding_cqes++; @@ -1693,9 +1696,6 @@ /* update the pointer to the next cq entry */ cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask]; - - /* sync the next cqe to read */ - hermon_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU); } /* @@ -1767,3 +1767,222 @@ goto do_send_cq; } } + +/* + * hermon_get_cq_sched_list() + * Context: Only called from attach() path context + * + * Read properties, creating entries in hs_cq_sched_list with + * information about the requested "expected" and "minimum" + * number of MSI-X interrupt vectors per list entry. + */ +static int +hermon_get_cq_sched_list(hermon_state_t *state) +{ + char **listp, ulp_prop[HERMON_CQH_MAX + 4]; + uint_t nlist, i, j, ndata; + int *data; + size_t len; + hermon_cq_sched_t *cq_schedp; + + if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip, + DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) != + DDI_PROP_SUCCESS) + return (0); + + state->hs_cq_sched_array_size = nlist; + state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist * + sizeof (hermon_cq_sched_t), KM_SLEEP); + for (i = 0; i < nlist; i++) { + if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) { + cmn_err(CE_CONT, "'cqh' property name too long\n"); + goto game_over; + } + for (j = 0; j < i; j++) { + if (strcmp(listp[j], listp[i]) == 0) { + cmn_err(CE_CONT, "Duplicate 'cqh' property\n"); + goto game_over; + } + } + (void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX); + ulp_prop[0] = 'c'; + ulp_prop[1] = 'q'; + ulp_prop[2] = 'h'; + ulp_prop[3] = '-'; + (void) strncpy(ulp_prop + 4, listp[i], len + 1); + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip, + DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) != + 
DDI_PROP_SUCCESS) { + cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop); + goto game_over; + } + if (ndata != 2) { + cmn_err(CE_CONT, "property '%s' does not " + "have 2 integers\n", ulp_prop); + goto game_over_free_data; + } + cq_schedp[i].cqs_desired = data[0]; + cq_schedp[i].cqs_minimum = data[1]; + cq_schedp[i].cqs_refcnt = 0; + ddi_prop_free(data); + } + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip, + DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) != + DDI_PROP_SUCCESS) { + cmn_err(CE_CONT, "property 'cqh-default' not found\n"); + goto game_over; + } + if (ndata != 2) { + cmn_err(CE_CONT, "property 'cqh-default' does not " + "have 2 integers\n"); + goto game_over_free_data; + } + cq_schedp = &state->hs_cq_sched_default; + cq_schedp->cqs_desired = data[0]; + cq_schedp->cqs_minimum = data[1]; + cq_schedp->cqs_refcnt = 0; + ddi_prop_free(data); + ddi_prop_free(listp); + return (1); /* game on */ + +game_over_free_data: + ddi_prop_free(data); +game_over: + cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n"); + cmn_err(CE_CONT, "completion handler groups not being used\n"); + kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t)); + state->hs_cq_sched_array_size = 0; + ddi_prop_free(listp); + return (0); +} + +/* + * hermon_cq_sched_init() + * Context: Only called from attach() path context + * + * Read the hermon.conf properties looking for cq_sched info, + * creating reserved pools of MSI-X interrupt ranges for the + * specified ULPs. 
+ */ +int +hermon_cq_sched_init(hermon_state_t *state) +{ + hermon_cq_sched_t *cq_schedp, *defp; + int i, desired, array_size; + + mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); + + mutex_enter(&state->hs_cq_sched_lock); + state->hs_cq_sched_array = NULL; + + /* initialize cq_sched_default */ + defp = &state->hs_cq_sched_default; + defp->cqs_start_hid = 1; + defp->cqs_len = state->hs_intrmsi_allocd; + defp->cqs_next_alloc = defp->cqs_len - 1; + (void) strncpy(defp->cqs_name, "default", 8); + + /* Read properties to determine which ULPs use cq_sched */ + if (hermon_get_cq_sched_list(state) == 0) + goto done; + + /* Determine if we have enough vectors, or if we have to scale down */ + desired = defp->cqs_desired; /* default desired (from hermon.conf) */ + if (desired <= 0) + goto done; /* all interrupts in the default pool */ + cq_schedp = state->hs_cq_sched_array; + array_size = state->hs_cq_sched_array_size; + for (i = 0; i < array_size; i++) + desired += cq_schedp[i].cqs_desired; + if (desired > state->hs_intrmsi_allocd) { + cmn_err(CE_CONT, "#interrupts allocated (%d) is less than " + "the #interrupts desired (%d)\n", + state->hs_intrmsi_allocd, desired); + cmn_err(CE_CONT, "completion handler groups not being used\n"); + goto done; /* all interrupts in the default pool */ + } + /* Game on. 
For each cq_sched group, reserve the MSI-X range */ + for (i = 0; i < array_size; i++) { + desired = cq_schedp[i].cqs_desired; + cq_schedp[i].cqs_start_hid = defp->cqs_start_hid; + cq_schedp[i].cqs_len = desired; + cq_schedp[i].cqs_next_alloc = desired - 1; + defp->cqs_len -= desired; + defp->cqs_start_hid += desired; + } + /* reset default's start allocation seed */ + state->hs_cq_sched_default.cqs_next_alloc = + state->hs_cq_sched_default.cqs_len - 1; + +done: + mutex_exit(&state->hs_cq_sched_lock); + return (IBT_SUCCESS); +} + +void +hermon_cq_sched_fini(hermon_state_t *state) +{ + mutex_enter(&state->hs_cq_sched_lock); + if (state->hs_cq_sched_array_size) { + kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) * + state->hs_cq_sched_array_size); + state->hs_cq_sched_array_size = 0; + state->hs_cq_sched_array = NULL; + } + mutex_exit(&state->hs_cq_sched_lock); + mutex_destroy(&state->hs_cq_sched_lock); +} + +int +hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr, + hermon_cq_sched_t **cq_sched_pp) +{ + hermon_cq_sched_t *cq_schedp; + int i; + char *name; + ibt_cq_sched_flags_t flags; + + flags = attr->cqs_flags; + if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) { + *cq_sched_pp = NULL; + return (IBT_SUCCESS); + } + name = attr->cqs_pool_name; + + mutex_enter(&state->hs_cq_sched_lock); + cq_schedp = state->hs_cq_sched_array; + for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) { + if (strcmp(name, cq_schedp->cqs_name) == 0) { + if (cq_schedp->cqs_len != 0) + cq_schedp->cqs_refcnt++; + break; /* found it */ + } + } + if ((i == state->hs_cq_sched_array_size) || /* not found, or */ + (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */ + cq_schedp = NULL; + mutex_exit(&state->hs_cq_sched_lock); + + *cq_sched_pp = cq_schedp; /* set to valid hdl, or to NULL */ + if ((cq_schedp == NULL) && + (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP)) + return (IBT_CQ_NO_SCHED_GROUP); + else + return 
(IBT_SUCCESS); +} + +int +hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp) +{ + if (cq_schedp != NULL) { + /* Just decrement refcnt */ + mutex_enter(&state->hs_cq_sched_lock); + if (cq_schedp->cqs_refcnt == 0) + HERMON_WARNING(state, "cq_sched free underflow\n"); + else + cq_schedp->cqs_refcnt--; + mutex_exit(&state->hs_cq_sched_lock); + } + return (IBT_SUCCESS); +}
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_event.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_event.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -73,7 +72,7 @@ hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); static int hermon_srq_last_wqe_reached_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); -static int hermon_ecc_detection_handler(hermon_state_t *state, +static int hermon_fexch_error_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); static int hermon_no_eqhandler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); @@ -91,7 +90,7 @@ uint_t num_eq, num_eq_init, num_eq_unmap, num_eq_rsvd; uint32_t event_mask; /* used for multiple event types */ int status, i, num_extra; - uint64_t offset; + struct hermon_sw_eq_s **eq; ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); /* initialize the FMA retry loop */ @@ -118,7 +117,8 @@ * (see below for more details). */ num_eq = HERMON_NUM_EQ_USED; - num_eq_rsvd = state->hs_devlim.num_rsvd_eq; + num_eq_rsvd = state->hs_rsvd_eqs; + eq = &state->hs_eqhdl[num_eq_rsvd]; /* * If MSI is to be used, then set intr_num to the MSI number. @@ -131,20 +131,17 @@ } else { /* If we have more than one MSI-X vector, init them. 
*/ for (i = 0; i + 1 < state->hs_intrmsi_allocd; i++) { - status = hermon_eq_alloc(state, log_eq_size, i, - &state->hs_eqhdl[i + num_eq_rsvd]); + status = hermon_eq_alloc(state, log_eq_size, i, &eq[i]); if (status != DDI_SUCCESS) { while (--i >= 0) { (void) hermon_eq_handler_fini(state, - state->hs_eqhdl[i + num_eq_rsvd]); - (void) hermon_eq_free(state, - &state->hs_eqhdl[i + num_eq_rsvd]); + eq[i]); + (void) hermon_eq_free(state, &eq[i]); } return (DDI_FAILURE); } - (void) hermon_eq_handler_init(state, - state->hs_eqhdl[i + num_eq_rsvd], + (void) hermon_eq_handler_init(state, eq[i], HERMON_EVT_NO_MASK, hermon_cq_handler); } intr_num = i; @@ -158,7 +155,7 @@ */ for (i = 0; i < num_eq; i++) { status = hermon_eq_alloc(state, log_eq_size, intr_num, - &state->hs_eqhdl[num_eq_rsvd + num_extra + i]); + &eq[num_extra + i]); if (status != DDI_SUCCESS) { num_eq_init = i; goto all_eq_init_fail; @@ -171,15 +168,14 @@ * possible event class unmapping. */ num_eq_unmap = 0; + /* * Setup EQ0 (first avail) for use with Completion Queues. Note: We can * cast the return value to void here because, when we use the * HERMON_EVT_NO_MASK flag, it is not possible for * hermon_eq_handler_init() to return an error. */ - - (void) hermon_eq_handler_init(state, - state->hs_eqhdl[num_eq_unmap + num_extra + num_eq_rsvd], + (void) hermon_eq_handler_init(state, eq[num_eq_unmap + num_extra], HERMON_EVT_NO_MASK, hermon_cq_handler); num_eq_unmap++; @@ -193,15 +189,14 @@ * everything that has been successfully initialized, and return an * error. 
*/ - status = hermon_eq_handler_init(state, - state->hs_eqhdl[num_eq_unmap + num_extra + num_eq_rsvd], + status = hermon_eq_handler_init(state, eq[num_eq_unmap + num_extra], HERMON_EVT_MSK_CQ_ERRORS, hermon_cq_err_handler); if (status != DDI_SUCCESS) { goto all_eq_init_fail; } + state->hs_cq_erreqnum = num_eq_unmap + num_extra + num_eq_rsvd; num_eq_unmap++; - /* * Setup EQ2 for handling most other things including: * @@ -239,12 +234,13 @@ * These events correspond to the IB affiliated asynchronous events * that are used to indicate that path migration was not successful. * + * Fibre Channel Error Event + * This event is affiliated with an Fexch QP. + * * NOTE: When an event fires on this EQ, it will demux the type and * send it to the right specific handler routine * */ - - event_mask = HERMON_EVT_MSK_PORT_STATE_CHANGE | HERMON_EVT_MSK_COMM_ESTABLISHED | @@ -257,10 +253,9 @@ HERMON_EVT_MSK_PATH_MIGRATE_FAILED | HERMON_EVT_MSK_SRQ_CATASTROPHIC_ERROR | HERMON_EVT_MSK_SRQ_LAST_WQE_REACHED | - HERMON_EVT_MSK_ECC_DETECTION; + HERMON_EVT_MSK_FEXCH_ERROR; - status = hermon_eq_handler_init(state, - state->hs_eqhdl[num_eq_unmap + num_extra + num_eq_rsvd], + status = hermon_eq_handler_init(state, eq[num_eq_unmap + num_extra], event_mask, hermon_eq_demux); if (status != DDI_SUCCESS) { goto all_eq_init_fail; @@ -276,8 +271,7 @@ * since the Arbel firmware does not currently support any such * handling), we allow these events to go to the catch-all handler. */ - status = hermon_eq_handler_init(state, - state->hs_eqhdl[num_eq_unmap + num_extra + num_eq_rsvd], + status = hermon_eq_handler_init(state, eq[num_eq_unmap + num_extra], HERMON_EVT_CATCHALL_MASK, hermon_no_eqhandler); if (status != DDI_SUCCESS) { goto all_eq_init_fail; @@ -292,10 +286,7 @@ * Run through and initialize the Consumer Index for each EQC. 
*/ for (i = 0; i < num_eq + num_extra; i++) { - offset = ARM_EQ_INDEX(i + num_eq_rsvd); - ddi_put32(uarhdl, - (uint32_t *)((uintptr_t)state->hs_reg_uar_baseaddr + - (uint32_t)offset), 0x0); + ddi_put32(uarhdl, eq[i]->eq_doorbell, 0x0); } /* the FMA retry loop ends. */ @@ -308,13 +299,12 @@ /* Unmap any of the partially mapped EQs from above */ for (i = 0; i < num_eq_unmap + num_extra; i++) { - (void) hermon_eq_handler_fini(state, - state->hs_eqhdl[i + num_eq_rsvd]); + (void) hermon_eq_handler_fini(state, eq[i]); } /* Free up any of the partially allocated EQs from above */ for (i = 0; i < num_eq_init + num_extra; i++) { - (void) hermon_eq_free(state, &state->hs_eqhdl[i]); + (void) hermon_eq_free(state, &eq[i]); } /* If a HW error happen during ddi_pio, return DDI_FAILURE */ @@ -336,6 +326,7 @@ { uint_t num_eq, num_eq_rsvd; int status, i; + struct hermon_sw_eq_s **eq; /* * Grab the total number of supported EQs again. This is the same @@ -343,15 +334,15 @@ * initialization.) */ num_eq = HERMON_NUM_EQ_USED + state->hs_intrmsi_allocd - 1; - num_eq_rsvd = state->hs_devlim.num_rsvd_eq; + num_eq_rsvd = state->hs_rsvd_eqs; + eq = &state->hs_eqhdl[num_eq_rsvd]; /* * For each of the event queues that we initialized and mapped * earlier, attempt to unmap the events from the EQ. */ for (i = 0; i < num_eq; i++) { - status = hermon_eq_handler_fini(state, - state->hs_eqhdl[i + num_eq_rsvd]); + status = hermon_eq_handler_fini(state, eq[i]); if (status != DDI_SUCCESS) { return (DDI_FAILURE); } @@ -362,8 +353,7 @@ * earlier. 
*/ for (i = 0; i < num_eq; i++) { - status = hermon_eq_free(state, - &state->hs_eqhdl[i + num_eq_rsvd]); + status = hermon_eq_free(state, &eq[i]); if (status != DDI_SUCCESS) { return (DDI_FAILURE); } @@ -372,50 +362,31 @@ return (DDI_SUCCESS); } + /* - * hermon_eq_arm() - * Context: called from interrupt - * - * Arms a single eq - eqn is the __logical__ eq number 0-based + * hermon_eq_reset_uar_baseaddr + * Context: Only called from attach() */ void -hermon_eq_arm(hermon_state_t *state, int eqn) +hermon_eq_reset_uar_baseaddr(hermon_state_t *state) { - uint64_t offset; - hermon_eqhdl_t eq; - uint32_t eq_ci; - ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); - - /* initialize the FMA retry loop */ - hermon_pio_init(fm_loop_cnt, fm_status, fm_test); - - offset = ARM_EQ_INDEX(eqn + state->hs_devlim.num_rsvd_eq); - eq = state->hs_eqhdl[eqn + state->hs_devlim.num_rsvd_eq]; - eq_ci = (eq->eq_consindx & HERMON_EQ_CI_MASK) | EQ_ARM_BIT; + int i, num_eq; + hermon_eqhdl_t eq, *eqh; - /* the FMA retry loop starts. */ - hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status, - fm_test); - - ddi_put32(uarhdl, - (uint32_t *)((uintptr_t)state->hs_reg_uar_baseaddr + - (uint32_t)offset), eq_ci); - - /* the FMA retry loop ends. 
*/ - hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status, - fm_test); - - return; - -pio_error: - hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_FATAL); + num_eq = HERMON_NUM_EQ_USED + state->hs_intrmsi_allocd - 1; + eqh = &state->hs_eqhdl[state->hs_rsvd_eqs]; + for (i = 0; i < num_eq; i++) { + eq = eqh[i]; + eq->eq_doorbell = (uint32_t *) + ((uintptr_t)state->hs_reg_uar_baseaddr + + (uint32_t)ARM_EQ_INDEX(eq->eq_eqnum)); + } } /* * hermon_eq_arm_all * Context: Only called from attach() and/or detach() path contexts - * Arbel calls in interrupt, currently (initial impl) in Hermon as well */ int hermon_eq_arm_all(hermon_state_t *state) @@ -430,8 +401,8 @@ /* initialize the FMA retry loop */ hermon_pio_init(fm_loop_cnt, fm_status, fm_test); - num_eq = HERMON_NUM_EQ_USED; - num_eq_rsvd = state->hs_devlim.num_rsvd_eq; + num_eq = HERMON_NUM_EQ_USED + state->hs_intrmsi_allocd - 1; + num_eq_rsvd = state->hs_rsvd_eqs; /* the FMA retry loop starts. */ hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status, @@ -511,7 +482,7 @@ * events, whereas the "if" case deals with the required interrupt * vector that is used for all classes of events. */ - r = state->hs_devlim.num_rsvd_eq; + r = state->hs_rsvd_eqs; if (intr + 1 == state->hs_intrmsi_allocd) { /* last intr */ r += state->hs_intrmsi_allocd - 1; @@ -538,9 +509,8 @@ hermon_eq_poll(hermon_state_t *state, hermon_eqhdl_t eq) { hermon_hw_eqe_t *eqe; - uint64_t offset; int polled_some; - uint32_t cons_indx, wrap_around_mask; + uint32_t cons_indx, wrap_around_mask, shift; int (*eqfunction)(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); @@ -552,6 +522,7 @@ /* Get the consumer pointer index */ cons_indx = eq->eq_consindx; + shift = eq->eq_log_eqsz - HERMON_EQE_OWNER_SHIFT; /* * Calculate the wrap around mask. 
Note: This operation only works @@ -573,7 +544,7 @@ for (;;) { polled_some = 0; - while (HERMON_EQE_OWNER_IS_SW(eq, eqe)) { + while (HERMON_EQE_OWNER_IS_SW(eq, eqe, cons_indx, shift)) { /* * Call the EQ handler function. But only call if we @@ -598,8 +569,6 @@ /* Reset to hardware ownership is implicit */ - eq->eq_nexteqe++; /* for next time through */ - /* Increment the consumer index */ cons_indx++; @@ -616,16 +585,12 @@ eq->eq_consindx = cons_indx; - offset = ARM_EQ_INDEX(eq->eq_eqnum); - /* the FMA retry loop starts. */ hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status, fm_test); - ddi_put32(uarhdl, - (uint32_t *)((uintptr_t)state->hs_reg_uar_baseaddr + - (uint32_t)offset), (cons_indx & HERMON_EQ_CI_MASK) | - EQ_ARM_BIT); + ddi_put32(uarhdl, eq->eq_doorbell, + (cons_indx & HERMON_EQ_CI_MASK) | EQ_ARM_BIT); /* the FMA retry loop starts. */ hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, @@ -807,8 +772,7 @@ * Allocate the memory for Event Queue. */ eq->eq_eqinfo.qa_size = (1 << log_eq_size) * sizeof (hermon_hw_eqe_t); - eq->eq_eqinfo.qa_alloc_align = PAGESIZE; - eq->eq_eqinfo.qa_bind_align = PAGESIZE; + eq->eq_eqinfo.qa_alloc_align = eq->eq_eqinfo.qa_bind_align = PAGESIZE; eq->eq_eqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; status = hermon_queue_alloc(state, &eq->eq_eqinfo, HERMON_SLEEP); @@ -848,12 +812,6 @@ } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) - /* Sync entire EQ for use by the hardware */ - eq->eq_sync = 1; - - (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, - eq->eq_eqinfo.qa_size, DDI_DMA_SYNC_FORDEV); - /* * Fill in the EQC entry. This is the final step before passing * ownership of the EQC entry to the Hermon hardware. 
We use all of @@ -904,8 +862,9 @@ eq->eq_buf = buf; eq->eq_bufsz = (1 << log_eq_size); eq->eq_log_eqsz = log_eq_size; - eq->eq_nexteqe = 0; eq->eq_mrhdl = mr; + eq->eq_doorbell = (uint32_t *)((uintptr_t)state->hs_reg_uar_baseaddr + + (uint32_t)ARM_EQ_INDEX(eq->eq_eqnum)); *eqhdl = eq; return (DDI_SUCCESS); @@ -1112,17 +1071,17 @@ break; case HERMON_EVT_LOCAL_WQ_CAT_ERROR: - HERMON_FMANOTE(state, HERMON_FMA_LOCCAT); + HERMON_WARNING(state, HERMON_FMA_LOCCAT); status = hermon_local_wq_cat_err_handler(state, eq, eqe); break; case HERMON_EVT_INV_REQ_LOCAL_WQ_ERROR: - HERMON_FMANOTE(state, HERMON_FMA_LOCINV); + HERMON_WARNING(state, HERMON_FMA_LOCINV); status = hermon_invreq_local_wq_err_handler(state, eq, eqe); break; case HERMON_EVT_LOCAL_ACC_VIO_WQ_ERROR: - HERMON_FMANOTE(state, HERMON_FMA_LOCACEQ); + HERMON_WARNING(state, HERMON_FMA_LOCACEQ); IBTF_DPRINTF_L2("async", HERMON_FMA_LOCACEQ); status = hermon_local_acc_vio_wq_err_handler(state, eq, eqe); break; @@ -1135,12 +1094,12 @@ break; case HERMON_EVT_PATH_MIGRATE_FAILED: - HERMON_FMANOTE(state, HERMON_FMA_PATHMIG); + HERMON_WARNING(state, HERMON_FMA_PATHMIG); status = hermon_path_mig_err_handler(state, eq, eqe); break; case HERMON_EVT_SRQ_CATASTROPHIC_ERROR: - HERMON_FMANOTE(state, HERMON_FMA_SRQCAT); + HERMON_WARNING(state, HERMON_FMA_SRQCAT); status = hermon_catastrophic_handler(state, eq, eqe); break; @@ -1148,8 +1107,8 @@ status = hermon_srq_last_wqe_reached_handler(state, eq, eqe); break; - case HERMON_EVT_ECC_DETECTION: - status = hermon_ecc_detection_handler(state, eq, eqe); + case HERMON_EVT_FEXCH_ERROR: + status = hermon_fexch_error_handler(state, eq, eqe); break; default: @@ -1162,6 +1121,7 @@ * hermon_port_state_change_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_port_state_change_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1170,19 +1130,8 @@ ibt_async_code_t type; uint_t subtype; uint8_t port; - uint_t eqe_evttype; char 
link_msg[24]; - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_PORT_STATE_CHANGE || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } - /* * Depending on the type of Port State Change event, pass the * appropriate asynch event to the IBTF. @@ -1238,6 +1187,7 @@ * hermon_comm_estbl_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_comm_estbl_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1246,17 +1196,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_COMM_ESTABLISHED || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1298,6 +1237,7 @@ * hermon_local_wq_cat_err_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_local_wq_cat_err_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1306,17 +1246,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_LOCAL_WQ_CAT_ERROR || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1358,6 +1287,7 @@ * hermon_invreq_local_wq_err_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_invreq_local_wq_err_handler(hermon_state_t *state, 
hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1366,17 +1296,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_INV_REQ_LOCAL_WQ_ERROR || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1418,6 +1337,7 @@ * hermon_local_acc_vio_wq_err_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_local_acc_vio_wq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1426,17 +1346,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_LOCAL_ACC_VIO_WQ_ERROR || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1478,6 +1387,7 @@ * hermon_sendq_drained_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_sendq_drained_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1487,17 +1397,6 @@ ibc_async_event_t event; uint_t forward_sqd_event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_SEND_QUEUE_DRAINED || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1554,6 +1453,7 @@ * 
hermon_path_mig_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_path_mig_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1562,17 +1462,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_PATH_MIGRATED || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1614,6 +1503,7 @@ * hermon_path_mig_err_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_path_mig_err_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1622,17 +1512,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_PATH_MIGRATE_FAILED || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1674,6 +1553,7 @@ * hermon_catastrophic_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_catastrophic_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1682,7 +1562,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; if (eq->eq_evttypemask == HERMON_EVT_MSK_LOCAL_CAT_ERROR) { HERMON_FMANOTE(state, HERMON_FMA_INTERNAL); @@ -1690,16 +1569,6 @@ return (DDI_SUCCESS); } - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_SRQ_CATASTROPHIC_ERROR || - eqe_evttype == 
HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } - /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); qp = hermon_qphdl_from_qpnum(state, qpnum); @@ -1744,6 +1613,7 @@ * hermon_srq_last_wqe_reached_handler() * Context: Only called from interrupt context */ +/* ARGSUSED */ static int hermon_srq_last_wqe_reached_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) @@ -1752,17 +1622,6 @@ uint_t qpnum; ibc_async_event_t event; ibt_async_code_t type; - uint_t eqe_evttype; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); - - ASSERT(eqe_evttype == HERMON_EVT_SRQ_LAST_WQE_REACHED || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); - - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); - return (DDI_FAILURE); - } /* Get the QP handle from QP number in event descriptor */ qpnum = HERMON_EQE_QPNUM_GET(eq, eqe); @@ -1800,38 +1659,49 @@ } -/* - * hermon_ecc_detection_handler() - * Context: Only called from interrupt context - */ -static int -hermon_ecc_detection_handler(hermon_state_t *state, hermon_eqhdl_t eq, - hermon_hw_eqe_t *eqe) +/* ARGSUSED */ +static int hermon_fexch_error_handler(hermon_state_t *state, + hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe) { - uint_t eqe_evttype; - uint_t data; - int i; - - eqe_evttype = HERMON_EQE_EVTTYPE_GET(eq, eqe); + hermon_qphdl_t qp; + uint_t qpnum; + ibc_async_event_t event; + ibt_async_code_t type; - ASSERT(eqe_evttype == HERMON_EVT_ECC_DETECTION || - eqe_evttype == HERMON_EVT_EQ_OVERFLOW); + /* Get the QP handle from QP number in event descriptor */ + event.ev_port = HERMON_EQE_FEXCH_PORTNUM_GET(eq, eqe); + qpnum = hermon_fcoib_qpnum_from_fexch(state, + event.ev_port, HERMON_EQE_FEXCH_FEXCH_GET(eq, eqe)); + qp = hermon_qphdl_from_qpnum(state, qpnum); - if (eqe_evttype == HERMON_EVT_EQ_OVERFLOW) { - hermon_eq_overflow_handler(state, eq, eqe); 
- return (DDI_FAILURE); - } + event.ev_fc = HERMON_EQE_FEXCH_SYNDROME_GET(eq, eqe); /* - * The "ECC Detection Event" indicates that a correctable single-bit - * has occurred with the attached DDR. The EQE provides some - * additional information about the errored EQ. So we print a warning - * message here along with that additional information. + * If the QP handle is NULL, this is probably an indication + * that the QP has been freed already. In which case, we + * should not deliver this event. + * + * We also check that the QP number in the handle is the + * same as the QP number in the event queue entry. This + * extra check allows us to handle the case where a QP was + * freed and then allocated again in the time it took to + * handle the event queue processing. By constantly incrementing + * the non-constrained portion of the QP number every time + * a new QP is allocated, we mitigate (somewhat) the chance + * that a stale event could be passed to the client's QP + * handler. + * + * Lastly, we check if "hs_ibtfpriv" is NULL. If it is then it + * means that we've have either received this event before we + * finished attaching to the IBTF or we've received it while we + * are in the process of detaching. */ - HERMON_WARNING(state, "ECC Correctable Error Event Detected"); - for (i = 0; i < sizeof (hermon_hw_eqe_t) >> 2; i++) { - data = ((uint_t *)eqe)[i]; - cmn_err(CE_CONT, "! 
EQE[%02x]: %08x\n", i, data); + if ((qp != NULL) && (qp->qp_qpnum == qpnum) && + (state->hs_ibtfpriv != NULL)) { + event.ev_qp_hdl = (ibtl_qp_hdl_t)qp->qp_hdlrarg; + type = IBT_FEXCH_ERROR; + + HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event); } return (DDI_SUCCESS); @@ -1839,34 +1709,6 @@ /* - * hermon_eq_overflow_handler() - * Context: Only called from interrupt context - */ -/* ARGSUSED */ -void -hermon_eq_overflow_handler(hermon_state_t *state, hermon_eqhdl_t eq, - hermon_hw_eqe_t *eqe) -{ - uint_t error_type, data; - - ASSERT(HERMON_EQE_EVTTYPE_GET(eq, eqe) == HERMON_EVT_EQ_OVERFLOW); - - /* - * The "Event Queue Overflow Event" indicates that something has - * probably gone seriously wrong with some hardware (or, perhaps, - * with the software... though it's unlikely in this case). The EQE - * provides some additional information about the errored EQ. So we - * print a warning message here along with that additional information. - */ - error_type = HERMON_EQE_OPERRTYPE_GET(eq, eqe); - data = HERMON_EQE_OPERRDATA_GET(eq, eqe); - - HERMON_WARNING(state, "Event Queue overflow"); - cmn_err(CE_CONT, " Error type: %02x, data: %08x\n", error_type, data); -} - - -/* * hermon_no_eqhandler * Context: Only called from interrupt context */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_fcoib.c Thu Jul 29 22:10:26 2010 -0700 @@ -0,0 +1,462 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +/* + * hermon_fcoib.c + * Hermon Fibre Channel over IB routines + * + * Implements all the routines necessary for setting up, using, and + * (later) tearing down all the FCoIB state. 
+ */ + +#include <sys/ib/adapters/hermon/hermon.h> + +/* + * hermon_fcoib_enable() + * Context: user or kernel context + */ +static int +hermon_fcoib_enable(hermon_state_t *state, int port) +{ + hermon_fcoib_t *fcoib; + hermon_hw_config_fc_basic_t config_fc_basic; + int status; + + port--; /* passed in as 1 or 2, used as 0 or 1 */ + ASSERT(port >= 0 && port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + + /* Configure FCoIB on the port */ + bzero(&config_fc_basic, sizeof (config_fc_basic)); + config_fc_basic.fexch_base_hi = fcoib->hfc_fexch_base[port] >> 16; + config_fc_basic.fx_base_mpt_hi = fcoib->hfc_mpt_base[port] >> 17; + config_fc_basic.fx_base_mpt_lo = 0; + config_fc_basic.log2_num_rfci = + state->hs_ibtfinfo.hca_attr->hca_rfci_max_log2_qp; + config_fc_basic.rfci_base = fcoib->hfc_rfci_qps_per_port * port + + fcoib->hfc_rfci_rsrc->hr_indx; +#if 1 + status = hermon_config_fc_cmd_post(state, &config_fc_basic, 1, + HERMON_HW_FC_CONF_BASIC, 0, port + 1, HERMON_CMD_NOSLEEP_SPIN); +#else + status = hermon_config_fc_cmd_post(state, &config_fc_basic, 1, + HERMON_HW_FC_CONF_BASIC, 0, 0, HERMON_CMD_NOSLEEP_SPIN); +#endif + if (status != HERMON_CMD_SUCCESS) { + cmn_err(CE_CONT, "fcoib_enable failed: status 0x%x\n", status); + HERMON_WARNING(state, "fcoib_enable failed"); + return (DDI_FAILURE); + } + fcoib->hfc_port_enabled[port] = 1; + state->hs_fcoib_may_be_running = B_TRUE; + return (DDI_SUCCESS); +} + +/* + * hermon_fcoib_set_id() + * Context: user or kernel context + */ +int +hermon_fcoib_set_id(hermon_state_t *state, int port, uint32_t rfci_qpn, + uint32_t src_id) +{ + hermon_fcoib_t *fcoib; + int status; + int offset; + uint32_t *n_port_ids; + + port--; /* passed in as 1 or 2, used as 0 or 1 */ + ASSERT(port >= 0 && port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + mutex_enter(&fcoib->hfc_lock); + + if (fcoib->hfc_port_enabled[port] == 0) { + if (hermon_fcoib_enable(state, port + 1) != DDI_SUCCESS) { + mutex_exit(&fcoib->hfc_lock); + return 
(DDI_FAILURE); + } + } + + n_port_ids = fcoib->hfc_n_port_ids[port]; + offset = rfci_qpn - fcoib->hfc_rfci_base[port]; + ASSERT(offset >= 0 && offset < fcoib->hfc_rfci_qps_per_port); + n_port_ids[offset] = src_id; + + status = hermon_config_fc_cmd_post(state, n_port_ids, 1, + HERMON_HW_FC_CONF_NPORT, fcoib->hfc_rfci_qps_per_port, + port + 1, HERMON_CMD_NOSLEEP_SPIN); + if (status != HERMON_CMD_SUCCESS) { + HERMON_WARNING(state, "fcoib_set_id failed"); + mutex_exit(&fcoib->hfc_lock); + return (DDI_FAILURE); + } + mutex_exit(&fcoib->hfc_lock); + return (DDI_SUCCESS); +} + +/* + * hermon_fcoib_get_id_idx() + * Context: user or kernel context + */ +int +hermon_fcoib_get_id_idx(hermon_state_t *state, int port, ibt_fc_attr_t *fcp) +{ + hermon_fcoib_t *fcoib; + int idx; + + port--; /* passed in as 1 or 2, used as 0 or 1 */ + ASSERT(port >= 0 && port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + + idx = fcp->fc_rfci_qpn - fcoib->hfc_rfci_base[port]; + if (idx < 0 || idx >= fcoib->hfc_rfci_qps_per_port) + idx = -1; + + return (idx); +} + +/* + * hermon_fcoib_get_exch_base() + * Context: user or kernel context + */ +int +hermon_fcoib_check_exch_base_off(hermon_state_t *state, int port, + ibt_fc_attr_t *fcp) +{ + hermon_fcoib_t *fcoib; + int exch_base_off; + + port--; /* passed in as 1 or 2, used as 0 or 1 */ + ASSERT(port >= 0 && port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + + exch_base_off = fcp->fc_exch_base_off; + if (exch_base_off >= fcoib->hfc_fexch_qps_per_port) + exch_base_off = -1; + + return (exch_base_off); +} + +/* + * hermon_fcoib_qpnum_from_fexch() + * Context: user, kernel, or interrupt context + */ +int +hermon_fcoib_is_fexch_qpn(hermon_state_t *state, uint_t qpnum) +{ + hermon_fcoib_t *fcoib; + + fcoib = &state->hs_fcoib; + qpnum -= fcoib->hfc_fexch_rsrc->hr_indx; + return (qpnum < fcoib->hfc_nports * fcoib->hfc_fexch_qps_per_port); +} + +/* + * hermon_fcoib_qpnum_from_fexch() + * Context: user, kernel, or interrupt context + */ +uint_t 
+hermon_fcoib_qpnum_from_fexch(hermon_state_t *state, int port, + uint16_t fexch) +{ + hermon_fcoib_t *fcoib; + uint_t qpnum; + + port--; /* passed in as 1 or 2, used as 0 or 1 */ + ASSERT(port >= 0 && port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + qpnum = fexch + fcoib->hfc_fexch_base[port]; + return (qpnum); +} + +/* + * hermon_fcoib_qpn_to_mkey + * Context: user or kernel context + */ +uint32_t +hermon_fcoib_qpn_to_mkey(hermon_state_t *state, uint_t qpnum) +{ + int i; + hermon_fcoib_t *fcoib; + uint32_t qp_indx; + + fcoib = &state->hs_fcoib; + for (i = 0; i < fcoib->hfc_nports; i++) { + qp_indx = qpnum - fcoib->hfc_fexch_base[i]; + if (qp_indx < fcoib->hfc_fexch_qps_per_port) + return ((qp_indx + fcoib->hfc_mpt_base[i]) << 8); + } + return ((uint32_t)-1); /* cannot get here with valid qpnum argument */ +} + +/* + * hermon_fcoib_fexch_relative_qpn() + * Context: user or kernel context + */ +uint32_t +hermon_fcoib_fexch_relative_qpn(hermon_state_t *state, uint8_t port, + uint32_t qp_indx) +{ + port--; + ASSERT(port < HERMON_MAX_PORTS); + qp_indx -= state->hs_fcoib.hfc_fexch_base[port]; + return (qp_indx); +} + +/* + * hermon_fcoib_fexch_mkey_init() + * Context: user or kernel context + */ +int +hermon_fcoib_fexch_mkey_init(hermon_state_t *state, hermon_pdhdl_t pd, + uint8_t port, uint32_t qp_indx, uint_t sleep) +{ + int status; + uint32_t mpt_indx; + uint_t nummtt; + uint64_t mtt_addr; + hermon_fcoib_t *fcoib; + + port--; + ASSERT(port < HERMON_MAX_PORTS); + fcoib = &state->hs_fcoib; + qp_indx -= fcoib->hfc_fexch_base[port]; /* relative to FEXCH base */ + if (qp_indx > fcoib->hfc_fexch_qps_per_port) + return (IBT_INVALID_PARAM); + mpt_indx = qp_indx + fcoib->hfc_mpt_base[port]; + nummtt = fcoib->hfc_mtts_per_mpt; + mtt_addr = ((uint64_t)qp_indx * nummtt + fcoib->hfc_mtt_base[port]) << + HERMON_MTT_SIZE_SHIFT; + + status = hermon_mr_fexch_mpt_init(state, pd, mpt_indx, + nummtt, mtt_addr, sleep); + return (status); +} + +/* + * 
hermon_fcoib_fexch_mkey_fini() + * Context: user or kernel context + */ +int +hermon_fcoib_fexch_mkey_fini(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t qpnum, uint_t sleep) +{ + int status; + uint8_t port; + uint32_t qp_indx; + uint32_t mpt_indx; + hermon_fcoib_t *fcoib; + + fcoib = &state->hs_fcoib; + for (port = 0; port < fcoib->hfc_nports; port++) { + qp_indx = qpnum - fcoib->hfc_fexch_base[port]; + if (qp_indx < fcoib->hfc_fexch_qps_per_port) + goto found; + } + return (IBT_INVALID_PARAM); +found: + /* qp_indx relative to FEXCH base */ + mpt_indx = qp_indx + fcoib->hfc_mpt_base[port]; + + status = hermon_mr_fexch_mpt_fini(state, pd, mpt_indx, sleep); + return (status); +} + +/* + * hermon_fcoib_query_fc() + * Context: user or kernel context + */ +void +hermon_fcoib_query_fc(hermon_state_t *state, hermon_fcoib_t *fcoib) +{ + int status; + struct hermon_hw_query_fc_s query_fc; + + status = hermon_cmn_query_cmd_post(state, QUERY_FC, 0, 0, &query_fc, + sizeof (query_fc), HERMON_CMD_NOSLEEP_SPIN); + if (status == HERMON_CMD_SUCCESS) { + fcoib->hfc_log2_max_port_ids_queried = query_fc.log2_max_nports; + fcoib->hfc_log2_max_fexch_queried = query_fc.log2_max_fexch; + fcoib->hfc_log2_max_rfci_queried = query_fc.log2_max_rfci; + } else + cmn_err(CE_CONT, "!query_fc status 0x%x\n", status); +} + +/* + * hermon_fcoib_init() + * Context: Only called from attach() path context + */ +int +hermon_fcoib_init(hermon_state_t *state) +{ + hermon_fcoib_t *fcoib; + uint_t numports; + char string[128]; + int i; + uintptr_t vmemstart = (uintptr_t)0x10000000; + + /* used for fast checking for FCoIB during cqe_consume */ + state->hs_fcoib_may_be_running = B_FALSE; + + if ((state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_FC) == 0) + return (DDI_SUCCESS); + + fcoib = &state->hs_fcoib; + bzero(fcoib, sizeof (*fcoib)); + + hermon_fcoib_query_fc(state, fcoib); + + mutex_init(&fcoib->hfc_lock, NULL, MUTEX_DRIVER, NULL); + mutex_enter(&fcoib->hfc_lock); + + /* use a ROUND value that 
works on both 32 and 64-bit kernels */ + fcoib->hfc_vmemstart = vmemstart; + + fcoib->hfc_nports = numports = state->hs_cfg_profile->cp_num_ports; + fcoib->hfc_fexch_qps_per_port = + 1 << state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp; + fcoib->hfc_mpts_per_port = fcoib->hfc_fexch_qps_per_port * 2; + fcoib->hfc_mtts_per_mpt = + (1 << state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_mem) >> + PAGESHIFT; + fcoib->hfc_rfci_qps_per_port = + 1 << state->hs_ibtfinfo.hca_attr->hca_rfci_max_log2_qp; + + if (hermon_rsrc_reserve(state, HERMON_DMPT, numports * + fcoib->hfc_mpts_per_port, HERMON_SLEEP, + &fcoib->hfc_mpt_rsrc) != DDI_SUCCESS) { + mutex_exit(&fcoib->hfc_lock); + hermon_fcoib_fini(state); + return (DDI_FAILURE); + } + + /* + * Only reserve MTTs for the Primary MPTs (first half of the + * range for each port). + */ + if (hermon_rsrc_reserve(state, HERMON_MTT, numports * + fcoib->hfc_mpts_per_port * fcoib->hfc_mtts_per_mpt / 2, + HERMON_SLEEP, &fcoib->hfc_mtt_rsrc) != DDI_SUCCESS) { + mutex_exit(&fcoib->hfc_lock); + hermon_fcoib_fini(state); + return (DDI_FAILURE); + } + if (hermon_rsrc_reserve(state, HERMON_QPC, numports * + fcoib->hfc_fexch_qps_per_port, HERMON_SLEEP, + &fcoib->hfc_fexch_rsrc) != DDI_SUCCESS) { + mutex_exit(&fcoib->hfc_lock); + hermon_fcoib_fini(state); + return (DDI_FAILURE); + } + if (hermon_rsrc_reserve(state, HERMON_QPC, numports * + fcoib->hfc_rfci_qps_per_port, HERMON_SLEEP, + &fcoib->hfc_rfci_rsrc) != DDI_SUCCESS) { + mutex_exit(&fcoib->hfc_lock); + hermon_fcoib_fini(state); + return (DDI_FAILURE); + } + + for (i = 0; i < numports; i++) { + fcoib->hfc_port_enabled[i] = 0; + fcoib->hfc_n_port_ids[i] = kmem_zalloc(sizeof (uint32_t) * + fcoib->hfc_rfci_qps_per_port, KM_SLEEP); + + fcoib->hfc_mpt_base[i] = i * fcoib->hfc_mpts_per_port + + fcoib->hfc_mpt_rsrc->hr_indx; + /* "/ 2" is for Secondary MKEYs never used on Client side */ + fcoib->hfc_mtt_base[i] = (i * fcoib->hfc_mpts_per_port * + fcoib->hfc_mtts_per_mpt / 2) + 
fcoib->hfc_mtt_rsrc->hr_indx; + fcoib->hfc_fexch_base[i] = i * fcoib->hfc_fexch_qps_per_port + + fcoib->hfc_fexch_rsrc->hr_indx; + fcoib->hfc_rfci_base[i] = i * fcoib->hfc_rfci_qps_per_port + + fcoib->hfc_rfci_rsrc->hr_indx; + + /* init FEXCH QP rsrc pool */ + (void) sprintf(string, "hermon%d_port%d_fexch_vmem", + state->hs_instance, i + 1); + fcoib->hfc_fexch_vmemp[i] = vmem_create(string, + (void *)vmemstart, fcoib->hfc_fexch_qps_per_port, + 1, NULL, NULL, NULL, 0, VM_SLEEP); + + /* init RFCI QP rsrc pool */ + (void) sprintf(string, "hermon%d_port%d_rfci_vmem", + state->hs_instance, i + 1); + fcoib->hfc_rfci_vmemp[i] = vmem_create(string, + (void *)vmemstart, fcoib->hfc_rfci_qps_per_port, + 1, NULL, NULL, NULL, 0, VM_SLEEP); + } + + mutex_exit(&fcoib->hfc_lock); + + return (DDI_SUCCESS); +} + + +/* + * hermon_fcoib_fini() + * Context: Only called from attach() and/or detach() path contexts + */ +void +hermon_fcoib_fini(hermon_state_t *state) +{ + hermon_fcoib_t *fcoib; + uint_t numports; + int i; + + if ((state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_FC) == 0) + return; + + fcoib = &state->hs_fcoib; + + mutex_enter(&fcoib->hfc_lock); + + numports = fcoib->hfc_nports; + + for (i = 0; i < numports; i++) { + if (fcoib->hfc_rfci_vmemp[i]) + vmem_destroy(fcoib->hfc_rfci_vmemp[i]); + if (fcoib->hfc_fexch_vmemp[i]) + vmem_destroy(fcoib->hfc_fexch_vmemp[i]); + if (fcoib->hfc_n_port_ids[i]) + kmem_free(fcoib->hfc_n_port_ids[i], sizeof (uint32_t) * + fcoib->hfc_rfci_qps_per_port); + + /* XXX --- should we issue HERMON_HW_FC_CONF_BASIC disable? 
*/ + fcoib->hfc_port_enabled[i] = 0; + } + if (fcoib->hfc_rfci_rsrc) + hermon_rsrc_free(state, &fcoib->hfc_rfci_rsrc); + if (fcoib->hfc_fexch_rsrc) + hermon_rsrc_free(state, &fcoib->hfc_fexch_rsrc); + if (fcoib->hfc_mpt_rsrc) + hermon_rsrc_free(state, &fcoib->hfc_mpt_rsrc); + if (fcoib->hfc_mtt_rsrc) + hermon_rsrc_free(state, &fcoib->hfc_mtt_rsrc); + + mutex_exit(&fcoib->hfc_lock); + mutex_destroy(&fcoib->hfc_lock); + + bzero(fcoib, sizeof (*fcoib)); +}
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_fm.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_fm.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -627,6 +626,8 @@ goto error; } + hermon_eq_reset_uar_baseaddr(state); + /* Drop the Hermon FM Attach Mode */ hermon_clr_state(state, HCA_ATTCH_FM);
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_misc.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_misc.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -46,11 +45,8 @@ #include <sys/ib/adapters/hermon/hermon.h> -extern uint32_t hermon_kernel_data_ro; extern int hermon_rdma_debug; - -/* used for helping uniquify fmr pool taskq name */ -static uint_t hermon_debug_fmrpool_cnt = 0x00000000; +int hermon_fmr_verbose = 0; static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); @@ -68,11 +64,7 @@ hermon_hw_mcg_t *mcg_entry, uint_t indx); static int hermon_mgid_is_valid(ib_gid_t gid); static int hermon_mlid_is_valid(ib_lid_t lid); -static void hermon_fmr_processing(void *fmr_args); -static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); -static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); -static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); -static int hermon_fmr_avl_compare(const void *q, const void *e); +static void hermon_fmr_cleanup(hermon_fmrhdl_t pool); #define HERMON_MAX_DBR_PAGES_PER_USER 64 @@ -88,19 +80,20 @@ uint_t cookiecnt; int status; hermon_umap_db_entry_t *umapdb; + ulong_t pagesize = PAGESIZE; pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); pagep->upg_index = page; - pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); + pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t); /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */ - pagep->upg_free = kmem_zalloc(PAGESIZE / sizeof (hermon_dbr_t) / 8, + pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8, KM_SLEEP); - pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, + pagep->upg_kvaddr = 
ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP, &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, - PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); + pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); hermon_dma_attr_init(state, &dma_attr); #ifdef __sparc @@ -281,6 +274,7 @@ hermon_dbr_info_t *info; caddr_t dmaaddr; uint64_t dmalen; + ulong_t pagesize = PAGESIZE; info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); @@ -290,7 +284,7 @@ * page aligned. Also use the configured value for IOMMU bypass */ hermon_dma_attr_init(state, &dma_attr); - dma_attr.dma_attr_align = PAGESIZE; + dma_attr.dma_attr_align = pagesize; dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ #ifdef __sparc if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) @@ -305,7 +299,7 @@ return (DDI_FAILURE); } - status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE, + status = ddi_dma_mem_alloc(dma_hdl, pagesize, &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl); if (status != DDI_SUCCESS) { @@ -2559,7 +2553,6 @@ hermon_fmrhdl_t fmrpool; hermon_fmr_list_t *fmr, *fmr_next; hermon_mrhdl_t mr; - char taskqname[48]; int status; int sleep; int i; @@ -2580,35 +2573,33 @@ mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); + mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); + mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); fmrpool->fmr_state = state; fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; fmrpool->fmr_pool_size = 0; - fmrpool->fmr_cache = 0; fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; - fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; + fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4; fmrpool->fmr_dirty_len = 
0; + fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32; + fmrpool->fmr_remap_len = 0; fmrpool->fmr_flags = fmr_attr->fmr_flags; + fmrpool->fmr_stat_register = 0; + fmrpool->fmr_max_remaps = state->hs_cfg_profile->cp_fmr_max_remaps; + fmrpool->fmr_remap_gen = 1; - /* Create taskq to handle cleanup and flush processing */ - (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, - fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, - (uint64_t)(uintptr_t)fmrpool); - fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, - HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); - if (fmrpool->fmr_taskq == NULL) { - status = IBT_INSUFF_RESOURCE; - goto fail1; - } - - fmrpool->fmr_free_list = NULL; + fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list; fmrpool->fmr_dirty_list = NULL; - - if (fmr_attr->fmr_cache) { - hermon_fmr_cache_init(fmrpool); - } + fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list; + fmrpool->fmr_remap_list = NULL; + fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; + fmrpool->fmr_pool_size = fmrpool->fmr_free_len = + fmr_attr->fmr_pool_size; for (i = 0; i < fmr_attr->fmr_pool_size; i++) { status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); @@ -2621,34 +2612,34 @@ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) fmr->fmr = mr; - fmr->fmr_refcnt = 0; fmr->fmr_remaps = 0; + fmr->fmr_remap_gen = fmrpool->fmr_remap_gen; fmr->fmr_pool = fmrpool; - fmr->fmr_in_cache = 0; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) mr->mr_fmr = fmr; + if (!i) /* address of last entry's link */ + fmrpool->fmr_free_list_tail = &fmr->fmr_next; fmr->fmr_next = fmrpool->fmr_free_list; fmrpool->fmr_free_list = fmr; - fmrpool->fmr_pool_size++; } /* Set to return pool */ *fmrpoolp = fmrpool; + IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS"); return (IBT_SUCCESS); fail2: - hermon_fmr_cache_fini(fmrpool); for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) fmr_next = fmr->fmr_next; (void) 
hermon_mr_dealloc_fmr(state, &fmr->fmr); kmem_free(fmr, sizeof (hermon_fmr_list_t)); } - ddi_taskq_destroy(fmrpool->fmr_taskq); -fail1: kmem_free(fmrpool, sizeof (*fmrpool)); fail: + *fmrpoolp = NULL; + IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED"); if (status == DDI_FAILURE) { return (ibc_get_ci_failure(0)); } else { @@ -2665,31 +2656,27 @@ hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) { hermon_fmr_list_t *fmr, *fmr_next; - int status; mutex_enter(&fmrpool->fmr_lock); - status = hermon_fmr_cleanup(state, fmrpool); - if (status != DDI_SUCCESS) { - mutex_exit(&fmrpool->fmr_lock); - return (status); - } - - if (fmrpool->fmr_cache) { - hermon_fmr_cache_fini(fmrpool); - } + hermon_fmr_cleanup(fmrpool); for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { fmr_next = fmr->fmr_next; (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); kmem_free(fmr, sizeof (hermon_fmr_list_t)); + + --fmrpool->fmr_pool_size; } + ASSERT(fmrpool->fmr_pool_size == 0); mutex_exit(&fmrpool->fmr_lock); - ddi_taskq_destroy(fmrpool->fmr_taskq); mutex_destroy(&fmrpool->fmr_lock); + mutex_destroy(&fmrpool->dirty_lock); + mutex_destroy(&fmrpool->remap_lock); kmem_free(fmrpool, sizeof (*fmrpool)); + IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS"); return (DDI_SUCCESS); } @@ -2698,24 +2685,23 @@ * Ensure that all unmapped FMRs are fully invalidated. * Context: Can be called from kernel context only. */ +/* ARGSUSED */ int hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) { - int status; - /* * Force the unmapping of all entries on the dirty list, regardless of * whether the watermark has been hit yet. 
*/ /* grab the pool lock */ mutex_enter(&fmrpool->fmr_lock); - status = hermon_fmr_cleanup(state, fmrpool); + hermon_fmr_cleanup(fmrpool); mutex_exit(&fmrpool->fmr_lock); - return (status); + return (DDI_SUCCESS); } /* - * hermon_deregister_fmr() + * hermon_register_physical_fmr() * Map memory into FMR * Context: Can be called from interrupt or base context. */ @@ -2725,71 +2711,59 @@ ibt_pmr_desc_t *mem_desc_p) { hermon_fmr_list_t *fmr; - hermon_fmr_list_t query; - avl_index_t where; int status; /* Check length */ - mutex_enter(&fmrpool->fmr_lock); if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > fmrpool->fmr_max_pages)) { - mutex_exit(&fmrpool->fmr_lock); return (IBT_MR_LEN_INVALID); } - mutex_enter(&fmrpool->fmr_cachelock); - /* lookup in fmr cache */ - /* if exists, grab it, and return it */ - if (fmrpool->fmr_cache) { - query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; - query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; - fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, - &query, &where); + mutex_enter(&fmrpool->fmr_lock); + if (fmrpool->fmr_free_list == NULL) { + if (hermon_fmr_verbose & 2) + IBTF_DPRINTF_L2("fmr", "register needs remap"); + mutex_enter(&fmrpool->remap_lock); + if (fmrpool->fmr_remap_list) { + /* add to free list */ + *(fmrpool->fmr_free_list_tail) = + fmrpool->fmr_remap_list; + fmrpool->fmr_remap_list = NULL; + fmrpool->fmr_free_list_tail = + fmrpool->fmr_remap_list_tail; - /* - * If valid FMR was found in cache, return that fmr info - */ - if (fmr != NULL) { - fmr->fmr_refcnt++; - /* Store pmr desc for use in cache */ - (void) memcpy(mem_desc_p, &fmr->fmr_desc, - sizeof (ibt_pmr_desc_t)); - *mr = (hermon_mrhdl_t)fmr->fmr; - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS( - *(fmr->fmr->mr_mptrsrcp))) - if (hermon_rdma_debug & 0x4) - IBTF_DPRINTF_L2("fmr", " reg cache: mr %p " - "index %x", fmr->fmr, - fmr->fmr->mr_mptrsrcp->hr_indx); - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS( - 
*(fmr->fmr->mr_mptrsrcp))) - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - mutex_exit(&fmrpool->fmr_cachelock); - mutex_exit(&fmrpool->fmr_lock); - return (DDI_SUCCESS); + /* reset list */ + fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; + fmrpool->fmr_free_len += fmrpool->fmr_remap_len; + fmrpool->fmr_remap_len = 0; } + mutex_exit(&fmrpool->remap_lock); } - - /* FMR does not exist in cache, proceed with registration */ + if (fmrpool->fmr_free_list == NULL) { + if (hermon_fmr_verbose & 2) + IBTF_DPRINTF_L2("fmr", "register needs cleanup"); + hermon_fmr_cleanup(fmrpool); + } /* grab next free entry */ fmr = fmrpool->fmr_free_list; if (fmr == NULL) { IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource"); - mutex_exit(&fmrpool->fmr_cachelock); + cmn_err(CE_CONT, "no free fmr resource\n"); mutex_exit(&fmrpool->fmr_lock); return (IBT_INSUFF_RESOURCE); } - fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; + if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL) + fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list; fmr->fmr_next = NULL; + fmrpool->fmr_stat_register++; + mutex_exit(&fmrpool->fmr_lock); + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, mem_desc_p); if (status != DDI_SUCCESS) { - mutex_exit(&fmrpool->fmr_cachelock); - mutex_exit(&fmrpool->fmr_lock); return (status); } _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr)) @@ -2797,24 +2771,15 @@ IBTF_DPRINTF_L2("fmr", " reg: mr %p key %x", fmr->fmr, fmr->fmr->mr_rkey); _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr)) + if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) { + fmr->fmr_remap_gen = fmrpool->fmr_remap_gen; + fmr->fmr_remaps = 0; + } - fmr->fmr_refcnt = 1; fmr->fmr_remaps++; - /* Store pmr desc for use in cache */ - (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); *mr = (hermon_mrhdl_t)fmr->fmr; - /* Store in cache */ - if (fmrpool->fmr_cache) { - if (!fmr->fmr_in_cache) { - 
avl_insert(&fmrpool->fmr_cache_avl, fmr, where); - fmr->fmr_in_cache = 1; - } - } - - mutex_exit(&fmrpool->fmr_cachelock); - mutex_exit(&fmrpool->fmr_lock); return (DDI_SUCCESS); } @@ -2826,218 +2791,136 @@ int hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) { - hermon_fmr_list_t *fmr; hermon_fmrhdl_t fmrpool; - int status; + hermon_fmr_list_t *fmr, **fmrlast; + int len; fmr = mr->mr_fmr; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) fmrpool = fmr->fmr_pool; - /* Grab pool lock */ - mutex_enter(&fmrpool->fmr_lock); - fmr->fmr_refcnt--; - - if (fmr->fmr_refcnt == 0) { - /* - * First, do some bit of invalidation, reducing our exposure to - * having this region still registered in hardware. - */ - (void) hermon_mr_invalidate_fmr(state, mr); + /* mark as owned by software */ + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) + *(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0; - /* - * If we've exhausted our remaps then add the FMR to the dirty - * list, not allowing it to be re-used until we have done a - * flush. Otherwise, simply add it back to the free list for - * re-mapping. 
- */ - if (fmr->fmr_remaps < - state->hs_cfg_profile->cp_fmr_max_remaps) { - /* add to free list */ - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - if (hermon_rdma_debug & 0x4) - IBTF_DPRINTF_L2("fmr", "dereg: mr %p key %x", - fmr->fmr, fmr->fmr->mr_rkey); - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - fmr->fmr_next = fmrpool->fmr_free_list; - fmrpool->fmr_free_list = fmr; - } else { - /* add to dirty list */ - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - if (hermon_rdma_debug & 0x4) - IBTF_DPRINTF_L2("fmr", "dirty: mr %p key %x", - fmr->fmr, fmr->fmr->mr_rkey); - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) - fmr->fmr_next = fmrpool->fmr_dirty_list; - fmrpool->fmr_dirty_list = fmr; - fmrpool->fmr_dirty_len++; + if (fmr->fmr_remaps < + state->hs_cfg_profile->cp_fmr_max_remaps) { + /* add to remap list */ + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) + if (hermon_rdma_debug & 0x4) + IBTF_DPRINTF_L2("fmr", "dereg: mr %p key %x", + fmr->fmr, fmr->fmr->mr_rkey); + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) + mutex_enter(&fmrpool->remap_lock); + fmr->fmr_next = NULL; + *(fmrpool->fmr_remap_list_tail) = fmr; + fmrpool->fmr_remap_list_tail = &fmr->fmr_next; + fmrpool->fmr_remap_len++; - status = ddi_taskq_dispatch(fmrpool->fmr_taskq, - hermon_fmr_processing, fmrpool, DDI_NOSLEEP); - if (status == DDI_FAILURE) { - mutex_exit(&fmrpool->fmr_lock); - return (IBT_INSUFF_RESOURCE); - } + /* conditionally add remap list back to free list */ + fmrlast = NULL; + if (fmrpool->fmr_remap_len >= + fmrpool->fmr_remap_watermark) { + fmr = fmrpool->fmr_remap_list; + fmrlast = fmrpool->fmr_remap_list_tail; + len = fmrpool->fmr_remap_len; + fmrpool->fmr_remap_len = 0; + fmrpool->fmr_remap_list = NULL; + fmrpool->fmr_remap_list_tail = + &fmrpool->fmr_remap_list; } - } - /* Release pool lock */ - mutex_exit(&fmrpool->fmr_lock); - - return (DDI_SUCCESS); -} - + mutex_exit(&fmrpool->remap_lock); + if (fmrlast) { + mutex_enter(&fmrpool->fmr_lock); + 
*(fmrpool->fmr_free_list_tail) = fmr; + fmrpool->fmr_free_list_tail = fmrlast; + fmrpool->fmr_free_len += len; + mutex_exit(&fmrpool->fmr_lock); + } + } else { + /* add to dirty list */ + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) + if (hermon_rdma_debug & 0x4) + IBTF_DPRINTF_L2("fmr", "dirty: mr %p key %x", + fmr->fmr, fmr->fmr->mr_rkey); + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) -/* - * hermon_fmr_processing() - * If required, perform cleanup. - * Context: Called from taskq context only. - */ -static void -hermon_fmr_processing(void *fmr_args) -{ - hermon_fmrhdl_t fmrpool; - int status; - - ASSERT(fmr_args != NULL); - - fmrpool = (hermon_fmrhdl_t)fmr_args; + mutex_enter(&fmrpool->dirty_lock); + fmr->fmr_next = NULL; + *(fmrpool->fmr_dirty_list_tail) = fmr; + fmrpool->fmr_dirty_list_tail = &fmr->fmr_next; + fmrpool->fmr_dirty_len++; - /* grab pool lock */ - mutex_enter(&fmrpool->fmr_lock); - if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { - status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool); - if (status != DDI_SUCCESS) { + if (fmrpool->fmr_dirty_len >= + fmrpool->fmr_dirty_watermark) { + mutex_exit(&fmrpool->dirty_lock); + mutex_enter(&fmrpool->fmr_lock); + hermon_fmr_cleanup(fmrpool); mutex_exit(&fmrpool->fmr_lock); - return; - } - - if (fmrpool->fmr_flush_function != NULL) { - (void) fmrpool->fmr_flush_function( - (ibc_fmr_pool_hdl_t)fmrpool, - fmrpool->fmr_flush_arg); - } + } else + mutex_exit(&fmrpool->dirty_lock); } - - /* let pool lock go */ - mutex_exit(&fmrpool->fmr_lock); + return (DDI_SUCCESS); } /* * hermon_fmr_cleanup() - * Perform cleaning processing, walking the list and performing the MTT sync - * operation if required. - * Context: can be called from taskq or base context. + * Context: Called from any context. 
*/ -static int -hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) +static void +hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool) { - hermon_fmr_list_t *fmr; - hermon_fmr_list_t *fmr_next; - int sync_needed; int status; ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); - sync_needed = 0; - for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { - fmr_next = fmr->fmr_next; - fmr->fmr_remaps = 0; + if (fmrpool->fmr_stat_register == 0) + return; - (void) hermon_mr_deregister_fmr(state, fmr->fmr); + fmrpool->fmr_stat_register = 0; + membar_producer(); - /* - * Update lists. - * - add fmr back to free list - * - remove fmr from dirty list - */ - fmr->fmr_next = fmrpool->fmr_free_list; - fmrpool->fmr_free_list = fmr; - + if (hermon_fmr_verbose) + IBTF_DPRINTF_L2("fmr", "TPT_SYNC"); + status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state, + HERMON_CMD_NOSLEEP_SPIN); + if (status != HERMON_CMD_SUCCESS) { + cmn_err(CE_WARN, "fmr SYNC_TPT failed(%x)\n", status); + } + fmrpool->fmr_remap_gen++; - /* - * Because we have updated the dirty list, and deregistered the - * FMR entry, we do need to sync the TPT, so we set the - * 'sync_needed' flag here so we sync once we finish dirty_list - * processing. 
- */ - sync_needed = 1; - } - - fmrpool->fmr_dirty_list = NULL; - fmrpool->fmr_dirty_len = 0; + /* add everything back to the free list */ + mutex_enter(&fmrpool->dirty_lock); + if (fmrpool->fmr_dirty_list) { + /* add to free list */ + *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list; + fmrpool->fmr_dirty_list = NULL; + fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail; - if (sync_needed) { - status = hermon_sync_tpt_cmd_post(state, - HERMON_CMD_NOSLEEP_SPIN); - if (status != HERMON_CMD_SUCCESS) { - return (status); - } + /* reset list */ + fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list; + fmrpool->fmr_free_len += fmrpool->fmr_dirty_len; + fmrpool->fmr_dirty_len = 0; } - - return (DDI_SUCCESS); -} + mutex_exit(&fmrpool->dirty_lock); -/* - * hermon_fmr_avl_compare() - * Context: Can be called from user or kernel context. - */ -static int -hermon_fmr_avl_compare(const void *q, const void *e) -{ - hermon_fmr_list_t *entry, *query; + mutex_enter(&fmrpool->remap_lock); + if (fmrpool->fmr_remap_list) { + /* add to free list */ + *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list; + fmrpool->fmr_remap_list = NULL; + fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail; - entry = (hermon_fmr_list_t *)e; - query = (hermon_fmr_list_t *)q; + /* reset list */ + fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; + fmrpool->fmr_free_len += fmrpool->fmr_remap_len; + fmrpool->fmr_remap_len = 0; + } + mutex_exit(&fmrpool->remap_lock); - if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { - return (-1); - } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { - return (+1); - } else { - return (0); + if (fmrpool->fmr_flush_function != NULL) { + (void) fmrpool->fmr_flush_function( + (ibc_fmr_pool_hdl_t)fmrpool, + fmrpool->fmr_flush_arg); } } - - -/* - * hermon_fmr_cache_init() - * Context: Can be called from user or kernel context. 
- */ -static void -hermon_fmr_cache_init(hermon_fmrhdl_t fmr) -{ - /* Initialize the lock used for FMR cache AVL tree access */ - mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER, - DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri)); - - /* Initialize the AVL tree for the FMR cache */ - avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare, - sizeof (hermon_fmr_list_t), - offsetof(hermon_fmr_list_t, fmr_avlnode)); - - fmr->fmr_cache = 1; -} - - -/* - * hermon_fmr_cache_fini() - * Context: Can be called from user or kernel context. - */ -static void -hermon_fmr_cache_fini(hermon_fmrhdl_t fmr) -{ - void *cookie; - - /* - * Empty all entries (if necessary) and destroy the AVL tree. - * The FMRs themselves are freed as part of destroy_pool() - */ - cookie = NULL; - while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes( - &fmr->fmr_cache_avl, &cookie)) != NULL) { - /* loop through */ - } - avl_destroy(&fmr->fmr_cache_avl); - - /* Destroy the lock used for FMR cache */ - mutex_destroy(&fmr->fmr_cachelock); -}
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_mr.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_mr.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -547,6 +546,7 @@ */ status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); if (status != DDI_SUCCESS) { + IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs"); status = IBT_INSUFF_RESOURCE; goto fmralloc_fail3; } @@ -874,27 +874,32 @@ } } - /* - * Decrement the MTT reference count. Since the MTT resource - * may be shared between multiple memory regions (as a result - * of a "RegisterSharedMR" verb) it is important that we not - * free up or unbind resources prematurely. If it's not shared (as - * indicated by the return status), then free the resource. - */ - shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt); - if (!shared_mtt) { - hermon_rsrc_free(state, &mtt_refcnt); - } - - /* - * Free up the MTT entries and unbind the memory. Here, as above, we - * attempt to free these resources only if it is appropriate to do so. - */ - if (!shared_mtt) { - if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) { - hermon_mr_mem_unbind(state, bind); + /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */ + if (mtt_refcnt != NULL) { + /* + * Decrement the MTT reference count. Since the MTT resource + * may be shared between multiple memory regions (as a result + * of a "RegisterSharedMR" verb) it is important that we not + * free up or unbind resources prematurely. If it's not shared + * (as indicated by the return status), then free the resource. + */ + shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt); + if (!shared_mtt) { + hermon_rsrc_free(state, &mtt_refcnt); } - hermon_rsrc_free(state, &mtt); + + /* + * Free up the MTT entries and unbind the memory. 
Here, + * as above, we attempt to free these resources only if + * it is appropriate to do so. + * Note, 'bind' is NULL in the alloc_lkey case. + */ + if (!shared_mtt) { + if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) { + hermon_mr_mem_unbind(state, bind); + } + hermon_rsrc_free(state, &mtt); + } } /* @@ -972,65 +977,6 @@ return (DDI_SUCCESS); } -/* - * hermon_mr_invalidate_fmr() - * Context: Can be called from interrupt or base context. - */ -/* ARGSUSED */ -int -hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr) -{ - hermon_rsrc_t *mpt; - uint64_t *mpt_table; - - mutex_enter(&mr->mr_lock); - mpt = mr->mr_mptrsrcp; - mpt_table = (uint64_t *)mpt->hr_addr; - - /* Write MPT status to SW bit */ - *(uint8_t *)&mpt_table[0] = 0xF0; - - membar_producer(); - - /* invalidate mem key value */ - *(uint32_t *)&mpt_table[1] = 0; - - /* invalidate lkey value */ - *(uint32_t *)&mpt_table[4] = 0; - - membar_producer(); - - /* Write MPT status to HW bit */ - *(uint8_t *)&mpt_table[0] = 0x00; - - mutex_exit(&mr->mr_lock); - - return (DDI_SUCCESS); -} - -/* - * hermon_mr_deregister_fmr() - * Context: Can be called from interrupt or base context. - */ -/* ARGSUSED */ -int -hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) -{ - hermon_rsrc_t *mpt; - uint64_t *mpt_table; - - mutex_enter(&mr->mr_lock); - mpt = mr->mr_mptrsrcp; - mpt_table = (uint64_t *)mpt->hr_addr; - - /* Write MPT status to SW bit */ - *(uint8_t *)&mpt_table[0] = 0xF0; - - mutex_exit(&mr->mr_lock); - - return (DDI_SUCCESS); -} - /* * hermon_mr_query() @@ -1877,6 +1823,442 @@ } /* + * hermon_dma_mr_register() + * Context: Can be called from base context. 
+ */ +int +hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd, + ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl) +{ + hermon_rsrc_t *mpt, *rsrc; + hermon_hw_dmpt_t mpt_entry; + hermon_mrhdl_t mr; + ibt_mr_flags_t flags; + uint_t sleep; + int status; + + /* Extract the flags field */ + flags = mr_attr->dmr_flags; + + /* + * Check the sleep flag. Ensure that it is consistent with the + * current thread context (i.e. if we are currently in the interrupt + * context, then we shouldn't be attempting to sleep). + */ + sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP; + if ((sleep == HERMON_SLEEP) && + (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { + status = IBT_INVALID_PARAM; + goto mrcommon_fail; + } + + /* Increment the reference count on the protection domain (PD) */ + hermon_pd_refcnt_inc(pd); + + /* + * Allocate an MPT entry. This will be filled in with all the + * necessary parameters to define the memory region. And then + * ownership will be passed to the hardware in the final step + * below. If we fail here, we must undo the protection domain + * reference count. + */ + status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto mrcommon_fail1; + } + + /* + * Allocate the software structure for tracking the memory region (i.e. + * the Hermon Memory Region handle). If we fail here, we must undo + * the protection domain reference count and the previous resource + * allocation. + */ + status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto mrcommon_fail2; + } + mr = (hermon_mrhdl_t)rsrc->hr_addr; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) + bzero(mr, sizeof (*mr)); + + /* + * Setup and validate the memory region access flags. This means + * translating the IBTF's enable flags into the access flags that + * will be used in later operations. 
+ */ + mr->mr_accflag = 0; + if (flags & IBT_MR_ENABLE_WINDOW_BIND) + mr->mr_accflag |= IBT_MR_WINDOW_BIND; + if (flags & IBT_MR_ENABLE_LOCAL_WRITE) + mr->mr_accflag |= IBT_MR_LOCAL_WRITE; + if (flags & IBT_MR_ENABLE_REMOTE_READ) + mr->mr_accflag |= IBT_MR_REMOTE_READ; + if (flags & IBT_MR_ENABLE_REMOTE_WRITE) + mr->mr_accflag |= IBT_MR_REMOTE_WRITE; + if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) + mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; + + /* + * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed + * from a certain number of "constrained" bits (the least significant + * bits) and some number of "unconstrained" bits. The constrained + * bits must be set to the index of the entry in the MPT table, but + * the unconstrained bits can be set to any value we wish. Note: + * if no remote access is required, then the RKey value is not filled + * in. Otherwise both Rkey and LKey are given the same value. + */ + if (mpt) + mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); + + /* + * Fill in the MPT entry. This is the final step before passing + * ownership of the MPT entry to the Hermon hardware. We use all of + * the information collected/calculated above to fill in the + * requisite portions of the MPT. Do this ONLY for DMPTs. + */ + bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); + + mpt_entry.status = HERMON_MPT_SW_OWNERSHIP; + mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; + mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; + mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; + mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; + mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; + mpt_entry.lr = 1; + mpt_entry.phys_addr = 1; /* critical bit for this */ + mpt_entry.reg_win = HERMON_MPT_IS_REGION; + + mpt_entry.entity_sz = mr->mr_logmttpgsz; + mpt_entry.mem_key = mr->mr_lkey; + mpt_entry.pd = pd->pd_pdnum; + mpt_entry.rem_acc_en = 0; + mpt_entry.fast_reg_en = 0; + mpt_entry.en_inval = 0; + mpt_entry.lkey = 0; + mpt_entry.win_cnt = 0; + + mpt_entry.start_addr = mr_attr->dmr_paddr; + mpt_entry.reg_win_len = mr_attr->dmr_len; + if (mr_attr->dmr_len == 0) + mpt_entry.len_b64 = 1; /* needed for 2^^64 length */ + + mpt_entry.mtt_addr_h = 0; + mpt_entry.mtt_addr_l = 0; + + /* + * Write the MPT entry to hardware. Lastly, we pass ownership of + * the entry to the hardware if needed. Note: in general, this + * operation shouldn't fail. But if it does, we have to undo + * everything we've done above before returning error. + * + * For Hermon, this routine (which is common to the contexts) will only + * set the ownership if needed - the process of passing the context + * itself to HW will take care of setting up the MPT (based on type + * and index). + */ + + mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */ + status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, + sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); + if (status != HERMON_CMD_SUCCESS) { + cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", + status); + if (status == HERMON_CMD_INVALID_STATUS) { + hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); + } + status = ibc_get_ci_failure(0); + goto mrcommon_fail7; + } + + /* + * Fill in the rest of the Hermon Memory Region handle. Having + * successfully transferred ownership of the MPT, we can update the + * following fields for use in further operations on the MR. 
+ */ + mr->mr_mttaddr = 0; + + mr->mr_log2_pgsz = 0; + mr->mr_mptrsrcp = mpt; + mr->mr_mttrsrcp = NULL; + mr->mr_pdhdl = pd; + mr->mr_rsrcp = rsrc; + mr->mr_is_umem = 0; + mr->mr_is_fmr = 0; + mr->mr_umemcookie = NULL; + mr->mr_umem_cbfunc = NULL; + mr->mr_umem_cbarg1 = NULL; + mr->mr_umem_cbarg2 = NULL; + mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); + mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); + mr->mr_mpt_type = HERMON_MPT_DMPT; + + *mrhdl = mr; + + return (DDI_SUCCESS); + +/* + * The following is cleanup for all possible failure cases in this routine + */ +mrcommon_fail7: + hermon_rsrc_free(state, &rsrc); +mrcommon_fail2: + hermon_rsrc_free(state, &mpt); +mrcommon_fail1: + hermon_pd_refcnt_dec(pd); +mrcommon_fail: + return (status); +} + +/* + * hermon_mr_alloc_lkey() + * Context: Can be called from base context. + */ +int +hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd, + ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl) +{ + hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; + hermon_sw_refcnt_t *swrc_tmp; + hermon_hw_dmpt_t mpt_entry; + hermon_mrhdl_t mr; + uint64_t mtt_addr; + uint_t sleep; + int status; + + /* Increment the reference count on the protection domain (PD) */ + hermon_pd_refcnt_inc(pd); + + sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP; + + /* + * Allocate an MPT entry. This will be filled in with "some" of the + * necessary parameters to define the memory region. And then + * ownership will be passed to the hardware in the final step + * below. If we fail here, we must undo the protection domain + * reference count. + * + * The MTTs will get filled in when the FRWR is processed. + */ + status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto alloclkey_fail1; + } + + /* + * Allocate the software structure for tracking the memory region (i.e. + * the Hermon Memory Region handle). 
If we fail here, we must undo + * the protection domain reference count and the previous resource + * allocation. + */ + status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto alloclkey_fail2; + } + mr = (hermon_mrhdl_t)rsrc->hr_addr; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) + bzero(mr, sizeof (*mr)); + mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY; + + mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); + + status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto alloclkey_fail3; + } + mr->mr_logmttpgsz = PAGESHIFT; + + /* + * Allocate MTT reference count (to track shared memory regions). + * This reference count resource may never be used on the given + * memory region, but if it is ever later registered as "shared" + * memory region then this resource will be necessary. If we fail + * here, we do pretty much the same as above to clean up. 
+ */ + status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep, + &mtt_refcnt); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto alloclkey_fail4; + } + mr->mr_mttrefcntp = mtt_refcnt; + swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp)) + HERMON_MTT_REFCNT_INIT(swrc_tmp); + + mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); + + bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); + mpt_entry.status = HERMON_MPT_FREE; + mpt_entry.lw = 1; + mpt_entry.lr = 1; + mpt_entry.reg_win = HERMON_MPT_IS_REGION; + mpt_entry.entity_sz = mr->mr_logmttpgsz; + mpt_entry.mem_key = mr->mr_lkey; + mpt_entry.pd = pd->pd_pdnum; + mpt_entry.fast_reg_en = 1; + mpt_entry.rem_acc_en = 1; + mpt_entry.en_inval = 1; + if (flags & IBT_KEY_REMOTE) { + mpt_entry.ren_inval = 1; + } + mpt_entry.mtt_size = nummtt; + mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */ + mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */ + + /* + * Write the MPT entry to hardware. Lastly, we pass ownership of + * the entry to the hardware if needed. Note: in general, this + * operation shouldn't fail. But if it does, we have to undo + * everything we've done above before returning error. + * + * For Hermon, this routine (which is common to the contexts) will only + * set the ownership if needed - the process of passing the context + * itself to HW will take care of setting up the MPT (based on type + * and index). + */ + status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, + sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); + if (status != HERMON_CMD_SUCCESS) { + cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command " + "failed: %08x\n", status); + if (status == HERMON_CMD_INVALID_STATUS) { + hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); + } + status = ibc_get_ci_failure(0); + goto alloclkey_fail5; + } + + /* + * Fill in the rest of the Hermon Memory Region handle. 
Having + * successfully transferred ownership of the MPT, we can update the + * following fields for use in further operations on the MR. + */ + mr->mr_accflag = IBT_MR_LOCAL_WRITE; + mr->mr_mttaddr = mtt_addr; + mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT); + mr->mr_mptrsrcp = mpt; + mr->mr_mttrsrcp = mtt; + mr->mr_pdhdl = pd; + mr->mr_rsrcp = rsrc; + mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); + mr->mr_rkey = mr->mr_lkey; + mr->mr_mpt_type = HERMON_MPT_DMPT; + + *mrhdl = mr; + return (DDI_SUCCESS); + +alloclkey_fail5: + hermon_rsrc_free(state, &mtt_refcnt); +alloclkey_fail4: + hermon_rsrc_free(state, &mtt); +alloclkey_fail3: + hermon_rsrc_free(state, &rsrc); +alloclkey_fail2: + hermon_rsrc_free(state, &mpt); +alloclkey_fail1: + hermon_pd_refcnt_dec(pd); + return (status); +} + +/* + * hermon_mr_fexch_mpt_init() + * Context: Can be called from base context. + * + * This is the same as alloc_lkey, but not returning an mrhdl. + */ +int +hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep) +{ + hermon_hw_dmpt_t mpt_entry; + int status; + + /* + * The MTTs will get filled in when the FRWR is processed. + */ + + bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); + mpt_entry.status = HERMON_MPT_FREE; + mpt_entry.lw = 1; + mpt_entry.lr = 1; + mpt_entry.rw = 1; + mpt_entry.rr = 1; + mpt_entry.reg_win = HERMON_MPT_IS_REGION; + mpt_entry.entity_sz = PAGESHIFT; + mpt_entry.mem_key = mpt_indx; + mpt_entry.pd = pd->pd_pdnum; + mpt_entry.fast_reg_en = 1; + mpt_entry.rem_acc_en = 1; + mpt_entry.en_inval = 1; + mpt_entry.ren_inval = 1; + mpt_entry.mtt_size = nummtt; + mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */ + mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */ + + /* + * Write the MPT entry to hardware. Lastly, we pass ownership of + * the entry to the hardware if needed. Note: in general, this + * operation shouldn't fail. 
But if it does, we have to undo + * everything we've done above before returning error. + * + * For Hermon, this routine (which is common to the contexts) will only + * set the ownership if needed - the process of passing the context + * itself to HW will take care of setting up the MPT (based on type + * and index). + */ + status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, + sizeof (hermon_hw_dmpt_t), mpt_indx, sleep); + if (status != HERMON_CMD_SUCCESS) { + cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command " + "failed: %08x\n", status); + if (status == HERMON_CMD_INVALID_STATUS) { + hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); + } + status = ibc_get_ci_failure(0); + return (status); + } + /* Increment the reference count on the protection domain (PD) */ + hermon_pd_refcnt_inc(pd); + + return (DDI_SUCCESS); +} + +/* + * hermon_mr_fexch_mpt_fini() + * Context: Can be called from base context. + * + * This is the same as deregister_mr, without an mrhdl. + */ +int +hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t mpt_indx, uint_t sleep) +{ + int status; + + status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, + NULL, 0, mpt_indx, sleep); + if (status != DDI_SUCCESS) { + cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command " + "failed: %08x\n", status); + if (status == HERMON_CMD_INVALID_STATUS) { + hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); + } + status = ibc_get_ci_failure(0); + return (status); + } + + /* Decrement the reference count on the protection domain (PD) */ + hermon_pd_refcnt_dec(pd); + + return (DDI_SUCCESS); +} + +/* * hermon_mr_mtt_bind() * Context: Can be called from interrupt or base context. 
*/ @@ -2810,6 +3192,11 @@ { int status; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) + /* there is nothing to unbind for alloc_lkey */ + if (bind->bi_type == HERMON_BINDHDL_LKEY) + return; + /* * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to * is actually allocated by ddi_umem_iosetup() internally, then
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_qp.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_qp.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -63,9 +62,11 @@ uint_t sleepflag) { hermon_rsrc_t *qpc, *rsrc; + hermon_rsrc_type_t rsrc_type; hermon_umap_db_entry_t *umapdb; hermon_qphdl_t qp; ibt_qp_alloc_attr_t *attr_p; + ibt_qp_alloc_flags_t alloc_flags; ibt_qp_type_t type; hermon_qp_wq_type_t swq_type; ibtl_qp_hdl_t ibt_qphdl; @@ -101,6 +102,49 @@ queuesz_p = qpinfo->qpi_queueszp; qpn = qpinfo->qpi_qpn; qphdl = &qpinfo->qpi_qphdl; + alloc_flags = attr_p->qp_alloc_flags; + + /* + * Verify correctness of alloc_flags. + * + * 1. FEXCH and RSS are only allocated via qp_range. + */ + if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) { + return (IBT_INVALID_PARAM); + } + rsrc_type = HERMON_QPC; + qp_is_umap = 0; + + /* 2. Make sure only one of these flags is set. */ + switch (alloc_flags & + (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) { + case IBT_QP_USER_MAP: + qp_is_umap = 1; + break; + case IBT_QP_USES_RFCI: + if (type != IBT_UD_RQP) + return (IBT_INVALID_PARAM); + + switch (attr_p->qp_fc.fc_hca_port) { + case 1: + rsrc_type = HERMON_QPC_RFCI_PORT1; + break; + case 2: + rsrc_type = HERMON_QPC_RFCI_PORT2; + break; + default: + return (IBT_INVALID_PARAM); + } + break; + case IBT_QP_USES_FCMD: + if (type != IBT_UD_RQP) + return (IBT_INVALID_PARAM); + break; + case 0: + break; + default: + return (IBT_INVALID_PARAM); /* conflicting flags set */ + } /* * Determine whether QP is being allocated for userland access or @@ -110,15 +154,11 @@ * (e.g. if the process has not previously open()'d the Hermon driver), * then an error is returned. */ - - - qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 
1 : 0; if (qp_is_umap) { status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); if (status != DDI_SUCCESS) { - status = IBT_INVALID_PARAM; - goto qpalloc_fail; + return (IBT_INVALID_PARAM); } uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; } else { @@ -128,7 +168,7 @@ /* * Determine whether QP is being associated with an SRQ */ - qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; + qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; if (qp_srq_en) { /* * Check for valid SRQ handle pointers @@ -172,14 +212,22 @@ /* * Check for valid CQ handle pointers + * + * FCMD QPs do not require a receive cq handle. */ - if ((attr_p->qp_ibc_scq_hdl == NULL) || - (attr_p->qp_ibc_rcq_hdl == NULL)) { + if (attr_p->qp_ibc_scq_hdl == NULL) { status = IBT_CQ_HDL_INVALID; goto qpalloc_fail1; } sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl; - rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; + if ((attr_p->qp_ibc_rcq_hdl == NULL)) { + if ((alloc_flags & IBT_QP_USES_FCMD) == 0) { + status = IBT_CQ_HDL_INVALID; + goto qpalloc_fail1; + } + rq_cq = sq_cq; /* just use the send cq */ + } else + rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; /* * Increment the reference count on the CQs. One or both of these @@ -206,7 +254,7 @@ * passing the QP to hardware. If we fail here, we must undo all * the reference count (CQ and PD). */ - status = hermon_rsrc_alloc(state, HERMON_QPC, 1, sleepflag, &qpc); + status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc); if (status != DDI_SUCCESS) { status = IBT_INSUFF_RESOURCE; goto qpalloc_fail3; @@ -226,15 +274,23 @@ bzero(qp, sizeof (struct hermon_sw_qp_s)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) + qp->qp_alloc_flags = alloc_flags; + /* * Calculate the QP number from QPC index. This routine handles * all of the operations necessary to keep track of used, unused, * and released QP numbers. 
*/ - status = hermon_qp_create_qpn(state, qp, qpc); - if (status != DDI_SUCCESS) { - status = IBT_INSUFF_RESOURCE; - goto qpalloc_fail5; + if (type == IBT_UD_RQP) { + qp->qp_qpnum = qpc->hr_indx; + qp->qp_ring = qp->qp_qpnum << 8; + qp->qp_qpn_hdl = NULL; + } else { + status = hermon_qp_create_qpn(state, qp, qpc); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail5; + } } /* @@ -548,12 +604,12 @@ qp->qp_qpcrsrcp = qpc; qp->qp_rsrcp = rsrc; qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); qp->qp_pdhdl = pd; qp->qp_mrhdl = mr; qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; qp->qp_is_special = 0; - qp->qp_is_umap = qp_is_umap; qp->qp_uarpg = uarpg; qp->qp_umap_dhp = (devmap_cookie_t)NULL; qp->qp_sq_cqhdl = sq_cq; @@ -585,18 +641,22 @@ */ if (qp_srq_en) { qp->qp_srqhdl = srq; - qp->qp_srq_en = HERMON_QP_SRQ_ENABLED; hermon_srq_refcnt_inc(qp->qp_srqhdl); } else { qp->qp_srqhdl = NULL; - qp->qp_srq_en = HERMON_QP_SRQ_DISABLED; } /* Determine the QP service type */ + qp->qp_type = type; if (type == IBT_RC_RQP) { qp->qp_serv_type = HERMON_QP_RC; } else if (type == IBT_UD_RQP) { - qp->qp_serv_type = HERMON_QP_UD; + if (alloc_flags & IBT_QP_USES_RFCI) + qp->qp_serv_type = HERMON_QP_RFCI; + else if (alloc_flags & IBT_QP_USES_FCMD) + qp->qp_serv_type = HERMON_QP_FCMND; + else + qp->qp_serv_type = HERMON_QP_UD; } else { qp->qp_serv_type = HERMON_QP_UC; } @@ -634,8 +694,7 @@ * Put QP handle in Hermon QPNum-to-QPHdl list. 
Then fill in the * "qphdl" and return success */ - ASSERT(state->hs_qphdl[qpc->hr_indx] == NULL); - state->hs_qphdl[qpc->hr_indx] = qp; + hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp); /* * If this is a user-mappable QP, then we need to insert the previously @@ -647,8 +706,6 @@ } mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(state->hs_intrmsi_pri)); - mutex_init(&qp->qp_rq_lock, NULL, MUTEX_DRIVER, - DDI_INTR_PRI(state->hs_intrmsi_pri)); *qphdl = qp; @@ -677,7 +734,12 @@ * Releasing the QPN will also free up the QPC context. Update * the QPC context pointer to indicate this. */ - hermon_qp_release_qpn(state, qp->qp_qpn_hdl, HERMON_QPN_RELEASE); + if (qp->qp_qpn_hdl) { + hermon_qp_release_qpn(state, qp->qp_qpn_hdl, + HERMON_QPN_RELEASE); + } else { + hermon_rsrc_free(state, &qpc); + } qpc = NULL; qpalloc_fail5: hermon_rsrc_free(state, &rsrc); @@ -826,6 +888,7 @@ bzero(qp, sizeof (struct hermon_sw_qp_s)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) + qp->qp_alloc_flags = attr_p->qp_alloc_flags; /* * Actual QP number is a combination of the index of the QPC and @@ -1053,13 +1116,13 @@ qp->qp_qpcrsrcp = qpc; qp->qp_rsrcp = rsrc; qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); qp->qp_pdhdl = pd; qp->qp_mrhdl = mr; qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; qp->qp_is_special = (type == IBT_SMI_SQP) ? HERMON_QP_SMI : HERMON_QP_GSI; - qp->qp_is_umap = 0; qp->qp_uarpg = uarpg; qp->qp_umap_dhp = (devmap_cookie_t)NULL; qp->qp_sq_cqhdl = sq_cq; @@ -1077,10 +1140,10 @@ qp->qp_sqd_still_draining = 0; qp->qp_hdlrarg = (void *)ibt_qphdl; qp->qp_mcg_refcnt = 0; - qp->qp_srq_en = 0; qp->qp_srqhdl = NULL; /* All special QPs are UD QP service type */ + qp->qp_type = IBT_UD_RQP; qp->qp_serv_type = HERMON_QP_UD; /* @@ -1116,8 +1179,10 @@ * Put QP handle in Hermon QPNum-to-QPHdl list. 
Then fill in the * "qphdl" and return success */ - ASSERT(state->hs_qphdl[qpc->hr_indx + port] == NULL); - state->hs_qphdl[qpc->hr_indx + port] = qp; + hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp); + + mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); *qphdl = qp; @@ -1152,6 +1217,638 @@ /* + * hermon_qp_alloc_range() + * Context: Can be called only from user or kernel context. + */ +int +hermon_qp_alloc_range(hermon_state_t *state, uint_t log2, + hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl, + ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq, + hermon_qphdl_t *qphdl, uint_t sleepflag) +{ + hermon_rsrc_t *qpc, *rsrc; + hermon_rsrc_type_t rsrc_type; + hermon_qphdl_t qp; + hermon_qp_range_t *qp_range_p; + ibt_qp_alloc_attr_t *attr_p; + ibt_qp_type_t type; + hermon_qp_wq_type_t swq_type; + ibt_chan_sizes_t *queuesz_p; + ibt_mr_attr_t mr_attr; + hermon_mr_options_t mr_op; + hermon_srqhdl_t srq; + hermon_pdhdl_t pd; + hermon_cqhdl_t sq_cq, rq_cq; + hermon_mrhdl_t mr; + uint64_t qp_desc_off; + uint64_t *thewqe, thewqesz; + uint32_t *sq_buf, *rq_buf; + uint32_t log_qp_sq_size, log_qp_rq_size; + uint32_t sq_size, rq_size; + uint32_t sq_depth, rq_depth; + uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift; + uint32_t max_sgl, max_recv_sgl, uarpg; + uint_t qp_srq_en, i, j; + int ii; /* loop counter for range */ + int status, flag; + uint_t serv_type; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p)) + + /* + * Extract the necessary info from the hermon_qp_info_t structure + */ + attr_p = qpinfo->qpi_attrp; + type = qpinfo->qpi_type; + queuesz_p = qpinfo->qpi_queueszp; + + if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) { + if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table) + return (IBT_INSUFF_RESOURCE); + rsrc_type = HERMON_QPC; + serv_type = HERMON_QP_UD; + } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) { + if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp) + return 
(IBT_INSUFF_RESOURCE); + switch (attr_p->qp_fc.fc_hca_port) { + case 1: + rsrc_type = HERMON_QPC_FEXCH_PORT1; + break; + case 2: + rsrc_type = HERMON_QPC_FEXCH_PORT2; + break; + default: + return (IBT_INVALID_PARAM); + } + serv_type = HERMON_QP_FEXCH; + } else + return (IBT_INVALID_PARAM); + + /* + * Determine whether QP is being allocated for userland access or + * whether it is being allocated for kernel access. If the QP is + * being allocated for userland access, fail (too complex for now). + */ + if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) { + return (IBT_NOT_SUPPORTED); + } else { + uarpg = state->hs_kernel_uar_index; + } + + /* + * Determine whether QP is being associated with an SRQ + */ + qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; + if (qp_srq_en) { + /* + * Check for valid SRQ handle pointers + */ + if (attr_p->qp_ibc_srq_hdl == NULL) { + return (IBT_SRQ_HDL_INVALID); + } + srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl; + } + + /* + * Check for valid QP service type (only UD supported) + */ + if (type != IBT_UD_RQP) { + return (IBT_QP_SRV_TYPE_INVALID); + } + + /* + * Check for valid PD handle pointer + */ + if (attr_p->qp_pd_hdl == NULL) { + return (IBT_PD_HDL_INVALID); + } + pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; + + /* + * If on an SRQ, check to make sure the PD is the same + */ + if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) { + return (IBT_PD_HDL_INVALID); + } + + /* set loop variable here, for freeing resources on error */ + ii = 0; + + /* + * Allocate 2^log2 contiguous/aligned QP context entries. This will + * be filled in with all the necessary parameters to define the + * Queue Pairs. Unlike other Hermon hardware resources, ownership + * is not immediately given to hardware in the final step here. + * Instead, we must wait until the QP is later transitioned to the + * "Init" state before passing the QP to hardware. If we fail here, + * we must undo all the reference count (CQ and PD). 
+ */ + status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag, + &qpc); + if (status != DDI_SUCCESS) { + return (IBT_INSUFF_RESOURCE); + } + + if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) + /* + * Need to init the MKEYs for the FEXCH QPs. + * + * For FEXCH QP subranges, we return the QPN base as + * "relative" to the full FEXCH QP range for the port. + */ + *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state, + attr_p->qp_fc.fc_hca_port, qpc->hr_indx); + else + *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx; + + qp_range_p = kmem_alloc(sizeof (*qp_range_p), + (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP); + if (qp_range_p == NULL) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail0; + } + mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); + mutex_enter(&qp_range_p->hqpr_lock); + qp_range_p->hqpr_refcnt = 1 << log2; + qp_range_p->hqpr_qpcrsrc = qpc; + mutex_exit(&qp_range_p->hqpr_lock); + +for_each_qp: + + /* Increment the reference count on the protection domain (PD) */ + hermon_pd_refcnt_inc(pd); + + rq_cq = (hermon_cqhdl_t)recv_cq[ii]; + sq_cq = (hermon_cqhdl_t)send_cq[ii]; + if (sq_cq == NULL) { + if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) { + /* if no send completions, just use rq_cq */ + sq_cq = rq_cq; + } else { + status = IBT_CQ_HDL_INVALID; + goto qpalloc_fail1; + } + } + + /* + * Increment the reference count on the CQs. One or both of these + * could return error if we determine that the given CQ is already + * being used with a special (SMI/GSI) QP. + */ + status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL); + if (status != DDI_SUCCESS) { + status = IBT_CQ_HDL_INVALID; + goto qpalloc_fail1; + } + status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL); + if (status != DDI_SUCCESS) { + status = IBT_CQ_HDL_INVALID; + goto qpalloc_fail2; + } + + /* + * Allocate the software structure for tracking the queue pair + * (i.e. the Hermon Queue Pair handle). 
If we fail here, we must + * undo the reference counts and the previous resource allocation. + */ + status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail4; + } + qp = (hermon_qphdl_t)rsrc->hr_addr; + bzero(qp, sizeof (struct hermon_sw_qp_s)); + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) + qp->qp_alloc_flags = attr_p->qp_alloc_flags; + + /* + * Calculate the QP number from QPC index. This routine handles + * all of the operations necessary to keep track of used, unused, + * and released QP numbers. + */ + qp->qp_qpnum = qpc->hr_indx + ii; + qp->qp_ring = qp->qp_qpnum << 8; + qp->qp_qpn_hdl = NULL; + + /* + * Allocate the doorbell record. Hermon just needs one for the RQ, + * if the QP is not associated with an SRQ, and use uarpg (above) as + * the uar index + */ + + if (!qp_srq_en) { + status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, + &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail6; + } + } + + qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO); + + /* + * We verify that the requested number of SGL is valid (i.e. + * consistent with the device limits and/or software-configured + * limits). If not, then obviously the same cleanup needs to be done. + */ + max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz; + swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD; + max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz; + if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || + (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) { + status = IBT_HCA_SGL_EXCEEDED; + goto qpalloc_fail7; + } + + /* + * Determine this QP's WQE stride (for both the Send and Recv WQEs). + * This will depend on the requested number of SGLs. Note: this + * has the side-effect of also calculating the real number of SGLs + * (for the calculated WQE size). 
+ * + * For QP's on an SRQ, we set these to 0. + */ + if (qp_srq_en) { + qp->qp_rq_log_wqesz = 0; + qp->qp_rq_sgl = 0; + } else { + hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, + max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ, + &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); + } + hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, + max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); + + sq_wqe_size = 1 << qp->qp_sq_log_wqesz; + + /* NOTE: currently policy in driver, later maybe IBTF interface */ + qp->qp_no_prefetch = 0; + + /* + * for prefetching, we need to add the number of wqes in + * the 2k area plus one to the number requested, but + * ONLY for send queue. If no_prefetch == 1 (prefetch off) + * it's exactly TWO wqes for the headroom + */ + if (qp->qp_no_prefetch) + qp->qp_sq_headroom = 2 * sq_wqe_size; + else + qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; + /* + * hdrm wqes must be integral since both sq_wqe_size & + * HERMON_QP_OH_SIZE are power of 2 + */ + qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); + + + /* + * Calculate the appropriate size for the work queues. + * For send queue, add in the headroom wqes to the calculation. + * Note: All Hermon QP work queues must be a power-of-2 in size. Also + * they may not be any smaller than HERMON_QP_MIN_SIZE. 
This step is + * to round the requested size up to the next highest power-of-2 + */ + /* first, adjust to a minimum and tell the caller the change */ + attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, + HERMON_QP_MIN_SIZE); + attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, + HERMON_QP_MIN_SIZE); + /* + * now, calculate the alloc size, taking into account + * the headroom for the sq + */ + log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes); + /* if the total is a power of two, reduce it */ + if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes) & + (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0) { + log_qp_sq_size = log_qp_sq_size - 1; + } + + log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); + if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) { + log_qp_rq_size = log_qp_rq_size - 1; + } + + /* + * Next we verify that the rounded-up size is valid (i.e. consistent + * with the device limits and/or software-configured limits). If not, + * then obviously we have a lot of cleanup to do before returning. + * + * NOTE: the first condition deals with the (test) case of cs_sq + * being just less than 2^32. In this case, the headroom addition + * to the requested cs_sq will pass the test when it should not. + * This test no longer lets that case slip through the check. + */ + if ((attr_p->qp_sizes.cs_sq > + (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) || + (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || + (!qp_srq_en && (log_qp_rq_size > + state->hs_cfg_profile->cp_log_max_qp_sz))) { + status = IBT_HCA_WR_EXCEEDED; + goto qpalloc_fail7; + } + + /* + * Allocate the memory for QP work queues. Since Hermon work queues + * are not allowed to cross a 32-bit (4GB) boundary, the alignment of + * the work queue memory is very important. We used to allocate + * work queues (the combined receive and send queues) so that they + * would be aligned on their combined size. 
That alignment guaranteed + * that they would never cross the 4GB boundary (Hermon work queues + * are on the order of MBs at maximum). Now we are able to relax + * this alignment constraint by ensuring that the IB address assigned + * to the queue memory (as a result of the hermon_mr_register() call) + * is offset from zero. + * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to + * guarantee the alignment, but when attempting to use IOMMU bypass + * mode we found that we were not allowed to specify any alignment + * that was more restrictive than the system page size. + * So we avoided this constraint by passing two alignment values, + * one for the memory allocation itself and the other for the DMA + * handle (for later bind). This used to cause more memory than + * necessary to be allocated (in order to guarantee the more + * restrictive alignment contraint). But by guaranteeing the + * zero-based IB virtual address for the queue, we are able to + * conserve this memory. 
+ */ + sq_wqe_size = 1 << qp->qp_sq_log_wqesz; + sq_depth = 1 << log_qp_sq_size; + sq_size = sq_depth * sq_wqe_size; + + /* QP on SRQ sets these to 0 */ + if (qp_srq_en) { + rq_wqe_size = 0; + rq_size = 0; + } else { + rq_wqe_size = 1 << qp->qp_rq_log_wqesz; + rq_depth = 1 << log_qp_rq_size; + rq_size = rq_depth * rq_wqe_size; + } + + qp->qp_wqinfo.qa_size = sq_size + rq_size; + qp->qp_wqinfo.qa_alloc_align = PAGESIZE; + qp->qp_wqinfo.qa_bind_align = PAGESIZE; + qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; + status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail7; + } + + /* + * Sort WQs in memory according to stride (*q_wqe_size), largest first + * If they are equal, still put the SQ first + */ + qp->qp_sq_baseaddr = 0; + qp->qp_rq_baseaddr = 0; + if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { + sq_buf = qp->qp_wqinfo.qa_buf_aligned; + + /* if this QP is on an SRQ, set the rq_buf to NULL */ + if (qp_srq_en) { + rq_buf = NULL; + } else { + rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); + qp->qp_rq_baseaddr = sq_size; + } + } else { + rq_buf = qp->qp_wqinfo.qa_buf_aligned; + sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); + qp->qp_sq_baseaddr = rq_size; + } + + qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); + if (qp->qp_sq_wqhdr == NULL) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail8; + } + if (qp_srq_en) { + qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr; + qp->qp_rq_wqavl.wqa_srq_en = 1; + qp->qp_rq_wqavl.wqa_srq = srq; + } else { + qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth); + if (qp->qp_rq_wqhdr == NULL) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail8; + } + qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; + } + qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; + qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; + qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; + qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; + qp->qp_rq_wqavl.wqa_type = 
HERMON_WR_RECV; + + /* + * Register the memory for the QP work queues. The memory for the + * QP must be registered in the Hermon cMPT tables. This gives us the + * LKey to specify in the QP context later. Note: The memory for + * Hermon work queues (both Send and Recv) must be contiguous and + * registered as a single memory region. Note: If the QP memory is + * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to + * meet the alignment restriction, we pass the "mro_bind_override_addr" + * flag in the call to hermon_mr_register(). This guarantees that the + * resulting IB vaddr will be zero-based (modulo the offset into the + * first page). If we fail here, we still have the bunch of resource + * and reference count cleanup to do. + */ + flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : + IBT_MR_NOSLEEP; + mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; + mr_attr.mr_len = qp->qp_wqinfo.qa_size; + mr_attr.mr_as = NULL; + mr_attr.mr_flags = flag; + /* HERMON_QUEUE_LOCATION_NORMAL */ + mr_op.mro_bind_type = + state->hs_cfg_profile->cp_iommu_bypass; + mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; + mr_op.mro_bind_override_addr = 1; + status = hermon_mr_register(state, pd, &mr_attr, &mr, + &mr_op, HERMON_QP_CMPT); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto qpalloc_fail9; + } + + /* + * Calculate the offset between the kernel virtual address space + * and the IB virtual address space. This will be used when + * posting work requests to properly initialize each WQE. + */ + qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - + (uint64_t)mr->mr_bindinfo.bi_addr; + + /* + * Fill in all the return arguments (if necessary). 
This includes + * real work queue sizes (in wqes), real SGLs, and QP number + */ + if (queuesz_p != NULL) { + queuesz_p->cs_sq = + (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; + queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; + + /* if this QP is on an SRQ, set these to 0 */ + if (qp_srq_en) { + queuesz_p->cs_rq = 0; + queuesz_p->cs_rq_sgl = 0; + } else { + queuesz_p->cs_rq = (1 << log_qp_rq_size); + queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; + } + } + + /* + * Fill in the rest of the Hermon Queue Pair handle. + */ + qp->qp_qpcrsrcp = NULL; + qp->qp_rsrcp = rsrc; + qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); + qp->qp_pdhdl = pd; + qp->qp_mrhdl = mr; + qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? + HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; + qp->qp_is_special = 0; + qp->qp_uarpg = uarpg; + qp->qp_umap_dhp = (devmap_cookie_t)NULL; + qp->qp_sq_cqhdl = sq_cq; + qp->qp_sq_bufsz = (1 << log_qp_sq_size); + qp->qp_sq_logqsz = log_qp_sq_size; + qp->qp_sq_buf = sq_buf; + qp->qp_desc_off = qp_desc_off; + qp->qp_rq_cqhdl = rq_cq; + qp->qp_rq_buf = rq_buf; + qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) != + 0; + + /* if this QP is on an SRQ, set rq_bufsz to 0 */ + if (qp_srq_en) { + qp->qp_rq_bufsz = 0; + qp->qp_rq_logqsz = 0; + } else { + qp->qp_rq_bufsz = (1 << log_qp_rq_size); + qp->qp_rq_logqsz = log_qp_rq_size; + } + + qp->qp_forward_sqd_event = 0; + qp->qp_sqd_still_draining = 0; + qp->qp_hdlrarg = (void *)ibt_qphdl[ii]; + qp->qp_mcg_refcnt = 0; + + /* + * If this QP is to be associated with an SRQ, set the SRQ handle + */ + if (qp_srq_en) { + qp->qp_srqhdl = srq; + hermon_srq_refcnt_inc(qp->qp_srqhdl); + } else { + qp->qp_srqhdl = NULL; + } + + qp->qp_type = IBT_UD_RQP; + qp->qp_serv_type = serv_type; + + /* + * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed + */ + + /* + * Initialize the SQ WQEs - all that needs to be done is every 64 bytes + * set the quadword to all F's - high-order bit is 
owner (init to one) + * and the rest for the headroom definition of prefetching. + */ + if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) { + wqesz_shift = qp->qp_sq_log_wqesz; + thewqesz = 1 << wqesz_shift; + thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); + for (i = 0; i < sq_depth; i++) { + /* + * for each stride, go through and every 64 bytes + * write the init value - having set the address + * once, just keep incrementing it + */ + for (j = 0; j < thewqesz; j += 64, thewqe += 8) { + *(uint32_t *)thewqe = 0xFFFFFFFF; + } + } + } + + /* Zero out the QP context */ + bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); + + /* + * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the + * "qphdl" and return success + */ + hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp); + + mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, + DDI_INTR_PRI(state->hs_intrmsi_pri)); + + qp->qp_rangep = qp_range_p; + + qphdl[ii] = qp; + + if (++ii < (1 << log2)) + goto for_each_qp; + + return (DDI_SUCCESS); + +/* + * The following is cleanup for all possible failure cases in this routine + */ +qpalloc_fail9: + hermon_queue_free(&qp->qp_wqinfo); +qpalloc_fail8: + if (qp->qp_sq_wqhdr) + hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); + if (qp->qp_rq_wqhdr) + hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); +qpalloc_fail7: + if (!qp_srq_en) { + hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); + } + +qpalloc_fail6: + hermon_rsrc_free(state, &rsrc); +qpalloc_fail4: + hermon_cq_refcnt_dec(rq_cq); +qpalloc_fail2: + hermon_cq_refcnt_dec(sq_cq); +qpalloc_fail1: + hermon_pd_refcnt_dec(pd); +qpalloc_fail0: + if (ii == 0) { + if (qp_range_p) + kmem_free(qp_range_p, sizeof (*qp_range_p)); + hermon_rsrc_free(state, &qpc); + } else { + /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */ + + mutex_enter(&qp->qp_rangep->hqpr_lock); + qp_range_p->hqpr_refcnt = ii; + mutex_exit(&qp->qp_rangep->hqpr_lock); + while (--ii >= 0) { + ibc_qpn_hdl_t qpn_hdl; + int free_status; + + free_status = 
hermon_qp_free(state, &qphdl[ii], + IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag); + if (free_status != DDI_SUCCESS) + cmn_err(CE_CONT, "!qp_range: status 0x%x: " + "error status %x during free", + status, free_status); + } + } + + return (status); +} + + +/* * hermon_qp_free() * This function frees up the QP resources. Depending on the value * of the "free_qp_flags", the QP number may not be released until @@ -1186,7 +1883,7 @@ */ qp = *qphdl; mutex_enter(&qp->qp_lock); - qpc = qp->qp_qpcrsrcp; + qpc = qp->qp_qpcrsrcp; /* NULL if part of a "range" */ rsrc = qp->qp_rsrcp; pd = qp->qp_pdhdl; srq = qp->qp_srqhdl; @@ -1194,7 +1891,7 @@ rq_cq = qp->qp_rq_cqhdl; sq_cq = qp->qp_sq_cqhdl; port = qp->qp_portnum; - qp_srq_en = qp->qp_srq_en; + qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ; /* * If the QP is part of an MCG, then we fail the qp_free @@ -1221,6 +1918,7 @@ goto qpfree_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the transition @@ -1242,7 +1940,7 @@ * We also need to invalidate the QP tracking information for the * user mapping. */ - if (qp->qp_is_umap) { + if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum, MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, &umapdb); @@ -1274,10 +1972,15 @@ * number has been freed. Note: it does depend in whether we are * freeing a special QP or not. 
*/ - if (qp->qp_is_special) { - state->hs_qphdl[qpc->hr_indx + port] = NULL; + if (qpc == NULL) { + hermon_icm_set_num_to_hdl(state, HERMON_QPC, + qp->qp_qpnum, NULL); + } else if (qp->qp_is_special) { + hermon_icm_set_num_to_hdl(state, HERMON_QPC, + qpc->hr_indx + port, NULL); } else { - state->hs_qphdl[qpc->hr_indx] = NULL; + hermon_icm_set_num_to_hdl(state, HERMON_QPC, + qpc->hr_indx, NULL); } /* @@ -1342,9 +2045,20 @@ goto qpfree_fail; } + } else if (qp->qp_rangep) { + int refcnt; + mutex_enter(&qp->qp_rangep->hqpr_lock); + refcnt = --qp->qp_rangep->hqpr_refcnt; + mutex_exit(&qp->qp_rangep->hqpr_lock); + if (refcnt == 0) { + mutex_destroy(&qp->qp_rangep->hqpr_lock); + hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc); + kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep)); + } + qp->qp_rangep = NULL; + } else if (qp->qp_qpn_hdl == NULL) { + hermon_rsrc_free(state, &qpc); } else { - type = qp->qp_serv_type; - /* * Check the flags and determine whether to release the * QPN or not, based on their value. @@ -1359,8 +2073,8 @@ HERMON_QPN_RELEASE); } } + mutex_destroy(&qp->qp_sq_lock); - mutex_destroy(&qp->qp_rq_lock); /* Free the Hermon Queue Pair handle */ hermon_rsrc_free(state, &rsrc); @@ -1450,8 +2164,10 @@ * the current QP state. Note: Some special handling is necessary * for calculating the QP number on special QP (QP0 and QP1). */ - attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg; - attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg; + attr_p->qp_sq_cq = + (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg; + attr_p->qp_rq_cq = + (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg; if (qp->qp_is_special) { attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1; } else { @@ -1498,7 +2214,7 @@ /* * Fill in the additional QP info based on the QP's transport type. 
*/ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { /* Fill in the UD-specific info */ ud = &attr_p->qp_info.qp_transport.ud; @@ -1511,6 +2227,38 @@ attr_p->qp_info.qp_trans = IBT_UD_SRV; + if (qp->qp_serv_type == HERMON_QP_FEXCH) { + ibt_pmr_desc_t *pmr; + uint64_t heart_beat; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr)) + pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc; + pmr->pmd_iova = 0; + pmr->pmd_lkey = pmr->pmd_rkey = + hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum); + pmr->pmd_phys_buf_list_sz = + state->hs_fcoib.hfc_mtts_per_mpt; + pmr->pmd_sync_required = 0; + + pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc; + pmr->pmd_iova = 0; + pmr->pmd_lkey = 0; + pmr->pmd_rkey = 0; + pmr->pmd_phys_buf_list_sz = 0; + pmr->pmd_sync_required = 0; + + attr_p->qp_query_fexch.fq_flags = + ((hermon_get_heart_beat_rq_cmd_post(state, + qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) && + (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK : + IBT_FEXCH_NO_FLAGS; + + ud->ud_fc = qp->qp_fc_attr; + } else if (qp->qp_serv_type == HERMON_QP_FCMND || + qp->qp_serv_type == HERMON_QP_RFCI) { + ud->ud_fc = qp->qp_fc_attr; + } + } else if (qp->qp_serv_type == HERMON_QP_RC) { /* Fill in the RC-specific info */ @@ -1637,10 +2385,12 @@ if (qpc->state == HERMON_QP_SQERR) { attr_p->qp_info.qp_state = IBT_STATE_SQE; qp->qp_state = HERMON_QP_SQERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR); } if (qpc->state == HERMON_QP_ERR) { attr_p->qp_info.qp_state = IBT_STATE_ERROR; qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } mutex_exit(&qp->qp_lock); @@ -1889,7 +2639,7 @@ /* Calculate the QP table index from the qpnum */ qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1; qpindx = qpnum & qpmask; - return (state->hs_qphdl[qpindx]); + return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx)); }
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_qpmod.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_qpmod.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -204,6 +203,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_INIT; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_INIT); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RESET)) { @@ -232,6 +232,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -255,6 +256,7 @@ HERMON_WARNING(state, "failed to reset QP"); } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); mutex_exit(&qp->qp_lock); goto qpmod_fail; @@ -303,6 +305,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RTR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RTR); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_INIT)) { @@ -315,6 +318,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_INIT; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_INIT); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RESET)) { @@ -327,6 +331,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -350,6 +355,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -402,6 +408,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RTS; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RTS); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RESET)) { @@ -414,6 +421,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + 
HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -437,6 +445,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -478,6 +487,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RTS; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RTS); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_SQD)) { @@ -491,6 +501,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_SQD; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQD); #else /* hack because of the lack of fw support for SQD */ mutex_exit(&qp->qp_lock); @@ -509,6 +520,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -532,6 +544,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -571,6 +584,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RTS; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RTS); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RESET)) { @@ -583,6 +597,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -606,6 +621,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -651,6 +667,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_SQD; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQD); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RTS)) { @@ -678,6 +695,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_SQD; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQD); /* * The, attempt to transition from "SQD" to "RTS", but @@ -691,6 +709,7 @@ goto qpmod_fail; } qp->qp_state = 
HERMON_QP_RTS; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RTS); } else if ((flags & IBT_CEP_SET_STATE) && (mod_state == IBT_STATE_RESET)) { @@ -703,6 +722,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -726,6 +746,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_ERR; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR); } else { /* Invalid transition - return error */ @@ -751,6 +772,7 @@ goto qpmod_fail; } qp->qp_state = HERMON_QP_RESET; + HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET); /* * Do any additional handling necessary for the @@ -818,6 +840,7 @@ uint_t portnum, pkeyindx; int status; uint32_t cqnmask; + int qp_srq_en; ASSERT(MUTEX_HELD(&qp->qp_lock)); @@ -847,16 +870,12 @@ qpc->usr_page = qp->qp_uarpg; - /* HERMON: sched_q is now in the address vector(s) */ - qpc->pri_addr_path.sched_q = HERMON_QP_SCHEDQ_GET(qp->qp_portnum, - 0, qp->qp_is_special); - qpc->alt_addr_path.sched_q = HERMON_QP_SCHEDQ_GET(qp->qp_portnum, - 0, qp->qp_is_special); - cqnmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1; - qpc->cqn_snd = qp->qp_sq_cqhdl->cq_cqnum & cqnmask; + qpc->cqn_snd = + (qp->qp_sq_cqhdl == NULL) ? 0 : qp->qp_sq_cqhdl->cq_cqnum & cqnmask; qpc->page_offs = qp->qp_wqinfo.qa_pgoffs >> 6; - qpc->cqn_rcv = qp->qp_rq_cqhdl->cq_cqnum & cqnmask; + qpc->cqn_rcv = + (qp->qp_rq_cqhdl == NULL) ? 
0 : qp->qp_rq_cqhdl->cq_cqnum & cqnmask; /* dbr is now an address, not an index */ qpc->dbr_addrh = ((uint64_t)qp->qp_rq_pdbr >> 32); @@ -875,25 +894,33 @@ qpc->mtt_base_addrl = (qp->qp_mrhdl->mr_mttaddr) >> 3; qpc->mtt_base_addrh = (uint32_t)((qp->qp_mrhdl->mr_mttaddr >> 32) & 0xFF); - qpc->srq_en = qp->qp_srq_en; - - if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) { + qp_srq_en = (qp->qp_alloc_flags & IBT_QP_USES_SRQ) != 0; + qpc->srq_en = qp_srq_en; + + if (qp_srq_en) { qpc->srq_number = qp->qp_srqhdl->srq_srqnum; } else { qpc->srq_number = 0; } - /* 1.2 verbs extensions disabled for now */ - qpc->fre = 0; /* default disable fast registration WR */ + /* + * Fast Registration Work Requests and Reserved Lkey are enabled + * with the single IBT bit stored in qp_rlky. + */ + qpc->fre = qp->qp_rlky; qpc->rlky = qp->qp_rlky; - qpc->header_sep = 0; /* disble header separation for now */ - qpc->rss = 0; /* default disable RSS for now */ + + /* 1.2 verbs extensions disabled for now */ + qpc->header_sep = 0; /* disable header separation for now */ + qpc->rss = qp->qp_alloc_flags & IBT_QP_USES_RSS ? 
1 : 0; qpc->inline_scatter = 0; /* disable inline scatter for now */ /* * Now fill in the QPC fields which are specific to transport type */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { + int my_fc_id_idx, exch_base; + ud = &info_p->qp_transport.ud; /* Set the QKey */ @@ -907,6 +934,8 @@ qpc->mtu = HERMON_MAX_MTU; if (qp->qp_uses_lso) qpc->msg_max = state->hs_devlim.log_max_gso_sz; + else if (qp->qp_is_special) + qpc->msg_max = HERMON_MAX_MTU + 6; else qpc->msg_max = HERMON_QP_LOG_MAX_MSGSZ; @@ -931,6 +960,60 @@ return (IBT_PKEY_IX_ILLEGAL); } + /* fill in the RSS fields */ + if (qpc->rss) { + struct hermon_hw_rss_s *rssp; + ibt_rss_flags_t flags = ud->ud_rss.rss_flags; + + rssp = (struct hermon_hw_rss_s *)&qpc->pri_addr_path; + rssp->log2_tbl_sz = ud->ud_rss.rss_log2_table; + rssp->base_qpn = ud->ud_rss.rss_base_qpn; + rssp->default_qpn = ud->ud_rss.rss_def_qpn; + if (flags & IBT_RSS_ALG_XOR) + rssp->hash_fn = 0; /* XOR Hash Function */ + else if (flags & IBT_RSS_ALG_TPL) + rssp->hash_fn = 1; /* Toeplitz Hash Fn */ + else + return (IBT_INVALID_PARAM); + rssp->ipv4 = (flags & IBT_RSS_HASH_IPV4) != 0; + rssp->tcp_ipv4 = (flags & IBT_RSS_HASH_TCP_IPV4) != 0; + rssp->ipv6 = (flags & IBT_RSS_HASH_IPV6) != 0; + rssp->tcp_ipv4 = (flags & IBT_RSS_HASH_TCP_IPV6) != 0; + bcopy(ud->ud_rss.rss_toe_key, rssp->rss_key, 40); + } else if (qp->qp_serv_type == HERMON_QP_RFCI) { + status = hermon_fcoib_set_id(state, portnum, + qp->qp_qpnum, ud->ud_fc.fc_src_id); + if (status != DDI_SUCCESS) + return (status); + qp->qp_fc_attr = ud->ud_fc; + } else if (qp->qp_serv_type == HERMON_QP_FEXCH) { + my_fc_id_idx = hermon_fcoib_get_id_idx(state, + portnum, &ud->ud_fc); + if (my_fc_id_idx == -1) + return (IBT_INVALID_PARAM); + qpc->my_fc_id_idx = my_fc_id_idx; + + status = hermon_fcoib_fexch_mkey_init(state, + qp->qp_pdhdl, ud->ud_fc.fc_hca_port, + qp->qp_qpnum, HERMON_CMD_NOSLEEP_SPIN); + if (status != DDI_SUCCESS) + return (status); + qp->qp_fc_attr = 
ud->ud_fc; + } else if (qp->qp_serv_type == HERMON_QP_FCMND) { + my_fc_id_idx = hermon_fcoib_get_id_idx(state, + portnum, &ud->ud_fc); + if (my_fc_id_idx == -1) + return (IBT_INVALID_PARAM); + qpc->my_fc_id_idx = my_fc_id_idx; + exch_base = hermon_fcoib_check_exch_base_off(state, + portnum, &ud->ud_fc); + if (exch_base == -1) + return (IBT_INVALID_PARAM); + qpc->exch_base = exch_base; + qpc->exch_size = ud->ud_fc.fc_exch_log2_sz; + qp->qp_fc_attr = ud->ud_fc; + } + } else if (qp->qp_serv_type == HERMON_QP_RC) { rc = &info_p->qp_transport.rc; @@ -1046,7 +1129,7 @@ * Since there are no common fields to be filled in for this command, * we begin with the QPC fields which are specific to transport type. */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -1255,7 +1338,7 @@ * Since there are few common fields to be filled in for this command, * we just do the QPC fields that are specific to transport type. */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -1627,7 +1710,7 @@ /* * Now fill in the QPC fields which are specific to transport type */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* Set the send PSN */ @@ -1898,7 +1981,7 @@ * Since there are no common fields to be filled in for this command, * we begin with the QPC fields which are specific to transport type. 
*/ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -2190,7 +2273,7 @@ /* * Now fill in the QPC fields which are specific to transport type */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -2491,7 +2574,7 @@ /* * Now fill in the QPC fields which are specific to transport type */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -2933,7 +3016,7 @@ * Since there are no common fields to be filled in for this command, * we begin with the QPC fields which are specific to transport type. */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { ud = &info_p->qp_transport.ud; /* @@ -3065,7 +3148,13 @@ } return (ibc_get_ci_failure(0)); } - + if (qp->qp_serv_type == HERMON_QP_FEXCH) { + status = hermon_fcoib_fexch_mkey_fini(state, qp->qp_pdhdl, + qp->qp_qpnum, HERMON_CMD_NOSLEEP_SPIN); + if (status != DDI_SUCCESS) + cmn_err(CE_NOTE, "hermon%d: fexch_mkey_fini failed " + "%08x\n", state->hs_instance, status); + } return (DDI_SUCCESS); } @@ -3125,9 +3214,11 @@ qpc->mtt_base_addrh = (qp->qp_mrhdl->mr_mttaddr) >> 32 & 0xFF; qpc->mtt_base_addrl = (qp->qp_mrhdl->mr_mttaddr) >> 3 & 0xFFFFFFFF; cqnmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1; - qpc->cqn_snd = qp->qp_sq_cqhdl->cq_cqnum & cqnmask; + qpc->cqn_snd = + (qp->qp_sq_cqhdl == NULL) ? 0 : qp->qp_sq_cqhdl->cq_cqnum & cqnmask; qpc->page_offs = qp->qp_wqinfo.qa_pgoffs >> 6; - qpc->cqn_rcv = qp->qp_rq_cqhdl->cq_cqnum & cqnmask; + qpc->cqn_rcv = + (qp->qp_rq_cqhdl == NULL) ? 
0 : qp->qp_rq_cqhdl->cq_cqnum & cqnmask; qpc->sq_wqe_counter = 0; qpc->rq_wqe_counter = 0; @@ -3135,10 +3226,10 @@ qpc->log_rq_stride = qp->qp_rq_log_wqesz - 4; qpc->log_sq_size = highbit(qp->qp_sq_bufsz) - 1; qpc->log_rq_size = highbit(qp->qp_rq_bufsz) - 1; - qpc->srq_en = qp->qp_srq_en; + qpc->srq_en = (qp->qp_alloc_flags & IBT_QP_USES_SRQ) != 0; qpc->sq_no_prefetch = qp->qp_no_prefetch; - if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { qpc->srq_number = qp->qp_srqhdl->srq_srqnum; } else { qpc->srq_number = 0; @@ -3150,7 +3241,7 @@ /* * Now fill in the QPC fields which are specific to transport type */ - if (qp->qp_serv_type == HERMON_QP_UD) { + if (qp->qp_type == IBT_UD_RQP) { /* Set the UD parameters to an invalid default */ qpc->qkey = 0; qpc->pri_addr_path.sched_q =
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_rsrc.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_rsrc.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -107,19 +106,19 @@ */ static int hermon_rsrc_mbox_alloc(hermon_rsrc_pool_info_t *pool_info, uint_t num, hermon_rsrc_t *hdl); -static void hermon_rsrc_mbox_free(hermon_rsrc_pool_info_t *pool_info, - hermon_rsrc_t *hdl); +static void hermon_rsrc_mbox_free(hermon_rsrc_t *hdl); static int hermon_rsrc_hw_entry_alloc(hermon_rsrc_pool_info_t *pool_info, - uint_t num, uint_t num_align, ddi_acc_handle_t acc_handle, - uint_t sleepflag, hermon_rsrc_t *hdl); + uint_t num, uint_t num_align, uint_t sleepflag, hermon_rsrc_t *hdl); static void hermon_rsrc_hw_entry_free(hermon_rsrc_pool_info_t *pool_info, hermon_rsrc_t *hdl); +static int hermon_rsrc_hw_entry_reserve(hermon_rsrc_pool_info_t *pool_info, + uint_t num, uint_t num_align, uint_t sleepflag, hermon_rsrc_t *hdl); static int hermon_rsrc_hw_entry_icm_confirm(hermon_rsrc_pool_info_t *pool_info, - uint_t num, hermon_rsrc_t *hdl); + uint_t num, hermon_rsrc_t *hdl, int num_to_hdl); static int hermon_rsrc_hw_entry_icm_free(hermon_rsrc_pool_info_t *pool_info, - hermon_rsrc_t *hdl); + hermon_rsrc_t *hdl, int num_to_hdl); static int hermon_rsrc_swhdl_alloc(hermon_rsrc_pool_info_t *pool_info, uint_t sleepflag, hermon_rsrc_t *hdl); @@ -131,6 +130,13 @@ static void hermon_rsrc_pdhdl_free(hermon_rsrc_pool_info_t *pool_info, hermon_rsrc_t *hdl); +static int hermon_rsrc_fexch_alloc(hermon_state_t *state, + hermon_rsrc_type_t rsrc, uint_t num, uint_t sleepflag, hermon_rsrc_t *hdl); +static void hermon_rsrc_fexch_free(hermon_state_t *state, hermon_rsrc_t *hdl); +static int hermon_rsrc_rfci_alloc(hermon_state_t *state, + hermon_rsrc_type_t rsrc, uint_t 
num, uint_t sleepflag, hermon_rsrc_t *hdl); +static void hermon_rsrc_rfci_free(hermon_state_t *state, hermon_rsrc_t *hdl); + /* * The following routines are the constructors and destructors for several * of the SW handle type objects. For certain types of SW handles objects @@ -187,8 +193,7 @@ * Allocate space for the object used to track the resource handle */ flag = (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP; - tmp_rsrc_hdl = (hermon_rsrc_t *)kmem_cache_alloc(state->hs_rsrc_cache, - flag); + tmp_rsrc_hdl = kmem_cache_alloc(state->hs_rsrc_cache, flag); if (tmp_rsrc_hdl == NULL) { return (DDI_FAILURE); } @@ -203,7 +208,7 @@ /* * Depending on resource type, call the appropriate alloc routine */ - switch (rsrc_pool->rsrc_type) { + switch (rsrc) { case HERMON_IN_MBOX: case HERMON_OUT_MBOX: case HERMON_INTR_IN_MBOX: @@ -211,64 +216,37 @@ status = hermon_rsrc_mbox_alloc(rsrc_pool, num, tmp_rsrc_hdl); break; + case HERMON_DMPT: + /* Allocate "num" (contiguous/aligned for FEXCH) DMPTs */ case HERMON_QPC: - /* Allocate "num" contiguous/aligned QPCs for RSS */ + /* Allocate "num" (contiguous/aligned for RSS) QPCs */ status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, num, - 0, sleepflag, tmp_rsrc_hdl); - break; - - case HERMON_CQC: - case HERMON_SRQC: - case HERMON_EQC: - /* - * Because these objects are NOT accessed by Hermon driver - * software, we set the acc_handle parameter to zero. - */ - status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, 1, 0, sleepflag, tmp_rsrc_hdl); break; - case HERMON_DMPT: - /* - * Because these objects are sometimes accessed by Hermon - * driver software (FMR for MPTs), we need the acc_handle - * to be set. The ICM-aware code will set it for all - * ICM backed resources. - * But if they are allocated in multiples, we specify here that - * they must be aligned on a more restrictive boundary. 
- */ - status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, num, - 0, sleepflag, tmp_rsrc_hdl); + case HERMON_QPC_FEXCH_PORT1: + case HERMON_QPC_FEXCH_PORT2: + /* Allocate "num" contiguous/aligned QPCs for FEXCH */ + status = hermon_rsrc_fexch_alloc(state, rsrc, num, + sleepflag, tmp_rsrc_hdl); break; - case HERMON_MCG: - /* - * Hermon MCG entries are also NOT accessed by Hermon driver - * software, but because MCG entries do not have the same - * alignnment restrictions we loosen the constraint here. - */ - status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, 1, 0, + case HERMON_QPC_RFCI_PORT1: + case HERMON_QPC_RFCI_PORT2: + /* Allocate "num" contiguous/aligned QPCs for RFCI */ + status = hermon_rsrc_rfci_alloc(state, rsrc, num, sleepflag, tmp_rsrc_hdl); break; case HERMON_MTT: - /* - * Because MTT objects are among the few HW resources that - * may be allocated in odd numbers, we specify a less - * restrictive alignment than for the above resources. - */ + case HERMON_CQC: + case HERMON_SRQC: + case HERMON_EQC: + case HERMON_MCG: + case HERMON_UARPG: + /* Allocate "num" unaligned resources */ status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, 1, - 0, sleepflag, tmp_rsrc_hdl); - break; - - case HERMON_UARPG: - /* - * Because UAR pages are written by Hermon driver software (for - * doorbells), we set the acc_handle parameter to point to - * the ddi_acc_handle_t for the Hermon UAR memory. - */ - status = hermon_rsrc_hw_entry_alloc(rsrc_pool, num, 1, - hermon_rsrc_alloc_uarhdl(state), sleepflag, tmp_rsrc_hdl); + sleepflag, tmp_rsrc_hdl); break; case HERMON_MRHDL: @@ -307,7 +285,75 @@ */ if (status != DDI_SUCCESS) { kmem_cache_free(state->hs_rsrc_cache, tmp_rsrc_hdl); - tmp_rsrc_hdl = NULL; + return (DDI_FAILURE); + } else { + *hdl = tmp_rsrc_hdl; + return (DDI_SUCCESS); + } +} + + +/* + * hermon_rsrc_reserve() + * + * Context: Can only be called from attach. 
+ * The "sleepflag" parameter is used by all object allocators to + * determine whether to SLEEP for resources or not. + */ +int +hermon_rsrc_reserve(hermon_state_t *state, hermon_rsrc_type_t rsrc, uint_t num, + uint_t sleepflag, hermon_rsrc_t **hdl) +{ + hermon_rsrc_pool_info_t *rsrc_pool; + hermon_rsrc_t *tmp_rsrc_hdl; + int flag, status = DDI_FAILURE; + + ASSERT(state != NULL); + ASSERT(hdl != NULL); + + rsrc_pool = &state->hs_rsrc_hdl[rsrc]; + ASSERT(rsrc_pool != NULL); + + /* + * Allocate space for the object used to track the resource handle + */ + flag = (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP; + tmp_rsrc_hdl = kmem_cache_alloc(state->hs_rsrc_cache, flag); + if (tmp_rsrc_hdl == NULL) { + return (DDI_FAILURE); + } + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*tmp_rsrc_hdl)) + + /* + * Set rsrc_hdl type. This is later used by the hermon_rsrc_free call + * to know what type of resource is being freed. + */ + tmp_rsrc_hdl->rsrc_type = rsrc; + + switch (rsrc) { + case HERMON_QPC: + case HERMON_DMPT: + case HERMON_MTT: + /* + * Reserve num resources, naturally aligned (N * num). + */ + status = hermon_rsrc_hw_entry_reserve(rsrc_pool, num, num, + sleepflag, tmp_rsrc_hdl); + break; + + default: + HERMON_WARNING(state, "unexpected resource type in reserve "); + cmn_err(CE_WARN, "Resource type %x \n", rsrc); + break; + } + + /* + * If the resource allocation failed, then free the special resource + * tracking structure and return failure. Otherwise return the + * handle for the resource tracking structure. + */ + if (status != DDI_SUCCESS) { + kmem_cache_free(state->hs_rsrc_cache, tmp_rsrc_hdl); return (DDI_FAILURE); } else { *hdl = tmp_rsrc_hdl; @@ -317,6 +363,174 @@ /* + * hermon_rsrc_fexch_alloc() + * + * Context: Can only be called from base context. + * The "sleepflag" parameter is used by all object allocators to + * determine whether to SLEEP for resources or not. 
+ */ +static int +hermon_rsrc_fexch_alloc(hermon_state_t *state, hermon_rsrc_type_t rsrc, + uint_t num, uint_t sleepflag, hermon_rsrc_t *hdl) +{ + hermon_fcoib_t *fcoib; + void *addr; + uint32_t fexch_qpn_base; + hermon_rsrc_pool_info_t *qpc_pool, *mpt_pool, *mtt_pool; + int flag, status; + hermon_rsrc_t mpt_hdl; /* temporary, just for icm_confirm */ + hermon_rsrc_t mtt_hdl; /* temporary, just for icm_confirm */ + uint_t portm1; /* hca_port_number - 1 */ + uint_t nummtt; + vmem_t *vmp; + + ASSERT(state != NULL); + ASSERT(hdl != NULL); + + if ((state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_FC) == 0) + return (DDI_FAILURE); + + portm1 = rsrc - HERMON_QPC_FEXCH_PORT1; + fcoib = &state->hs_fcoib; + flag = (sleepflag == HERMON_SLEEP) ? VM_SLEEP : VM_NOSLEEP; + + /* Allocate from the FEXCH QP range */ + vmp = fcoib->hfc_fexch_vmemp[portm1]; + addr = vmem_xalloc(vmp, num, num, 0, 0, NULL, NULL, flag | VM_FIRSTFIT); + if (addr == NULL) { + return (DDI_FAILURE); + } + fexch_qpn_base = (uint32_t)((uintptr_t)addr - + fcoib->hfc_vmemstart + fcoib->hfc_fexch_base[portm1]); + + /* ICM confirm for the FEXCH QP range */ + qpc_pool = &state->hs_rsrc_hdl[HERMON_QPC]; + hdl->hr_len = num << qpc_pool->rsrc_shift; + hdl->hr_addr = addr; /* used only for vmem_xfree */ + hdl->hr_indx = fexch_qpn_base; + + status = hermon_rsrc_hw_entry_icm_confirm(qpc_pool, num, hdl, 1); + if (status != DDI_SUCCESS) { + vmem_xfree(vmp, addr, num); + return (DDI_FAILURE); + } + + /* ICM confirm for the Primary MKEYs (client side only) */ + mpt_pool = &state->hs_rsrc_hdl[HERMON_DMPT]; + mpt_hdl.hr_len = num << mpt_pool->rsrc_shift; + mpt_hdl.hr_addr = NULL; + mpt_hdl.hr_indx = fcoib->hfc_mpt_base[portm1] + + (fexch_qpn_base - fcoib->hfc_fexch_base[portm1]); + + status = hermon_rsrc_hw_entry_icm_confirm(mpt_pool, num, &mpt_hdl, 0); + if (status != DDI_SUCCESS) { + status = hermon_rsrc_hw_entry_icm_free(qpc_pool, hdl, 1); + vmem_xfree(vmp, addr, num); + return (DDI_FAILURE); + } + + /* ICM confirm for 
the MTTs of the Primary MKEYs (client side only) */ + nummtt = fcoib->hfc_mtts_per_mpt; + num *= nummtt; + mtt_pool = &state->hs_rsrc_hdl[HERMON_MTT]; + mtt_hdl.hr_len = num << mtt_pool->rsrc_shift; + mtt_hdl.hr_addr = NULL; + mtt_hdl.hr_indx = fcoib->hfc_mtt_base[portm1] + + (fexch_qpn_base - fcoib->hfc_fexch_base[portm1]) * + nummtt; + + status = hermon_rsrc_hw_entry_icm_confirm(mtt_pool, num, &mtt_hdl, 0); + if (status != DDI_SUCCESS) { + vmem_xfree(vmp, addr, num); + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +static void +hermon_rsrc_fexch_free(hermon_state_t *state, hermon_rsrc_t *hdl) +{ + hermon_fcoib_t *fcoib; + uint_t portm1; /* hca_port_number - 1 */ + + ASSERT(state != NULL); + ASSERT(hdl != NULL); + + portm1 = hdl->rsrc_type - HERMON_QPC_FEXCH_PORT1; + fcoib = &state->hs_fcoib; + vmem_xfree(fcoib->hfc_fexch_vmemp[portm1], hdl->hr_addr, + hdl->hr_len >> state->hs_rsrc_hdl[HERMON_QPC].rsrc_shift); +} + +/* + * hermon_rsrc_rfci_alloc() + * + * Context: Can only be called from base context. + * The "sleepflag" parameter is used by all object allocators to + * determine whether to SLEEP for resources or not. + */ +static int +hermon_rsrc_rfci_alloc(hermon_state_t *state, hermon_rsrc_type_t rsrc, + uint_t num, uint_t sleepflag, hermon_rsrc_t *hdl) +{ + hermon_fcoib_t *fcoib; + void *addr; + uint32_t rfci_qpn_base; + hermon_rsrc_pool_info_t *qpc_pool; + int flag, status; + uint_t portm1; /* hca_port_number - 1 */ + vmem_t *vmp; + + ASSERT(state != NULL); + ASSERT(hdl != NULL); + + if ((state->hs_ibtfinfo.hca_attr->hca_flags2 & IBT_HCA2_FC) == 0) + return (DDI_FAILURE); + + portm1 = rsrc - HERMON_QPC_RFCI_PORT1; + fcoib = &state->hs_fcoib; + flag = (sleepflag == HERMON_SLEEP) ? 
VM_SLEEP : VM_NOSLEEP; + + /* Allocate from the RFCI QP range */ + vmp = fcoib->hfc_rfci_vmemp[portm1]; + addr = vmem_xalloc(vmp, num, num, 0, 0, NULL, NULL, flag | VM_FIRSTFIT); + if (addr == NULL) { + return (DDI_FAILURE); + } + rfci_qpn_base = (uint32_t)((uintptr_t)addr - + fcoib->hfc_vmemstart + fcoib->hfc_rfci_base[portm1]); + + /* ICM confirm for the RFCI QP */ + qpc_pool = &state->hs_rsrc_hdl[HERMON_QPC]; + hdl->hr_len = num << qpc_pool->rsrc_shift; + hdl->hr_addr = addr; /* used only for vmem_xfree */ + hdl->hr_indx = rfci_qpn_base; + + status = hermon_rsrc_hw_entry_icm_confirm(qpc_pool, num, hdl, 1); + if (status != DDI_SUCCESS) { + vmem_xfree(vmp, addr, num); + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +static void +hermon_rsrc_rfci_free(hermon_state_t *state, hermon_rsrc_t *hdl) +{ + hermon_fcoib_t *fcoib; + uint_t portm1; /* hca_port_number - 1 */ + + ASSERT(state != NULL); + ASSERT(hdl != NULL); + + portm1 = hdl->rsrc_type - HERMON_QPC_RFCI_PORT1; + fcoib = &state->hs_fcoib; + vmem_xfree(fcoib->hfc_rfci_vmemp[portm1], hdl->hr_addr, + hdl->hr_len >> state->hs_rsrc_hdl[HERMON_QPC].rsrc_shift); +} + + +/* * hermon_rsrc_free() * Context: Can be called from interrupt or base context. 
*/ @@ -339,7 +553,17 @@ case HERMON_OUT_MBOX: case HERMON_INTR_IN_MBOX: case HERMON_INTR_OUT_MBOX: - hermon_rsrc_mbox_free(rsrc_pool, *hdl); + hermon_rsrc_mbox_free(*hdl); + break; + + case HERMON_QPC_FEXCH_PORT1: + case HERMON_QPC_FEXCH_PORT2: + hermon_rsrc_fexch_free(state, *hdl); + break; + + case HERMON_QPC_RFCI_PORT1: + case HERMON_QPC_RFCI_PORT2: + hermon_rsrc_rfci_free(state, *hdl); break; case HERMON_QPC: @@ -375,7 +599,7 @@ case HERMON_CMPT_CQC: case HERMON_CMPT_EQC: default: - HERMON_WARNING(state, "unexpected resource type in free"); + cmn_err(CE_CONT, "!rsrc_type = 0x%x\n", rsrc_pool->rsrc_type); break; } @@ -422,7 +646,7 @@ cleanup = HERMON_RSRC_CLEANUP_LEVEL0; /* Build kmem cache name from Hermon instance */ - rsrc_name = (char *)kmem_zalloc(HERMON_RSRC_NAME_MAXLEN, KM_SLEEP); + rsrc_name = kmem_zalloc(HERMON_RSRC_NAME_MAXLEN, KM_SLEEP); HERMON_RSRC_NAME(rsrc_name, HERMON_RSRC_CACHE); /* @@ -446,7 +670,6 @@ num = ((uint64_t)1 << cfgprof->cp_log_num_outmbox); size = ((uint64_t)1 << cfgprof->cp_log_outmbox_size); rsrc_pool = &state->hs_rsrc_hdl[HERMON_OUT_MBOX]; - rsrc_pool->rsrc_type = HERMON_OUT_MBOX; rsrc_pool->rsrc_loc = HERMON_IN_SYSMEM; rsrc_pool->rsrc_pool_size = (size * num); rsrc_pool->rsrc_shift = cfgprof->cp_log_outmbox_size; @@ -477,7 +700,6 @@ num = ((uint64_t)1 << cfgprof->cp_log_num_intr_outmbox); size = ((uint64_t)1 << cfgprof->cp_log_outmbox_size); rsrc_pool = &state->hs_rsrc_hdl[HERMON_INTR_OUT_MBOX]; - rsrc_pool->rsrc_type = HERMON_INTR_OUT_MBOX; rsrc_pool->rsrc_loc = HERMON_IN_SYSMEM; rsrc_pool->rsrc_pool_size = (size * num); rsrc_pool->rsrc_shift = cfgprof->cp_log_outmbox_size; @@ -508,7 +730,6 @@ num = ((uint64_t)1 << cfgprof->cp_log_num_inmbox); size = ((uint64_t)1 << cfgprof->cp_log_inmbox_size); rsrc_pool = &state->hs_rsrc_hdl[HERMON_IN_MBOX]; - rsrc_pool->rsrc_type = HERMON_IN_MBOX; rsrc_pool->rsrc_loc = HERMON_IN_SYSMEM; rsrc_pool->rsrc_pool_size = (size * num); rsrc_pool->rsrc_shift = cfgprof->cp_log_inmbox_size; @@ -539,7 
+760,6 @@ num = ((uint64_t)1 << cfgprof->cp_log_num_intr_inmbox); size = ((uint64_t)1 << cfgprof->cp_log_inmbox_size); rsrc_pool = &state->hs_rsrc_hdl[HERMON_INTR_IN_MBOX]; - rsrc_pool->rsrc_type = HERMON_INTR_IN_MBOX; rsrc_pool->rsrc_loc = HERMON_IN_SYSMEM; rsrc_pool->rsrc_pool_size = (size * num); rsrc_pool->rsrc_shift = cfgprof->cp_log_inmbox_size; @@ -600,7 +820,7 @@ /* Allocate the ICM resource name space */ /* Build the ICM vmem arena names from Hermon instance */ - rsrc_name = (char *)kmem_zalloc(HERMON_RSRC_NAME_MAXLEN, KM_SLEEP); + rsrc_name = kmem_zalloc(HERMON_RSRC_NAME_MAXLEN, KM_SLEEP); /* * Initialize the resource pools for all objects that exist in @@ -628,6 +848,7 @@ rsrc_pool = &state->hs_rsrc_hdl[i]; rsrc_pool->rsrc_type = i; + rsrc_pool->rsrc_state = state; /* Set the resource-specific attributes */ switch (i) { @@ -668,7 +889,7 @@ case HERMON_EQC: max = ((uint64_t)1 << devlim->log_max_eq); - num_prealloc = devlim->num_rsvd_eq; + num_prealloc = state->hs_rsvd_eqs; HERMON_RSRC_NAME(rsrc_name, HERMON_EQC_VMEM); ncleanup = HERMON_RSRC_CLEANUP_LEVEL18; break; @@ -732,7 +953,6 @@ max = ((uint64_t)1 << devlim->log_max_mcg); num_prealloc = ((uint64_t)1 << cfgprof->cp_log_num_mcg_hash); rsrc_pool = &state->hs_rsrc_hdl[HERMON_MCG]; - rsrc_pool->rsrc_type = HERMON_MCG; rsrc_pool->rsrc_loc = HERMON_IN_ICM; rsrc_pool->rsrc_pool_size = (mcg_size * num); rsrc_pool->rsrc_shift = mcg_size_shift; @@ -773,7 +993,6 @@ /* Initialize the resource pool and vmem arena for the PD handles */ rsrc_pool = &state->hs_rsrc_hdl[HERMON_PDHDL]; - rsrc_pool->rsrc_type = HERMON_PDHDL; rsrc_pool->rsrc_loc = HERMON_IN_SYSMEM; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_pd_s); rsrc_pool->rsrc_state = state; @@ -801,11 +1020,11 @@ */ for (i = HERMON_NUM_ICM_RESOURCES; i < HERMON_NUM_RESOURCES; i++) { rsrc_pool = &state->hs_rsrc_hdl[i]; + rsrc_pool->rsrc_type = i; /* Set the resource-specific attributes */ switch (i) { case HERMON_MRHDL: - rsrc_pool->rsrc_type = HERMON_MRHDL; 
rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_mr_s); HERMON_RSRC_NAME(rsrc_name, HERMON_MRHDL_CACHE); @@ -823,7 +1042,6 @@ break; case HERMON_EQHDL: - rsrc_pool->rsrc_type = HERMON_EQHDL; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_eq_s); HERMON_RSRC_NAME(rsrc_name, HERMON_EQHDL_CACHE); @@ -836,7 +1054,6 @@ break; case HERMON_CQHDL: - rsrc_pool->rsrc_type = HERMON_CQHDL; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_cq_s); HERMON_RSRC_NAME(rsrc_name, HERMON_CQHDL_CACHE); @@ -846,14 +1063,12 @@ hdl_info.swi_constructor = hermon_rsrc_cqhdl_constructor; hdl_info.swi_destructor = hermon_rsrc_cqhdl_destructor; - hdl_info.swi_flags = (HERMON_SWHDL_KMEMCACHE_INIT | - HERMON_SWHDL_TABLE_INIT); + hdl_info.swi_flags = HERMON_SWHDL_KMEMCACHE_INIT; hdl_info.swi_prealloc_sz = sizeof (hermon_cqhdl_t); ncleanup = HERMON_RSRC_CLEANUP_LEVEL24; break; case HERMON_SRQHDL: - rsrc_pool->rsrc_type = HERMON_SRQHDL; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_srq_s); HERMON_RSRC_NAME(rsrc_name, HERMON_SRQHDL_CACHE); @@ -863,14 +1078,12 @@ hdl_info.swi_constructor = hermon_rsrc_srqhdl_constructor; hdl_info.swi_destructor = hermon_rsrc_srqhdl_destructor; - hdl_info.swi_flags = (HERMON_SWHDL_KMEMCACHE_INIT | - HERMON_SWHDL_TABLE_INIT); + hdl_info.swi_flags = HERMON_SWHDL_KMEMCACHE_INIT; hdl_info.swi_prealloc_sz = sizeof (hermon_srqhdl_t); ncleanup = HERMON_RSRC_CLEANUP_LEVEL25; break; case HERMON_AHHDL: - rsrc_pool->rsrc_type = HERMON_AHHDL; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_ah_s); HERMON_RSRC_NAME(rsrc_name, HERMON_AHHDL_CACHE); @@ -885,7 +1098,6 @@ break; case HERMON_QPHDL: - rsrc_pool->rsrc_type = HERMON_QPHDL; rsrc_pool->rsrc_quantum = sizeof (struct hermon_sw_qp_s); HERMON_RSRC_NAME(rsrc_name, HERMON_QPHDL_CACHE); @@ -895,14 +1107,12 @@ hdl_info.swi_constructor = hermon_rsrc_qphdl_constructor; hdl_info.swi_destructor = hermon_rsrc_qphdl_destructor; - hdl_info.swi_flags = (HERMON_SWHDL_KMEMCACHE_INIT | - HERMON_SWHDL_TABLE_INIT); + 
hdl_info.swi_flags = HERMON_SWHDL_KMEMCACHE_INIT; hdl_info.swi_prealloc_sz = sizeof (hermon_qphdl_t); ncleanup = HERMON_RSRC_CLEANUP_LEVEL27; break; case HERMON_REFCNT: - rsrc_pool->rsrc_type = HERMON_REFCNT; rsrc_pool->rsrc_quantum = sizeof (hermon_sw_refcnt_t); HERMON_RSRC_NAME(rsrc_name, HERMON_REFCNT_CACHE); hdl_info.swi_num = @@ -931,25 +1141,6 @@ goto rsrcinitp2_fail; } cleanup = ncleanup; - - /* - * For table entries, save away a pointer to the central list - * of handle pointers. These are used to enable fast lookup - * of the resources during event processing. - */ - switch (i) { - case HERMON_CQHDL: - state->hs_cqhdl = hdl_info.swi_table_ptr; - break; - case HERMON_QPHDL: - state->hs_qphdl = hdl_info.swi_table_ptr; - break; - case HERMON_SRQHDL: - state->hs_srqhdl = hdl_info.swi_table_ptr; - break; - default: - break; - } } /* @@ -985,7 +1176,6 @@ max = num; num_prealloc = max(devlim->num_rsvd_uar, 128); rsrc_pool = &state->hs_rsrc_hdl[HERMON_UARPG]; - rsrc_pool->rsrc_type = HERMON_UARPG; rsrc_pool->rsrc_loc = HERMON_IN_UAR; rsrc_pool->rsrc_pool_size = (num << PAGESHIFT); rsrc_pool->rsrc_shift = PAGESHIFT; @@ -1079,7 +1269,7 @@ case HERMON_RSRC_CLEANUP_LEVEL28: /* Cleanup the QP handle resource pool */ hdl_info.swi_rsrcpool = &state->hs_rsrc_hdl[HERMON_QPHDL]; - hdl_info.swi_table_ptr = state->hs_qphdl; + hdl_info.swi_table_ptr = NULL; hdl_info.swi_num = ((uint64_t)1 << cfgprof->cp_log_num_qp); hdl_info.swi_prealloc_sz = sizeof (hermon_qphdl_t); hermon_rsrc_sw_handles_fini(state, &hdl_info); @@ -1094,7 +1284,7 @@ case HERMON_RSRC_CLEANUP_LEVEL26: /* Cleanup the SRQ handle resource pool. 
*/ hdl_info.swi_rsrcpool = &state->hs_rsrc_hdl[HERMON_SRQHDL]; - hdl_info.swi_table_ptr = state->hs_srqhdl; + hdl_info.swi_table_ptr = NULL; hdl_info.swi_num = ((uint64_t)1 << cfgprof->cp_log_num_srq); hdl_info.swi_prealloc_sz = sizeof (hermon_srqhdl_t); hermon_rsrc_sw_handles_fini(state, &hdl_info); @@ -1103,7 +1293,7 @@ case HERMON_RSRC_CLEANUP_LEVEL25: /* Cleanup the CQ handle resource pool */ hdl_info.swi_rsrcpool = &state->hs_rsrc_hdl[HERMON_CQHDL]; - hdl_info.swi_table_ptr = state->hs_cqhdl; + hdl_info.swi_table_ptr = NULL; hdl_info.swi_num = ((uint64_t)1 << cfgprof->cp_log_num_cq); hdl_info.swi_prealloc_sz = sizeof (hermon_cqhdl_t); hermon_rsrc_sw_handles_fini(state, &hdl_info); @@ -1662,7 +1852,7 @@ } /* Allocate memory for the mailbox */ - temp_len = (num * pool_info->rsrc_quantum); + temp_len = (num << pool_info->rsrc_shift); status = ddi_dma_mem_alloc(hdl->hr_dmahdl, temp_len, &priv->pmb_devaccattr, priv->pmb_xfer_mode, DDI_DMA_SLEEP, NULL, &kaddr, &real_len, &hdl->hr_acchdl); @@ -1684,9 +1874,8 @@ * Context: Can be called from interrupt or base context. */ static void -hermon_rsrc_mbox_free(hermon_rsrc_pool_info_t *pool_info, hermon_rsrc_t *hdl) +hermon_rsrc_mbox_free(hermon_rsrc_t *hdl) { - ASSERT(pool_info != NULL); ASSERT(hdl != NULL); /* Use ddi_dma_mem_free() to free up sys memory for mailbox */ @@ -1703,8 +1892,7 @@ */ static int hermon_rsrc_hw_entry_alloc(hermon_rsrc_pool_info_t *pool_info, uint_t num, - uint_t num_align, ddi_acc_handle_t acc_handle, uint_t sleepflag, - hermon_rsrc_t *hdl) + uint_t num_align, uint_t sleepflag, hermon_rsrc_t *hdl) { void *addr; uint64_t offset; @@ -1716,11 +1904,66 @@ ASSERT(hdl != NULL); /* - * Hermon hardware entries (QPC, CQC, EQC, MPT, etc.) do not - * generally use the acc_handle (because the entries are not - * directly accessed by software). The exception to this rule - * are the MTT entries. + * Use vmem_xalloc() to get a properly aligned pointer (based on + * the number requested) to the HW entry(ies). 
This handles the + * cases (for special QPCs and for RDB entries) where we need more + * than one and need to ensure that they are properly aligned. */ + flag = (sleepflag == HERMON_SLEEP) ? VM_SLEEP : VM_NOSLEEP; + hdl->hr_len = (num << pool_info->rsrc_shift); + align = (num_align << pool_info->rsrc_shift); + + addr = vmem_xalloc(pool_info->rsrc_vmp, hdl->hr_len, + align, 0, 0, NULL, NULL, flag | VM_FIRSTFIT); + + if (addr == NULL) { + /* No more HW entries available */ + return (DDI_FAILURE); + } + + hdl->hr_acchdl = NULL; /* only used for mbox resources */ + + /* Calculate vaddr and HW table index */ + offset = (uintptr_t)addr - (uintptr_t)pool_info->rsrc_start; + hdl->hr_addr = addr; /* only used for mbox and uarpg resources */ + hdl->hr_indx = offset >> pool_info->rsrc_shift; + + if (pool_info->rsrc_loc == HERMON_IN_ICM) { + int num_to_hdl; + hermon_rsrc_type_t rsrc_type = pool_info->rsrc_type; + + num_to_hdl = (rsrc_type == HERMON_QPC || + rsrc_type == HERMON_CQC || rsrc_type == HERMON_SRQC); + + /* confirm ICM is mapped, and allocate if necessary */ + status = hermon_rsrc_hw_entry_icm_confirm(pool_info, num, hdl, + num_to_hdl); + if (status != DDI_SUCCESS) { + return (DDI_FAILURE); + } + hdl->hr_addr = NULL; /* not used for ICM resources */ + } + + return (DDI_SUCCESS); +} + + +/* + * hermon_rsrc_hw_entry_reserve() + * Context: Can be called from interrupt or base context. + */ +int +hermon_rsrc_hw_entry_reserve(hermon_rsrc_pool_info_t *pool_info, uint_t num, + uint_t num_align, uint_t sleepflag, hermon_rsrc_t *hdl) +{ + void *addr; + uint64_t offset; + uint32_t align; + int flag; + + ASSERT(pool_info != NULL); + ASSERT(hdl != NULL); + ASSERT(pool_info->rsrc_loc == HERMON_IN_ICM); /* * Use vmem_xalloc() to get a properly aligned pointer (based on @@ -1729,8 +1972,8 @@ * than one and need to ensure that they are properly aligned. */ flag = (sleepflag == HERMON_SLEEP) ? 
VM_SLEEP : VM_NOSLEEP; - hdl->hr_len = (num * pool_info->rsrc_quantum); - align = (num_align * pool_info->rsrc_quantum); + hdl->hr_len = (num << pool_info->rsrc_shift); + align = (num_align << pool_info->rsrc_shift); addr = vmem_xalloc(pool_info->rsrc_vmp, hdl->hr_len, align, 0, 0, NULL, NULL, flag | VM_FIRSTFIT); @@ -1740,27 +1983,19 @@ return (DDI_FAILURE); } - hdl->hr_acchdl = acc_handle; + hdl->hr_acchdl = NULL; /* only used for mbox resources */ /* Calculate vaddr and HW table index */ offset = (uintptr_t)addr - (uintptr_t)pool_info->rsrc_start; - hdl->hr_addr = addr; + hdl->hr_addr = NULL; hdl->hr_indx = offset >> pool_info->rsrc_shift; - if (pool_info->rsrc_loc == HERMON_IN_ICM) { - /* confirm ICM is mapped, and allocate if necessary */ - status = hermon_rsrc_hw_entry_icm_confirm(pool_info, num, hdl); - if (status != DDI_SUCCESS) { - return (DDI_FAILURE); - } - hdl->hr_addr = NULL; - } + /* ICM will be allocated and mapped if and when it gets used */ return (DDI_SUCCESS); } - /* * hermon_rsrc_hw_entry_free() * Context: Can be called from interrupt or base context. 
@@ -1784,8 +2019,15 @@ vmem_xfree(pool_info->rsrc_vmp, addr, hdl->hr_len); if (pool_info->rsrc_loc == HERMON_IN_ICM) { + int num_to_hdl; + hermon_rsrc_type_t rsrc_type = pool_info->rsrc_type; + + num_to_hdl = (rsrc_type == HERMON_QPC || + rsrc_type == HERMON_CQC || rsrc_type == HERMON_SRQC); + /* free ICM references, and free ICM if required */ - status = hermon_rsrc_hw_entry_icm_free(pool_info, hdl); + status = hermon_rsrc_hw_entry_icm_free(pool_info, hdl, + num_to_hdl); if (status != DDI_SUCCESS) HERMON_WARNING(pool_info->rsrc_state, "failure in hw_entry_free"); @@ -1798,7 +2040,7 @@ */ static int hermon_rsrc_hw_entry_icm_confirm(hermon_rsrc_pool_info_t *pool_info, uint_t num, - hermon_rsrc_t *hdl) + hermon_rsrc_t *hdl, int num_to_hdl) { hermon_state_t *state; hermon_icm_table_t *icm_table; @@ -1838,7 +2080,7 @@ } mutex_enter(&icm_table->icm_table_lock); - hermon_bitmap(bitmap, dma_info, icm_table, index1); + hermon_bitmap(bitmap, dma_info, icm_table, index1, num_to_hdl); while (num) { #ifndef __lock_lint while (icm_table->icm_busy) { @@ -1866,13 +2108,6 @@ } /* - * Mellanox FMR accesses the MPT directly. We set the - * access handle here only for this case - */ - if (type == HERMON_DMPT) - hdl->hr_acchdl = dma_info[index2].acc_hdl; - - /* * We need to increment the refcnt of this span by the * number of objects in this resource allocation that are * backed by this span. 
Given that the rsrc allocation is @@ -1911,7 +2146,7 @@ break; hermon_index(index1, index2, rindx, icm_table, span_offset); - hermon_bitmap(bitmap, dma_info, icm_table, index1); + hermon_bitmap(bitmap, dma_info, icm_table, index1, num_to_hdl); } mutex_exit(&icm_table->icm_table_lock); @@ -1960,7 +2195,7 @@ */ static int hermon_rsrc_hw_entry_icm_free(hermon_rsrc_pool_info_t *pool_info, - hermon_rsrc_t *hdl) + hermon_rsrc_t *hdl, int num_to_hdl) { hermon_state_t *state; hermon_icm_table_t *icm_table; @@ -1989,7 +2224,7 @@ rindx = hdl->hr_indx; hermon_index(index1, index2, rindx, icm_table, span_offset); - hermon_bitmap(bitmap, dma_info, icm_table, index1); + hermon_bitmap(bitmap, dma_info, icm_table, index1, num_to_hdl); /* determine the number of ICM objects in this allocation */ num = hdl->hr_len >> pool_info->rsrc_shift; @@ -2051,7 +2286,7 @@ break; hermon_index(index1, index2, rindx, icm_table, span_offset); - hermon_bitmap(bitmap, dma_info, icm_table, index1); + hermon_bitmap(bitmap, dma_info, icm_table, index1, num_to_hdl); } mutex_exit(&icm_table->icm_table_lock);
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -379,8 +378,7 @@ * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the * "srqhdl" and return success */ - ASSERT(state->hs_srqhdl[srqc->hr_indx] == NULL); - state->hs_srqhdl[srqc->hr_indx] = srq; + hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq); /* * If this is a user-mappable SRQ, then we need to insert the @@ -518,7 +516,7 @@ * in-progress events to detect that the SRQ corresponding to this * number has been freed. */ - state->hs_srqhdl[srqc->hr_indx] = NULL; + hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL); mutex_exit(&srq->srq_lock); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)); @@ -757,10 +755,6 @@ srq_old_bufsz = srq->srq_wq_bufsz; bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); - /* Sync entire "new" SRQ for use by hardware (if necessary) */ - (void) ddi_dma_sync(bind.bi_dmahdl, 0, new_srqinfo.qa_size, - DDI_DMA_SYNC_FORDEV); - /* * Setup MPT information for use in the MODIFY_MPT command */ @@ -957,7 +951,7 @@ /* Calculate the SRQ table index from the srqnum */ srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1; srqindx = srqnum & srqmask; - return (state->hs_srqhdl[srqindx]); + return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx)); }
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_umap.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_umap.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -1656,7 +1655,7 @@ * then return invalid RecvQ parameters. Otherwise, return * the proper parameter values. */ - if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size; data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size; data->mqp_rq_numwqe = 0;
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_wr.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_wr.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -53,8 +52,6 @@ ibt_recv_wr_t *wr, uint64_t *desc); static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq, ibt_recv_wr_t *wr, uint64_t *desc); -static void hermon_wqe_sync(void *hdl, uint_t sync_from, - uint_t sync_to, uint_t sync_type, uint_t flag); static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t send_or_recv); static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl); @@ -79,6 +76,8 @@ hermon_hw_snd_wqe_ud_t *ud; hermon_workq_hdr_t *wq; hermon_ahhdl_t ah; + ibt_wr_rfci_send_t *rfci; + ibt_wr_init_send_t *is; ibt_ud_dest_t *dest; uint64_t *desc; uint32_t desc_sz; @@ -88,11 +87,13 @@ uint32_t nopcode, fence, immed_data = 0; hermon_hw_wqe_sgl_t *ds, *old_ds; ibt_wr_ds_t *sgl; - uint32_t nds, dnds; + int nds; int i, j, last_ds, num_ds, status; uint32_t *wqe_start; int sectperwqe; uint_t posted_cnt = 0; + int total_len, strong_order, fc_bits, cksum; + /* initialize the FMA retry loop */ hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num); @@ -132,27 +133,25 @@ desc = HERMON_QP_SQ_ENTRY(qp, tail); - ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + - sizeof (hermon_hw_snd_wqe_ctrl_t)); - ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + - sizeof (hermon_hw_snd_wqe_ud_t)); nds = wr->wr_nds; sgl = wr->wr_sgl; num_ds = 0; - - /* need to know the count of destination nds for backward loop */ - for (dnds = 0, i = 0; i < nds; i++) { - if (sgl[i].ds_len != 0) - dnds++; - } + strong_order = 0; + fc_bits = 0; + cksum = 0; /* * Build a Send or Send_LSO WQE */ - if (wr->wr_opcode == IBT_WRC_SEND_LSO) { - int total_len; 
- + switch (wr->wr_opcode) { + case IBT_WRC_SEND_LSO: + if (wr->wr_trans != IBT_UD_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } nopcode = HERMON_WQE_SEND_NOPCODE_LSO; + if (wr->wr_flags & IBT_WR_SEND_CKSUM) + cksum = 0x30; if (wr->wr.ud_lso.lso_hdr_sz > 60) { nopcode |= (1 << 6); /* ReRead bit must be set */ } @@ -162,6 +161,10 @@ status = IBT_AH_HDL_INVALID; goto done; } + ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + + sizeof (hermon_hw_snd_wqe_ud_t)); HERMON_WQE_BUILD_UD(qp, ud, ah, dest); total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf; @@ -175,22 +178,128 @@ wr->wr.ud_lso.lso_hdr_sz); ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len); i = 0; - } else if (wr->wr_opcode == IBT_WRC_SEND) { + break; + + case IBT_WRC_SEND: + nopcode = HERMON_WQE_SEND_NOPCODE_SEND; + if (qp->qp_serv_type == HERMON_QP_UD) { + if (wr->wr_trans != IBT_UD_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } + if (wr->wr_flags & IBT_WR_SEND_CKSUM) + cksum = 0x30; + dest = wr->wr.ud.udwr_dest; + } else if (qp->qp_serv_type == HERMON_QP_RFCI) { + if (wr->wr_trans != IBT_RFCI_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } + rfci = &wr->wr.fc.rfci_send; + if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) { + nopcode |= (rfci->rfci_eof << 16); + fc_bits = 0x40; /* set FCRC */ + } + dest = rfci->rfci_dest; + } else { + status = IBT_QP_OP_TYPE_INVALID; + goto done; + } if (wr->wr_flags & IBT_WR_SEND_IMMED) { - nopcode = HERMON_WQE_SEND_NOPCODE_SENDI; + /* "|=" changes 0xa to 0xb without touching FCEOF */ + nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI; immed_data = wr->wr.ud.udwr_immed; - } else { - nopcode = HERMON_WQE_SEND_NOPCODE_SEND; } - dest = wr->wr.ud.udwr_dest; ah = (hermon_ahhdl_t)dest->ud_ah; if (ah == NULL) { status = IBT_AH_HDL_INVALID; goto done; } + ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + ds = 
(hermon_hw_wqe_sgl_t *)((uintptr_t)ud + + sizeof (hermon_hw_snd_wqe_ud_t)); HERMON_WQE_BUILD_UD(qp, ud, ah, dest); i = 0; - } else { + break; + + case IBT_WRC_INIT_SEND_FCMD: + if (qp->qp_serv_type != HERMON_QP_FCMND) { + status = IBT_QP_OP_TYPE_INVALID; + goto done; + } + if (wr->wr_trans != IBT_FCMD_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } + nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND; + is = wr->wr.fc.fc_is; + dest = is->is_ctl.fc_dest; + ah = (hermon_ahhdl_t)dest->ud_ah; + if (ah == NULL) { + status = IBT_AH_HDL_INVALID; + goto done; + } + ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + + sizeof (hermon_hw_snd_wqe_ud_t)); + HERMON_WQE_BUILD_UD(qp, ud, ah, dest); + old_ds = ds; + /* move ds beyond the FCP-3 Init Segment */ + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10); + i = 0; + break; + + case IBT_WRC_FAST_REG_PMR: + { + hermon_hw_snd_wqe_frwr_t *frwr; + + if (qp->qp_serv_type != HERMON_QP_FCMND) { + status = IBT_QP_OP_TYPE_INVALID; + goto done; + } + if (wr->wr_trans != IBT_FCMD_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } + nopcode = HERMON_WQE_SEND_NOPCODE_FRWR; + frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr); + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr + + sizeof (hermon_hw_snd_wqe_frwr_t)); + nds = 0; + strong_order = 0x80; + break; + } + +#if 0 + /* firmware does not support this */ + case IBT_WRC_LOCAL_INVALIDATE: + { + hermon_hw_snd_wqe_local_inv_t *li; + + if (qp->qp_serv_type != HERMON_QP_FCMND) { + status = IBT_QP_OP_TYPE_INVALID; + goto done; + } + if (wr->wr_trans != IBT_FCMD_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } + nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV; + li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li); + 
ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li + + sizeof (hermon_hw_snd_wqe_local_inv_t)); + nds = 0; + strong_order = 0x80; + break; + } +#endif + default: status = IBT_QP_OP_TYPE_INVALID; goto done; } @@ -223,17 +332,33 @@ if (wr->wr_opcode == IBT_WRC_SEND_LSO) { HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss, wr->wr.ud_lso.lso_hdr_sz); + } else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) { + /* This sits in the STAMP, so must be set after setting SGL */ + HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl, + is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu, + is->is_dest_id, is->is_op, is->is_rem_exch, + is->is_exch_qp_idx); + + /* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */ + /* SIT bit in FCP-3 ctrl segment */ + desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0; + /* LS bit in FCP-3 ctrl segment */ + fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ? + 0x10000 : 0; + fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) | + (is->is_ctl.fc_seq_id << 24); + immed_data = is->is_ctl.fc_parameter; } fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0; signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || - (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0; + (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0; - solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0; + solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 
0x2 : 0; HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, - solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp); + solicited, signaled_dbd, cksum, qp, strong_order, fc_bits); wq->wq_wrid[tail] = wr->wr_id; @@ -312,13 +437,16 @@ hermon_hw_snd_wqe_remaddr_t *rc; hermon_hw_snd_wqe_atomic_t *at; hermon_hw_snd_wqe_bind_t *bn; + hermon_hw_snd_wqe_frwr_t *frwr; + hermon_hw_snd_wqe_local_inv_t *li; hermon_hw_wqe_sgl_t *ds; ibt_wr_ds_t *sgl; - uint32_t nds; + int nds; int i, last_ds, num_ds; uint32_t *wqe_start; int sectperwqe; uint_t posted_cnt = 0; + int strong_order; int print_rdma; int rlen; uint32_t rkey; @@ -343,6 +471,7 @@ post_next: print_rdma = 0; rlen = 0; + strong_order = 0; /* * Check for "queue full" condition. If the queue @@ -366,6 +495,10 @@ nds = wr->wr_nds; sgl = wr->wr_sgl; num_ds = 0; + if (wr->wr_trans != IBT_RC_SRV) { + status = IBT_QP_SRV_TYPE_INVALID; + goto done; + } /* * Validate the operation type. For RC requests, we allow @@ -378,7 +511,10 @@ goto done; case IBT_WRC_SEND: - if (wr->wr_flags & IBT_WR_SEND_IMMED) { + if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) { + nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV; + immed_data = wr->wr.rc.rcwr.send_inval; + } else if (wr->wr_flags & IBT_WR_SEND_IMMED) { nopcode = HERMON_WQE_SEND_NOPCODE_SENDI; immed_data = wr->wr.rc.rcwr.send_immed; } else { @@ -488,6 +624,29 @@ ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn + sizeof (hermon_hw_snd_wqe_bind_t)); nds = 0; + break; + + case IBT_WRC_FAST_REG_PMR: + nopcode = HERMON_WQE_SEND_NOPCODE_FRWR; + frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc + + sizeof (hermon_hw_snd_wqe_ctrl_t)); + HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr); + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr + + sizeof (hermon_hw_snd_wqe_frwr_t)); + nds = 0; + strong_order = 0x80; + break; + + case IBT_WRC_LOCAL_INVALIDATE: + nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV; + li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc + + sizeof 
(hermon_hw_snd_wqe_ctrl_t)); + HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li); + ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li + + sizeof (hermon_hw_snd_wqe_local_inv_t)); + nds = 0; + strong_order = 0x80; + break; } /* @@ -522,6 +681,12 @@ last_ds--; HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]); } + /* ensure RDMA READ does not exceed HCA limit */ + if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz > + state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) { + status = IBT_QP_SGL_LEN_INVALID; + goto done; + } if (print_rdma & 0x1) { IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx " @@ -531,12 +696,12 @@ fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0; signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || - (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0; + (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0; - solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0; + solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0; HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited, - signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp); + signaled_dbd, 0, qp, strong_order, 0); wq->wq_wrid[tail] = wr->wr_id; @@ -621,7 +786,6 @@ uint32_t desc_sz; uint32_t signaled_dbd, solicited; uint32_t head, tail, next_tail, qsize_msk; - uint32_t sync_from, sync_to; uint32_t hdrmwqes; uint_t currindx, wrindx, numremain; uint_t chainlen; @@ -630,6 +794,7 @@ int status; uint32_t nopcode, fence, immed_data = 0; uint32_t prev_nopcode; + uint_t qp_state; /* initialize the FMA retry loop */ hermon_pio_init(fm_loop_cnt, fm_status, fm_test); @@ -639,42 +804,33 @@ * clients to post to QP memory that is accessible directly by the * user. If the QP memory is user accessible, then return an error. */ - if (qp->qp_is_umap) { + if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { return (IBT_QP_HDL_INVALID); } - mutex_enter(&qp->qp_lock); + mutex_enter(&qp->qp_sq_lock); /* * Check QP state. 
Can not post Send requests from the "Reset", * "Init", or "RTR" states */ - if ((qp->qp_state == HERMON_QP_RESET) || - (qp->qp_state == HERMON_QP_INIT) || - (qp->qp_state == HERMON_QP_RTR)) { - mutex_exit(&qp->qp_lock); + qp_state = qp->qp_state_for_post_send; + if ((qp_state == HERMON_QP_RESET) || + (qp_state == HERMON_QP_INIT) || + (qp_state == HERMON_QP_RTR)) { + mutex_exit(&qp->qp_sq_lock); return (IBT_QP_STATE_INVALID); } - mutex_exit(&qp->qp_lock); - mutex_enter(&qp->qp_sq_lock); if (qp->qp_is_special) goto post_many; /* Use these optimized functions most of the time */ - if (qp->qp_serv_type == HERMON_QP_UD) { - if (wr->wr_trans != IBT_UD_SRV) { - mutex_exit(&qp->qp_sq_lock); - return (IBT_QP_SRV_TYPE_INVALID); - } + if (qp->qp_type == IBT_UD_RQP) { return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted)); } if (qp->qp_serv_type == HERMON_QP_RC) { - if (wr->wr_trans != IBT_RC_SRV) { - mutex_exit(&qp->qp_sq_lock); - return (IBT_QP_SRV_TYPE_INVALID); - } return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted)); } @@ -728,18 +884,6 @@ */ prev = HERMON_QP_SQ_ENTRY(qp, tail); - /* - * unlike Tavor & Arbel, tail will maintain the number of the - * next (this) WQE to be posted. 
Since there is no backward linking - * in Hermon, we can always just look ahead - */ - /* - * Before we begin, save the current "tail index" for later - * DMA sync - */ - /* NOTE: don't need to go back one like arbel/tavor */ - sync_from = tail; - /* * Break the request up into lists that are less than or * equal to the maximum number of WQEs that can be posted @@ -850,12 +994,12 @@ if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) { - signaled_dbd = 1; + signaled_dbd = 0xC; } else { signaled_dbd = 0; } if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT) - solicited = 1; + solicited = 0x2; else solicited = 0; @@ -873,8 +1017,7 @@ } else { HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited, - signaled_dbd, curr_wr->wr_flags & - IBT_WR_SEND_CKSUM, qp); + signaled_dbd, 0, qp, 0, 0); } wq->wq_wrid[tail] = curr_wr->wr_id; @@ -912,20 +1055,10 @@ if (posted_cnt != 0) { ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); - /* - * Save away updated "tail index" for the DMA sync - * including the headroom that will be needed - */ - sync_to = (tail + hdrmwqes) & qsize_msk; - /* do the invalidate of the headroom */ hermon_wqe_headroom(tail, qp); - /* Do a DMA sync for current send WQE(s) */ - hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND, - DDI_DMA_SYNC_FORDEV); - /* Update some of the state in the QP */ wq->wq_tail = tail; total_posted += posted_cnt; @@ -982,7 +1115,6 @@ uint64_t *desc; hermon_workq_hdr_t *wq; uint32_t head, tail, next_tail, qsize_msk; - uint32_t sync_from, sync_to; uint_t wrindx; uint_t posted_cnt; int status; @@ -992,7 +1124,7 @@ * clients to post to QP memory that is accessible directly by the * user. If the QP memory is user accessible, then return an error. 
*/ - if (qp->qp_is_umap) { + if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { return (IBT_QP_HDL_INVALID); } @@ -1004,7 +1136,7 @@ /* * Check if QP is associated with an SRQ */ - if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { mutex_exit(&qp->qp_lock); return (IBT_SRQ_IN_USE); } @@ -1018,16 +1150,13 @@ } /* Check that work request transport type is valid */ - if ((qp->qp_serv_type != HERMON_QP_UD) && + if ((qp->qp_type != IBT_UD_RQP) && (qp->qp_serv_type != HERMON_QP_RC) && (qp->qp_serv_type != HERMON_QP_UC)) { mutex_exit(&qp->qp_lock); return (IBT_QP_SRV_TYPE_INVALID); } - mutex_exit(&qp->qp_lock); - mutex_enter(&qp->qp_rq_lock); - /* * Grab the lock for the WRID list, i.e., membar_consumer(). * This is not needed because the mutex_enter() above has @@ -1042,11 +1171,6 @@ wrindx = 0; status = DDI_SUCCESS; - /* - * Before we begin, save the current "tail index" for later - * DMA sync - */ - sync_from = tail; for (wrindx = 0; wrindx < num_wr; wrindx++) { if (wq->wq_full != 0) { @@ -1071,11 +1195,6 @@ } if (posted_cnt != 0) { - /* Save away updated "tail index" for the DMA sync */ - sync_to = tail; - - hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV, - DDI_DMA_SYNC_FORDEV); wq->wq_tail = tail; @@ -1091,7 +1210,7 @@ } - mutex_exit(&qp->qp_rq_lock); + mutex_exit(&qp->qp_lock); return (status); } @@ -1149,8 +1268,6 @@ break; } - hermon_wqe_sync(srq, indx, indx + 1, - HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV); posted_cnt++; indx = htons(((uint16_t *)desc)[1]); wq->wq_head = indx; @@ -1795,7 +1912,7 @@ hermon_hw_wqe_sgl_t *ds; int i, num_ds; - ASSERT(MUTEX_HELD(&qp->qp_rq_lock)); + ASSERT(MUTEX_HELD(&qp->qp_lock)); /* * Fill in the Data Segments (SGL) for the Recv WQE - don't @@ -1984,118 +2101,6 @@ } /* - * hermon_wqe_sync() - * Context: Can be called from interrupt or base context. 
- */ -static void -hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to, - uint_t sync_type, uint_t flag) -{ - hermon_qphdl_t qp; - hermon_srqhdl_t srq; - uint64_t *wqe_from, *wqe_to; - uint64_t *wq_base, *wq_top, *qp_base; - ddi_dma_handle_t dmahdl; - off_t offset; - size_t length; - uint32_t qsize; - int status; - - if (sync_type == HERMON_WR_SRQ) { - srq = (hermon_srqhdl_t)hdl; - /* Get the DMA handle from SRQ context */ - dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl; - /* get base addr of the buffer */ - qp_base = (uint64_t *)(void *)srq->srq_wq_buf; - } else { - qp = (hermon_qphdl_t)hdl; - /* Get the DMA handle from QP context */ - dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl; - /* Determine the base address of the QP buffer */ - if (qp->qp_sq_baseaddr == 0) { - qp_base = (uint64_t *)(void *)(qp->qp_sq_buf); - } else { - qp_base = (uint64_t *)(void *)(qp->qp_rq_buf); - } - } - - /* - * Depending on the type of the work queue, we grab information - * about the address ranges we need to DMA sync. - */ - - if (sync_type == HERMON_WR_SEND) { - wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from); - wqe_to = HERMON_QP_SQ_ENTRY(qp, sync_to); - qsize = qp->qp_sq_bufsz; - - wq_base = HERMON_QP_SQ_ENTRY(qp, 0); - wq_top = HERMON_QP_SQ_ENTRY(qp, qsize); - } else if (sync_type == HERMON_WR_RECV) { - wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from); - wqe_to = HERMON_QP_RQ_ENTRY(qp, sync_to); - qsize = qp->qp_rq_bufsz; - - wq_base = HERMON_QP_RQ_ENTRY(qp, 0); - wq_top = HERMON_QP_RQ_ENTRY(qp, qsize); - } else { - wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from); - wqe_to = HERMON_SRQ_WQ_ENTRY(srq, sync_to); - qsize = srq->srq_wq_bufsz; - - wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0); - wq_top = HERMON_SRQ_WQ_ENTRY(srq, qsize); - } - - /* - * There are two possible cases for the beginning and end of the WQE - * chain we are trying to sync. 
Either this is the simple case, where - * the end of the chain is below the beginning of the chain, or it is - * the "wrap-around" case, where the end of the chain has wrapped over - * the end of the queue. In the former case, we simply need to - * calculate the span from beginning to end and sync it. In the latter - * case, however, we need to calculate the span from the top of the - * work queue to the end of the chain and sync that, and then we need - * to find the other portion (from beginning of chain to end of queue) - * and sync that as well. Note: if the "top to end" span is actually - * zero length, then we don't do a DMA sync because a zero length DMA - * sync unnecessarily syncs the entire work queue. - */ - if (wqe_to > wqe_from) { - /* "From Beginning to End" */ - - offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base); - length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from); - - status = ddi_dma_sync(dmahdl, offset, length, flag); - if (status != DDI_SUCCESS) { - return; - } - } else { - /* "From Top to End" */ - - offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base); - length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base); - if (length) { - status = ddi_dma_sync(dmahdl, offset, length, flag); - if (status != DDI_SUCCESS) { - return; - } - } - - /* "From Beginning to Bottom" */ - - offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base); - length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from); - status = ddi_dma_sync(dmahdl, offset, length, flag); - if (status != DDI_SUCCESS) { - return; - } - } -} - - -/* * hermon_wr_bind_check() * Context: Can be called from interrupt or base context. 
*/ @@ -2211,22 +2216,25 @@ hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) { hermon_workq_hdr_t *swq, *rwq; - uint_t qp_srq_en; - if (qp->qp_is_umap) + if (qp->qp_alloc_flags & IBT_QP_USER_MAP) return (DDI_SUCCESS); - /* grab the cq lock(s) to modify the wqavl tree */ +#ifdef __lock_lint mutex_enter(&qp->qp_rq_cqhdl->cq_lock); -#ifdef __lock_lint mutex_enter(&qp->qp_sq_cqhdl->cq_lock); #else - if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) + /* grab the cq lock(s) to modify the wqavl tree */ + if (qp->qp_rq_cqhdl) + mutex_enter(&qp->qp_rq_cqhdl->cq_lock); + if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && + qp->qp_sq_cqhdl != NULL) mutex_enter(&qp->qp_sq_cqhdl->cq_lock); #endif /* Chain the newly allocated work queue header to the CQ's list */ - hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); + if (qp->qp_sq_cqhdl) + hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); swq = qp->qp_sq_wqhdr; swq->wq_head = 0; @@ -2239,12 +2247,11 @@ * * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case. 
*/ - qp_srq_en = qp->qp_srq_en; #ifdef __lock_lint mutex_enter(&qp->qp_srqhdl->srq_lock); #else - if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { mutex_enter(&qp->qp_srqhdl->srq_lock); } else { rwq = qp->qp_rq_wqhdr; @@ -2259,18 +2266,21 @@ #ifdef __lock_lint mutex_exit(&qp->qp_srqhdl->srq_lock); #else - if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { mutex_exit(&qp->qp_srqhdl->srq_lock); } #endif #ifdef __lock_lint mutex_exit(&qp->qp_sq_cqhdl->cq_lock); + mutex_exit(&qp->qp_rq_cqhdl->cq_lock); #else - if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) + if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && + qp->qp_sq_cqhdl != NULL) mutex_exit(&qp->qp_sq_cqhdl->cq_lock); + if (qp->qp_rq_cqhdl) + mutex_exit(&qp->qp_rq_cqhdl->cq_lock); #endif - mutex_exit(&qp->qp_rq_cqhdl->cq_lock); return (DDI_SUCCESS); } @@ -2282,9 +2292,7 @@ int hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) { - uint_t qp_srq_en; - - if (qp->qp_is_umap) + if (qp->qp_alloc_flags & IBT_QP_USER_MAP) return (DDI_SUCCESS); /* @@ -2292,19 +2300,22 @@ * polled/flushed. * Grab the CQ lock(s) before manipulating the lists. 
*/ +#ifdef __lock_lint mutex_enter(&qp->qp_rq_cqhdl->cq_lock); -#ifdef __lock_lint mutex_enter(&qp->qp_sq_cqhdl->cq_lock); #else - if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) + /* grab the cq lock(s) to modify the wqavl tree */ + if (qp->qp_rq_cqhdl) + mutex_enter(&qp->qp_rq_cqhdl->cq_lock); + if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && + qp->qp_sq_cqhdl != NULL) mutex_enter(&qp->qp_sq_cqhdl->cq_lock); #endif - qp_srq_en = qp->qp_srq_en; #ifdef __lock_lint mutex_enter(&qp->qp_srqhdl->srq_lock); #else - if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { mutex_enter(&qp->qp_srqhdl->srq_lock); } #endif @@ -2316,21 +2327,25 @@ #ifdef __lock_lint mutex_exit(&qp->qp_srqhdl->srq_lock); #else - if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { + if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { mutex_exit(&qp->qp_srqhdl->srq_lock); } #endif hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); - hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); + if (qp->qp_sq_cqhdl != NULL) + hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); #ifdef __lock_lint mutex_exit(&qp->qp_sq_cqhdl->cq_lock); + mutex_exit(&qp->qp_rq_cqhdl->cq_lock); #else - if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) + if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && + qp->qp_sq_cqhdl != NULL) mutex_exit(&qp->qp_sq_cqhdl->cq_lock); + if (qp->qp_rq_cqhdl) + mutex_exit(&qp->qp_rq_cqhdl->cq_lock); #endif - mutex_exit(&qp->qp_rq_cqhdl->cq_lock); return (IBT_SUCCESS); }
--- a/usr/src/uts/common/io/ib/adapters/tavor/tavor.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/tavor/tavor.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -1811,14 +1810,14 @@ * either configuration variables or successful queries of the Tavor * hardware abilities */ - state->ts_ibtfinfo.hca_ci_vers = IBCI_V3; - state->ts_ibtfinfo.hca_dip = state->ts_dip; + state->ts_ibtfinfo.hca_ci_vers = IBCI_V4; state->ts_ibtfinfo.hca_handle = (ibc_hca_hdl_t)state; state->ts_ibtfinfo.hca_ops = &tavor_ibc_ops; hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP); state->ts_ibtfinfo.hca_attr = hca_attr; + hca_attr->hca_dip = state->ts_dip; hca_attr->hca_fw_major_version = state->ts_fw.fw_rev_major; hca_attr->hca_fw_minor_version = state->ts_fw.fw_rev_minor; hca_attr->hca_fw_micro_version = state->ts_fw.fw_rev_subminor; @@ -1856,6 +1855,7 @@ IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE | IBT_HCA_PORT_UP | IBT_HCA_SQD_STATE); hca_attr->hca_flags = caps; + hca_attr->hca_flags2 = IBT_HCA2_DMA_MR; /* Determine VendorID, DeviceID, and revision ID */ hca_attr->hca_vendor_id = state->ts_adapter.vendor_id;
--- a/usr/src/uts/common/io/ib/adapters/tavor/tavor_ci.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/tavor/tavor_ci.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -96,9 +95,9 @@ uint_t, uint_t *); static ibt_status_t tavor_ci_modify_cq(ibc_hca_hdl_t, ibc_cq_hdl_t, uint_t, uint_t, ibt_cq_handler_id_t); -static ibt_status_t tavor_ci_alloc_cq_sched(ibc_hca_hdl_t, ibt_cq_sched_flags_t, - ibc_cq_handler_attr_t *); -static ibt_status_t tavor_ci_free_cq_sched(ibc_hca_hdl_t, ibt_cq_handler_id_t); +static ibt_status_t tavor_ci_alloc_cq_sched(ibc_hca_hdl_t, + ibt_cq_sched_attr_t *, ibc_sched_hdl_t *); +static ibt_status_t tavor_ci_free_cq_sched(ibc_hca_hdl_t, ibc_sched_hdl_t); /* EE Contexts */ static ibt_status_t tavor_ci_alloc_eec(ibc_hca_hdl_t, ibc_eec_flags_t, @@ -127,6 +126,8 @@ ibc_pd_hdl_t, ibt_smr_attr_t *, struct buf *, void *, ibc_mr_hdl_t *, ibt_mr_desc_t *); static ibt_status_t tavor_ci_sync_mr(ibc_hca_hdl_t, ibt_mr_sync_t *, size_t); +static ibt_status_t tavor_ci_register_dma_mr(ibc_hca_hdl_t, ibc_pd_hdl_t, + ibt_dmr_attr_t *, void *, ibc_mr_hdl_t *, ibt_mr_desc_t *); /* Memory Windows */ static ibt_status_t tavor_ci_alloc_mw(ibc_hca_hdl_t, ibc_pd_hdl_t, @@ -209,6 +210,7 @@ static int tavor_mem_alloc(tavor_state_t *, size_t, ibt_mr_flags_t, caddr_t *, tavor_mem_alloc_hdl_t *); +static ibt_status_t tavor_ci_not_supported(); /* * This ibc_operations_t structure includes pointers to all the entry points @@ -252,6 +254,7 @@ tavor_ci_modify_cq, tavor_ci_alloc_cq_sched, tavor_ci_free_cq_sched, + tavor_ci_not_supported, /* query_cq_handler_id */ /* EE Contexts */ tavor_ci_alloc_eec, @@ -317,9 +320,40 @@ /* dmable memory */ tavor_ci_alloc_io_mem, - tavor_ci_free_io_mem + tavor_ci_free_io_mem, + + /* XRC not yet supported */ + 
tavor_ci_not_supported, /* ibc_alloc_xrc_domain */ + tavor_ci_not_supported, /* ibc_free_xrc_domain */ + tavor_ci_not_supported, /* ibc_alloc_xrc_srq */ + tavor_ci_not_supported, /* ibc_free_xrc_srq */ + tavor_ci_not_supported, /* ibc_query_xrc_srq */ + tavor_ci_not_supported, /* ibc_modify_xrc_srq */ + tavor_ci_not_supported, /* ibc_alloc_xrc_tgt_qp */ + tavor_ci_not_supported, /* ibc_free_xrc_tgt_qp */ + tavor_ci_not_supported, /* ibc_query_xrc_tgt_qp */ + tavor_ci_not_supported, /* ibc_modify_xrc_tgt_qp */ + + /* Memory Region (physical) */ + tavor_ci_register_dma_mr, + + /* Next enhancements */ + tavor_ci_not_supported, /* ibc_enhancement1 */ + tavor_ci_not_supported, /* ibc_enhancement2 */ + tavor_ci_not_supported, /* ibc_enhancement3 */ + tavor_ci_not_supported, /* ibc_enhancement4 */ }; +/* + * Not yet implemented OPS + */ +/* ARGSUSED */ +static ibt_status_t +tavor_ci_not_supported() +{ + return (IBT_NOT_SUPPORTED); +} + /* * tavor_ci_query_hca_ports() @@ -1294,29 +1328,18 @@ */ /* ARGSUSED */ static ibt_status_t -tavor_ci_alloc_cq_sched(ibc_hca_hdl_t hca, ibt_cq_sched_flags_t flags, - ibc_cq_handler_attr_t *handler_attr_p) +tavor_ci_alloc_cq_sched(ibc_hca_hdl_t hca, ibt_cq_sched_attr_t *attr, + ibc_sched_hdl_t *sched_hdl_p) { - TAVOR_TNF_ENTER(tavor_ci_alloc_cq_sched); - if (hca == NULL) { - TNF_PROBE_0(tavor_ci_alloc_cq_sched_fail, - TAVOR_TNF_ERROR, ""); - TAVOR_TNF_EXIT(tavor_ci_alloc_cq_sched); return (IBT_HCA_HDL_INVALID); } + *sched_hdl_p = NULL; /* * This is an unsupported interface for the Tavor driver. Tavor * does not support CQ scheduling classes. */ - - TAVOR_TNF_EXIT(tavor_ci_alloc_cq_sched); - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handler_attr_p)) - handler_attr_p->h_id = NULL; - handler_attr_p->h_pri = 0; - handler_attr_p->h_bind = NULL; - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handler_attr_p)) return (IBT_SUCCESS); } @@ -1326,30 +1349,18 @@ * Free a CQ scheduling class resource * Context: Can be called only from user or kernel context. 
*/ +/* ARGSUSED */ static ibt_status_t -tavor_ci_free_cq_sched(ibc_hca_hdl_t hca, ibt_cq_handler_id_t handler_id) +tavor_ci_free_cq_sched(ibc_hca_hdl_t hca, ibc_sched_hdl_t sched_hdl) { - TAVOR_TNF_ENTER(tavor_ci_free_cq_sched); - if (hca == NULL) { - TNF_PROBE_0(tavor_ci_free_cq_sched_fail, - TAVOR_TNF_ERROR, ""); - TAVOR_TNF_EXIT(tavor_ci_free_cq_sched); return (IBT_HCA_HDL_INVALID); } /* * This is an unsupported interface for the Tavor driver. Tavor - * does not support CQ scheduling classes. Returning a NULL - * hint is the way to treat this as unsupported. We check for - * the expected NULL, but do not fail in any case. + * does not support CQ scheduling classes. */ - if (handler_id != NULL) { - TNF_PROBE_1(tavor_ci_free_cq_sched, TAVOR_TNF_TRACE, "", - tnf_opaque, handler_id, handler_id); - } - - TAVOR_TNF_EXIT(tavor_ci_free_cq_sched); return (IBT_SUCCESS); } @@ -2194,6 +2205,76 @@ } +/* ARGSUSED */ +static ibt_status_t +tavor_ci_register_dma_mr(ibc_hca_hdl_t hca, ibc_pd_hdl_t pd, + ibt_dmr_attr_t *mr_attr, void *ibtl_reserved, ibc_mr_hdl_t *mr_p, + ibt_mr_desc_t *mr_desc) +{ + tavor_state_t *state; + tavor_pdhdl_t pdhdl; + tavor_mrhdl_t mrhdl; + int status; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_desc)) + + ASSERT(mr_attr != NULL); + ASSERT(mr_p != NULL); + ASSERT(mr_desc != NULL); + + /* Check for valid HCA handle */ + if (hca == NULL) { + return (IBT_HCA_HDL_INVALID); + } + + /* Check for valid PD handle pointer */ + if (pd == NULL) { + return (IBT_PD_HDL_INVALID); + } + + /* + * Validate the access flags. 
Both Remote Write and Remote Atomic + * require the Local Write flag to be set + */ + if (((mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) && + !(mr_attr->dmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)) { + return (IBT_MR_ACCESS_REQ_INVALID); + } + + /* Grab the Tavor softstate pointer and PD handle */ + state = (tavor_state_t *)hca; + pdhdl = (tavor_pdhdl_t)pd; + + status = tavor_dma_mr_register(state, pdhdl, mr_attr, &mrhdl); + if (status != DDI_SUCCESS) { + return (status); + } + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mrhdl)) + + /* Fill in the mr_desc structure */ + mr_desc->md_vaddr = mr_attr->dmr_paddr; + mr_desc->md_lkey = mrhdl->mr_lkey; + /* Only set RKey if remote access was requested */ + if ((mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) || + (mr_attr->dmr_flags & IBT_MR_ENABLE_REMOTE_READ)) { + mr_desc->md_rkey = mrhdl->mr_rkey; + } + + /* + * If region is mapped for streaming (i.e. noncoherent), then set + * sync is required + */ + mr_desc->md_sync_required = B_FALSE; + + /* Return the Hermon MR handle */ + *mr_p = (ibc_mr_hdl_t)mrhdl; + + return (IBT_SUCCESS); +} + + /* * tavor_ci_attach_mcg() * Attach a Queue Pair to a Multicast Group @@ -2988,19 +3069,211 @@ return (IBT_NOT_SUPPORTED); } +struct ibc_mi_s { + int imh_len; + ddi_dma_handle_t imh_dmahandle[1]; +}; +_NOTE(SCHEME_PROTECTS_DATA("safe sharing", + ibc_mi_s::imh_len + ibc_mi_s::imh_dmahandle)) + + +/* + * tavor_ci_map_mem_iov() + * Map the memory + * Context: Can be called from interrupt or base context. 
+ */ /* ARGSUSED */ static ibt_status_t -tavor_ci_map_mem_iov(ibc_hca_hdl_t hca, ibt_iov_attr_t *iov, +tavor_ci_map_mem_iov(ibc_hca_hdl_t hca, ibt_iov_attr_t *iov_attr, ibt_all_wr_t *wr, ibc_mi_hdl_t *mi_hdl_p) { - return (IBT_NOT_SUPPORTED); + int status; + int i, j, nds, max_nds; + uint_t len; + ibt_status_t ibt_status; + ddi_dma_handle_t dmahdl; + ddi_dma_cookie_t dmacookie; + ddi_dma_attr_t dma_attr; + uint_t cookie_cnt; + ibc_mi_hdl_t mi_hdl; + ibt_lkey_t rsvd_lkey; + ibt_wr_ds_t *sgl; + tavor_state_t *state; + int kmflag; + int (*callback)(caddr_t); + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wr)) + + if (mi_hdl_p == NULL) + return (IBT_MI_HDL_INVALID); + + /* Check for valid HCA handle */ + if (hca == NULL) + return (IBT_HCA_HDL_INVALID); + + /* Tavor does not allow the default "use reserved lkey" */ + if ((iov_attr->iov_flags & IBT_IOV_ALT_LKEY) == 0) + return (IBT_INVALID_PARAM); + + rsvd_lkey = iov_attr->iov_alt_lkey; + + state = (tavor_state_t *)hca; + tavor_dma_attr_init(&dma_attr); +#ifdef __sparc + if (state->ts_cfg_profile->cp_iommu_bypass == TAVOR_BINDMEM_BYPASS) + dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; +#endif + + nds = 0; + max_nds = iov_attr->iov_wr_nds; + if (iov_attr->iov_lso_hdr_sz) + max_nds -= (iov_attr->iov_lso_hdr_sz + sizeof (uint32_t) + + 0xf) >> 4; /* 0xf is for rounding up to a multiple of 16 */ + if ((iov_attr->iov_flags & IBT_IOV_NOSLEEP) == 0) { + kmflag = KM_SLEEP; + callback = DDI_DMA_SLEEP; + } else { + kmflag = KM_NOSLEEP; + callback = DDI_DMA_DONTWAIT; + } + + if (iov_attr->iov_flags & IBT_IOV_BUF) { + mi_hdl = kmem_alloc(sizeof (*mi_hdl), kmflag); + if (mi_hdl == NULL) + return (IBT_INSUFF_RESOURCE); + sgl = wr->send.wr_sgl; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgl)) + + status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, + callback, NULL, &dmahdl); + if (status != DDI_SUCCESS) { + kmem_free(mi_hdl, sizeof (*mi_hdl)); + return (IBT_INSUFF_RESOURCE); + } + status = ddi_dma_buf_bind_handle(dmahdl, 
iov_attr->iov_buf, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, callback, NULL, + &dmacookie, &cookie_cnt); + if (status != DDI_DMA_MAPPED) { + ddi_dma_free_handle(&dmahdl); + kmem_free(mi_hdl, sizeof (*mi_hdl)); + return (ibc_get_ci_failure(0)); + } + while (cookie_cnt-- > 0) { + if (nds > max_nds) { + status = ddi_dma_unbind_handle(dmahdl); + ddi_dma_free_handle(&dmahdl); + return (IBT_SGL_TOO_SMALL); + } + sgl[nds].ds_va = dmacookie.dmac_laddress; + sgl[nds].ds_key = rsvd_lkey; + sgl[nds].ds_len = (ib_msglen_t)dmacookie.dmac_size; + nds++; + if (cookie_cnt != 0) + ddi_dma_nextcookie(dmahdl, &dmacookie); + } + wr->send.wr_nds = nds; + mi_hdl->imh_len = 1; + mi_hdl->imh_dmahandle[0] = dmahdl; + *mi_hdl_p = mi_hdl; + return (IBT_SUCCESS); + } + + if (iov_attr->iov_flags & IBT_IOV_RECV) + sgl = wr->recv.wr_sgl; + else + sgl = wr->send.wr_sgl; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgl)) + + len = iov_attr->iov_list_len; + for (i = 0, j = 0; j < len; j++) { + if (iov_attr->iov[j].iov_len == 0) + continue; + i++; + } + mi_hdl = kmem_alloc(sizeof (*mi_hdl) + + (i - 1) * sizeof (ddi_dma_handle_t), kmflag); + if (mi_hdl == NULL) + return (IBT_INSUFF_RESOURCE); + mi_hdl->imh_len = i; + for (i = 0, j = 0; j < len; j++) { + if (iov_attr->iov[j].iov_len == 0) + continue; + status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, + callback, NULL, &dmahdl); + if (status != DDI_SUCCESS) { + ibt_status = IBT_INSUFF_RESOURCE; + goto fail2; + } + status = ddi_dma_addr_bind_handle(dmahdl, iov_attr->iov_as, + iov_attr->iov[j].iov_addr, iov_attr->iov[j].iov_len, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, callback, NULL, + &dmacookie, &cookie_cnt); + if (status != DDI_DMA_MAPPED) { + ibt_status = ibc_get_ci_failure(0); + goto fail1; + } + if (nds + cookie_cnt > max_nds) { + ibt_status = IBT_SGL_TOO_SMALL; + goto fail2; + } + while (cookie_cnt-- > 0) { + sgl[nds].ds_va = dmacookie.dmac_laddress; + sgl[nds].ds_key = rsvd_lkey; + sgl[nds].ds_len = (ib_msglen_t)dmacookie.dmac_size; + nds++; + if 
(cookie_cnt != 0) + ddi_dma_nextcookie(dmahdl, &dmacookie); + } + mi_hdl->imh_dmahandle[i] = dmahdl; + i++; + } + + if (iov_attr->iov_flags & IBT_IOV_RECV) + wr->recv.wr_nds = nds; + else + wr->send.wr_nds = nds; + *mi_hdl_p = mi_hdl; + return (IBT_SUCCESS); + +fail1: + ddi_dma_free_handle(&dmahdl); +fail2: + while (--i >= 0) { + status = ddi_dma_unbind_handle(mi_hdl->imh_dmahandle[i]); + ddi_dma_free_handle(&mi_hdl->imh_dmahandle[i]); + } + kmem_free(mi_hdl, sizeof (*mi_hdl) + + (len - 1) * sizeof (ddi_dma_handle_t)); + *mi_hdl_p = NULL; + return (ibt_status); } +/* + * tavor_ci_unmap_mem_iov() + * Unmap the memory + * Context: Can be called from interrupt or base context. + */ /* ARGSUSED */ static ibt_status_t tavor_ci_unmap_mem_iov(ibc_hca_hdl_t hca, ibc_mi_hdl_t mi_hdl) { - return (IBT_NOT_SUPPORTED); + int i; + + /* Check for valid HCA handle */ + if (hca == NULL) + return (IBT_HCA_HDL_INVALID); + + if (mi_hdl == NULL) + return (IBT_MI_HDL_INVALID); + + for (i = 0; i < mi_hdl->imh_len; i++) { + (void) ddi_dma_unbind_handle(mi_hdl->imh_dmahandle[i]); + ddi_dma_free_handle(&mi_hdl->imh_dmahandle[i]); + } + kmem_free(mi_hdl, sizeof (*mi_hdl) + + (mi_hdl->imh_len - 1) * sizeof (ddi_dma_handle_t)); + return (IBT_SUCCESS); } /* Allocate L_Key */
--- a/usr/src/uts/common/io/ib/adapters/tavor/tavor_mr.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/adapters/tavor/tavor_mr.c Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -597,27 +596,31 @@ } } - /* - * Decrement the MTT reference count. Since the MTT resource - * may be shared between multiple memory regions (as a result - * of a "RegisterSharedMR" verb) it is important that we not - * free up or unbind resources prematurely. If it's not shared (as - * indicated by the return status), then free the resource. - */ - shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt); - if (!shared_mtt) { - tavor_rsrc_free(state, &mtt_refcnt); - } + /* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */ + if (mtt_refcnt != NULL) { + /* + * Decrement the MTT reference count. Since the MTT resource + * may be shared between multiple memory regions (as a result + * of a "RegisterSharedMR" verb) it is important that we not + * free up or unbind resources prematurely. If it's not shared + * (as indicated by the return status), then free the resource. + */ + shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt); + if (!shared_mtt) { + tavor_rsrc_free(state, &mtt_refcnt); + } - /* - * Free up the MTT entries and unbind the memory. Here, as above, we - * attempt to free these resources only if it is appropriate to do so. - */ - if (!shared_mtt) { - if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) { - tavor_mr_mem_unbind(state, bind); + /* + * Free up the MTT entries and unbind the memory. Here, + * as above, we attempt to free these resources only if + * it is appropriate to do so. 
+ */ + if (!shared_mtt) { + if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) { + tavor_mr_mem_unbind(state, bind); + } + tavor_rsrc_free(state, &mtt); } - tavor_rsrc_free(state, &mtt); } /* @@ -1483,6 +1486,178 @@ return (status); } +int +tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd, + ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl) +{ + tavor_rsrc_t *mpt, *rsrc; + tavor_hw_mpt_t mpt_entry; + tavor_mrhdl_t mr; + ibt_mr_flags_t flags; + uint_t sleep; + int status; + + /* Extract the flags field */ + flags = mr_attr->dmr_flags; + + /* + * Check the sleep flag. Ensure that it is consistent with the + * current thread context (i.e. if we are currently in the interrupt + * context, then we shouldn't be attempting to sleep). + */ + sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; + if ((sleep == TAVOR_SLEEP) && + (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { + status = IBT_INVALID_PARAM; + goto mrcommon_fail; + } + + /* Increment the reference count on the protection domain (PD) */ + tavor_pd_refcnt_inc(pd); + + /* + * Allocate an MPT entry. This will be filled in with all the + * necessary parameters to define the memory region. And then + * ownership will be passed to the hardware in the final step + * below. If we fail here, we must undo the protection domain + * reference count. + */ + status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto mrcommon_fail1; + } + + /* + * Allocate the software structure for tracking the memory region (i.e. + * the Tavor Memory Region handle). If we fail here, we must undo + * the protection domain reference count and the previous resource + * allocation. 
+ */ + status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); + if (status != DDI_SUCCESS) { + status = IBT_INSUFF_RESOURCE; + goto mrcommon_fail2; + } + mr = (tavor_mrhdl_t)rsrc->tr_addr; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) + bzero(mr, sizeof (*mr)); + + /* + * Setup and validate the memory region access flags. This means + * translating the IBTF's enable flags into the access flags that + * will be used in later operations. + */ + mr->mr_accflag = 0; + if (flags & IBT_MR_ENABLE_WINDOW_BIND) + mr->mr_accflag |= IBT_MR_WINDOW_BIND; + if (flags & IBT_MR_ENABLE_LOCAL_WRITE) + mr->mr_accflag |= IBT_MR_LOCAL_WRITE; + if (flags & IBT_MR_ENABLE_REMOTE_READ) + mr->mr_accflag |= IBT_MR_REMOTE_READ; + if (flags & IBT_MR_ENABLE_REMOTE_WRITE) + mr->mr_accflag |= IBT_MR_REMOTE_WRITE; + if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) + mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; + + /* + * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed + * from a certain number of "constrained" bits (the least significant + * bits) and some number of "unconstrained" bits. The constrained + * bits must be set to the index of the entry in the MPT table, but + * the unconstrained bits can be set to any value we wish. Note: + * if no remote access is required, then the RKey value is not filled + * in. Otherwise both Rkey and LKey are given the same value. + */ + tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); + if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || + (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || + (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { + mr->mr_rkey = mr->mr_lkey; + } + + /* + * Fill in the MPT entry. This is the final step before passing + * ownership of the MPT entry to the Tavor hardware. We use all of + * the information collected/calculated above to fill in the + * requisite portions of the MPT. + */ + bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); + + mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; + mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 
1 : 0; + mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; + mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; + mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; + mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; + mpt_entry.lr = 1; + mpt_entry.phys_addr = 1; /* critical bit for this */ + mpt_entry.reg_win = TAVOR_MPT_IS_REGION; + + mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; + mpt_entry.mem_key = mr->mr_lkey; + mpt_entry.pd = pd->pd_pdnum; + mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; + + mpt_entry.start_addr = mr_attr->dmr_paddr; + mpt_entry.reg_win_len = mr_attr->dmr_len; + + mpt_entry.mttseg_addr_h = 0; + mpt_entry.mttseg_addr_l = 0; + + /* + * Write the MPT entry to hardware. Lastly, we pass ownership of + * the entry to the hardware if needed. Note: in general, this + * operation shouldn't fail. But if it does, we have to undo + * everything we've done above before returning error. + * + * For Tavor, this routine (which is common to the contexts) will only + * set the ownership if needed - the process of passing the context + * itself to HW will take care of setting up the MPT (based on type + * and index). + */ + + status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, + sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); + if (status != TAVOR_CMD_SUCCESS) { + cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", + status); + status = ibc_get_ci_failure(0); + goto mrcommon_fail7; + } + + /* + * Fill in the rest of the Tavor Memory Region handle. Having + * successfully transferred ownership of the MPT, we can update the + * following fields for use in further operations on the MR. 
+ */ + mr->mr_mptrsrcp = mpt; + mr->mr_mttrsrcp = NULL; + mr->mr_pdhdl = pd; + mr->mr_rsrcp = rsrc; + mr->mr_is_umem = 0; + mr->mr_umemcookie = NULL; + mr->mr_umem_cbfunc = NULL; + mr->mr_umem_cbarg1 = NULL; + mr->mr_umem_cbarg2 = NULL; + + *mrhdl = mr; + + return (DDI_SUCCESS); + +/* + * The following is cleanup for all possible failure cases in this routine + */ +mrcommon_fail7: + tavor_rsrc_free(state, &rsrc); +mrcommon_fail2: + tavor_rsrc_free(state, &mpt); +mrcommon_fail1: + tavor_pd_refcnt_dec(pd); +mrcommon_fail: + return (status); +} + /* * tavor_mr_mtt_bind() * Context: Can be called from interrupt or base context.
--- a/usr/src/uts/common/io/ib/clients/of/sol_ofs/sol_ib_cma.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/clients/of/sol_ofs/sol_ib_cma.c Thu Jul 29 22:10:26 2010 -0700 @@ -1212,6 +1212,7 @@ bcopy(local_addrp, &path_attr.ipa_src_ip, sizeof (ibt_ip_addr_t)); path_attr.ipa_ndst = 1; path_attr.ipa_max_paths = 1; + path_attr.ipa_zoneid = 0; if ((status = ibt_get_ip_paths(chanp->chan_ib_client_hdl, IBT_PATH_NO_FLAGS, &path_attr, &local_path, NULL, NULL)) != @@ -1316,6 +1317,7 @@ sizeof (ibt_ip_addr_t)); path_attr.ipa_ndst = 1; path_attr.ipa_max_paths = 2; + path_attr.ipa_zoneid = 0; if (ibcma_any_addr(&path_attr.ipa_src_ip)) path_attr.ipa_src_ip.family = AF_UNSPEC; @@ -2462,7 +2464,9 @@ uint_t num_ports, p; uint_t port_size; ibt_hca_portinfo_t *port_info, *tmp; - ibt_ip_addr_t hca_ipaddr; + ibt_srcip_info_t *src_info; + ibt_srcip_attr_t attr; + uint_t entries; SOL_OFS_DPRINTF_L5(sol_rdmacm_dbg_str, "get_devlist(%p, %p, %x, %p, %x)", root_chanp, hca_guidp, @@ -2498,10 +2502,16 @@ if (tmp->p_pkey_tbl[pk] == 0) continue; if (with_ipaddr_only == B_TRUE) { - status = ibt_get_src_ip( - tmp->p_sgid_tbl[s], - tmp->p_pkey_tbl[pk], - &hca_ipaddr); + bcopy(&tmp->p_sgid_tbl[s], + &attr.sip_gid, + sizeof (ib_gid_t)); + attr.sip_pkey = + tmp->p_pkey_tbl[pk]; + attr.sip_family = AF_INET; + attr.sip_zoneid = 0; + + status = ibt_get_src_ip(&attr, + &src_info, &entries); if (status != IBT_SUCCESS) continue; } @@ -2514,10 +2524,13 @@ devp->dev_pkey_ix = pk; devp->dev_pkey = tmp->p_pkey_tbl[pk]; devp->dev_sgid = tmp->p_sgid_tbl[s]; - if (with_ipaddr_only == B_TRUE) - bcopy(&hca_ipaddr, + if (with_ipaddr_only == B_TRUE) { + bcopy(&src_info[0].ip_addr, &devp->dev_ipaddr, sizeof (ibt_ip_addr_t)); + ibt_free_srcip_info(src_info, + entries); + } SOL_OFS_DPRINTF_L5(sol_rdmacm_dbg_str, "get_devlist: add2devlist "
--- a/usr/src/uts/common/io/ib/clients/of/sol_ofs/sol_kverbs.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/clients/of/sol_ofs/sol_kverbs.c Thu Jul 29 22:10:26 2010 -0700 @@ -937,7 +937,7 @@ struct ib_cq * ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, - int cqe, int comp_vector) + int cqe, void *comp_vector) { ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl; ibt_cq_attr_t cq_attr; @@ -949,7 +949,7 @@ SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, comp_handler: 0x%p, " "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, " - "comp_vector: %d => no sufficient memory", device, + "comp_vector: %p => no sufficient memory", device, comp_handler, event_handler, cq_context, cqe, comp_vector); return ((struct ib_cq *)-ENOMEM); } @@ -960,7 +960,7 @@ SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, comp_handler: 0x%p, " "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, " - "comp_vector: %d => invalid device state (%d)", device, + "comp_vector: %p => invalid device state (%d)", device, comp_handler, event_handler, cq_context, cqe, comp_vector, device->reg_state); return ((struct ib_cq *)-ENXIO); @@ -973,7 +973,7 @@ cq_context, cqe, comp_vector); cq_attr.cq_size = cqe; - cq_attr.cq_sched = 0; /* no hint */ + cq_attr.cq_sched = comp_vector; cq_attr.cq_flags = IBT_CQ_NO_FLAGS; rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size); ofs_lock_exit(&ofs_client->lock);
--- a/usr/src/uts/common/io/ib/clients/rdsv3/ib_cm.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/clients/rdsv3/ib_cm.c Thu Jul 29 22:10:26 2010 -0700 @@ -497,7 +497,7 @@ ic->i_cq = ib_create_cq(dev, rdsv3_ib_cq_comp_handler, rdsv3_ib_cq_event_handler, conn, ic->i_recv_ring.w_nr + ic->i_send_ring.w_nr + 1, - (intptr_t)rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); + rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); if (IS_ERR(ic->i_cq)) { ret = PTR_ERR(ic->i_cq); ic->i_cq = NULL; @@ -508,7 +508,7 @@ if (rdsv3_enable_snd_cq) { ic->i_snd_cq = ib_create_cq(dev, rdsv3_ib_snd_cq_comp_handler, rdsv3_ib_cq_event_handler, conn, ic->i_send_ring.w_nr + 1, - (intptr_t)rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); + rdsv3_af_grp_get_sched(ic->rds_ibdev->aft_hcagp)); if (IS_ERR(ic->i_snd_cq)) { ret = PTR_ERR(ic->i_snd_cq); (void) ib_destroy_cq(ic->i_cq);
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_chan.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_chan.c Thu Jul 29 22:10:26 2010 -0700 @@ -404,15 +404,48 @@ ibt_qp_alloc_attr_t qp_attr; ibt_qp_info_t qp_modify_attr; ibt_channel_hdl_t chanp; + ibt_chan_alloc_flags_t variant_flags; IBTF_DPRINTF_L3(ibtl_chan, "ibt_alloc_ud_channel(%p, %x, %p, %p)", hca_hdl, flags, args, sizes); + if (flags & IBT_ACHAN_USES_FEXCH) { + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel: " + "FEXCH QPs are allocated by ibt_alloc_ud_channel_range()"); + return (IBT_CHAN_SRV_TYPE_INVALID); + } + bzero(&qp_modify_attr, sizeof (ibt_qp_info_t)); - + bzero(&qp_attr, sizeof (ibt_qp_alloc_attr_t)); qp_attr.qp_alloc_flags = IBT_QP_NO_FLAGS; - if (flags & IBT_ACHAN_USER_MAP) + + /* allow at most one of these flags */ + variant_flags = flags & (IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_RSS | + IBT_ACHAN_USES_RFCI | IBT_ACHAN_USES_FCMD | IBT_ACHAN_CLONE); + switch (variant_flags) { + case IBT_ACHAN_USER_MAP: qp_attr.qp_alloc_flags |= IBT_QP_USER_MAP; + break; + case IBT_ACHAN_USES_RSS: + qp_attr.qp_alloc_flags |= IBT_QP_USES_RSS; + qp_modify_attr.qp_transport.ud.ud_rss = args->ud_rss; + break; + case IBT_ACHAN_USES_RFCI: + qp_attr.qp_alloc_flags |= IBT_QP_USES_RFCI; + qp_modify_attr.qp_transport.ud.ud_fc = qp_attr.qp_fc = + args->ud_fc; + break; + case IBT_ACHAN_USES_FCMD: + qp_attr.qp_alloc_flags |= IBT_QP_USES_FCMD; + qp_modify_attr.qp_transport.ud.ud_fc = qp_attr.qp_fc = + args->ud_fc; + break; + case IBT_ACHAN_CLONE: + case 0: + break; + default: + return (IBT_INVALID_PARAM); + } if (flags & IBT_ACHAN_DEFER_ALLOC) qp_attr.qp_alloc_flags |= IBT_QP_DEFER_ALLOC; @@ -423,6 +456,11 @@ "NULL SRQ Handle specified."); return (IBT_INVALID_PARAM); } + if (flags & IBT_ACHAN_USES_RSS) { + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel: " + "SRQ not allowed with RSS."); + return (IBT_INVALID_PARAM); + } qp_attr.qp_alloc_flags |= IBT_QP_USES_SRQ; } @@ -534,6 +572,191 @@ /* * Function: + * 
ibt_alloc_ud_channel_range + * Input: + * hca_hdl HCA Handle. + * log2 Log (base 2) of the number of QPs to allocate. + * flags Channel allocate flags. + * args A pointer to an ibt_ud_chan_alloc_args_t struct that + * specifies required channel attributes. + * send_cq A pointer to an array of CQ handles. + * recv_cq A pointer to an array of CQ handles. + * Output: + * base_qpn_p The returned QP number of the base QP. + * ud_chan_p The returned UD Channel handle. + * sizes NULL or a pointer to ibt_chan_sizes_s struct where + * new SendQ/RecvQ, and WR SGL sizes are returned. + * Returns: + * IBT_SUCCESS + * IBT_INVALID_PARAM + * Description: + * Allocate UD channels that satisfy the specified channel attributes. + */ +ibt_status_t +ibt_alloc_ud_channel_range(ibt_hca_hdl_t hca_hdl, uint_t log2, + ibt_chan_alloc_flags_t flags, ibt_ud_chan_alloc_args_t *args, + ibt_cq_hdl_t *send_cq, ibt_cq_hdl_t *recv_cq, ib_qpn_t *base_qpn_p, + ibt_channel_hdl_t *ud_chan_p, ibt_chan_sizes_t *sizes) +{ + ibt_status_t retval; + ibt_qp_alloc_attr_t qp_attr; + ibt_qp_info_t qp_modify_attr; + ibtl_channel_t *chanp; + ibt_cq_hdl_t ibt_cq_hdl; + ibc_cq_hdl_t *ibc_send_cq, *ibc_recv_cq; + ibc_qp_hdl_t *ibc_qp_hdl_p; + int i, n = 1 << log2; + ib_pkey_t tmp_pkey; + + + IBTF_DPRINTF_L3(ibtl_chan, "ibt_alloc_ud_channel_range(%p, %x, %p, %p)", + hca_hdl, flags, args, sizes); + + bzero(&qp_modify_attr, sizeof (ibt_qp_info_t)); + + qp_attr.qp_alloc_flags = IBT_QP_NO_FLAGS; + + if (flags & IBT_ACHAN_CLONE) + return (IBT_INVALID_PARAM); + + if (flags & IBT_ACHAN_USER_MAP) + qp_attr.qp_alloc_flags |= IBT_QP_USER_MAP; + + if (flags & IBT_ACHAN_DEFER_ALLOC) + qp_attr.qp_alloc_flags |= IBT_QP_DEFER_ALLOC; + + if (flags & IBT_ACHAN_USES_SRQ) { + if (args->ud_srq == NULL) { + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel: " + "NULL SRQ Handle specified."); + return (IBT_INVALID_PARAM); + } + qp_attr.qp_alloc_flags |= IBT_QP_USES_SRQ; + } + + if (flags & IBT_ACHAN_USES_FEXCH) { + qp_attr.qp_alloc_flags |= 
IBT_QP_USES_FEXCH; + qp_attr.qp_fc = args->ud_fc; + qp_modify_attr.qp_transport.ud.ud_fc = qp_attr.qp_fc = + args->ud_fc; + } + if (flags & IBT_ACHAN_USES_RSS) { + if (log2 > + hca_hdl->ha_hca_devp->hd_hca_attr->hca_rss_max_log2_table) + return (IBT_INSUFF_RESOURCE); + qp_attr.qp_alloc_flags |= IBT_QP_USES_RSS; + } + + ibc_send_cq = kmem_alloc(sizeof (ibc_cq_hdl_t) << log2, KM_SLEEP); + ibc_recv_cq = kmem_alloc(sizeof (ibc_cq_hdl_t) << log2, KM_SLEEP); + ibc_qp_hdl_p = kmem_alloc(sizeof (ibc_qp_hdl_t) << log2, KM_SLEEP); + + for (i = 0; i < 1 << log2; i++) { + ud_chan_p[i] = kmem_zalloc(sizeof (ibtl_channel_t), KM_SLEEP); + ibt_cq_hdl = send_cq[i]; + ibc_send_cq[i] = ibt_cq_hdl ? ibt_cq_hdl->cq_ibc_cq_hdl : NULL; + ibt_cq_hdl = recv_cq[i]; + ibc_recv_cq[i] = ibt_cq_hdl ? ibt_cq_hdl->cq_ibc_cq_hdl : NULL; + } + + /* Setup QP alloc attributes. */ + qp_attr.qp_pd_hdl = args->ud_pd; + qp_attr.qp_flags = args->ud_flags; + qp_attr.qp_srq_hdl = args->ud_srq; + + bcopy(&args->ud_sizes, &qp_attr.qp_sizes, + sizeof (ibt_chan_sizes_t)); + + qp_modify_attr.qp_transport.ud.ud_port = args->ud_hca_port_num; + qp_modify_attr.qp_transport.ud.ud_qkey = args->ud_qkey; + + /* Validate input hca_port_num and pkey_ix values. */ + if ((retval = ibt_index2pkey(hca_hdl, args->ud_hca_port_num, + args->ud_pkey_ix, &tmp_pkey)) != IBT_SUCCESS) { + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel_range:" + " ibt_index2pkey failed, status: %d", retval); + goto fail; + } + qp_modify_attr.qp_transport.ud.ud_pkey_ix = args->ud_pkey_ix; + + /* Allocate Channel and Initialize the channel. 
*/ + retval = (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_qp_range)( + IBTL_HCA2CIHCA(hca_hdl), log2, (ibtl_qp_hdl_t *)ud_chan_p, + IBT_UD_RQP, &qp_attr, sizes, ibc_send_cq, ibc_recv_cq, + base_qpn_p, ibc_qp_hdl_p); + if (retval != IBT_SUCCESS) { + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel_range: " + "Failed to allocate QPs: %d", retval); + goto fail; + } + + /* Initialize UD Channel by transitioning it to RTS State. */ + qp_modify_attr.qp_trans = IBT_UD_SRV; + qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS; + qp_modify_attr.qp_transport.ud.ud_sq_psn = 0; + + for (i = 0; i < n; i++) { + /* Initialize the internal QP struct. */ + chanp = ud_chan_p[i]; + chanp->ch_qp.qp_type = IBT_UD_SRV; + chanp->ch_qp.qp_hca = hca_hdl; + chanp->ch_qp.qp_ibc_qp_hdl = ibc_qp_hdl_p[i]; + chanp->ch_qp.qp_send_cq = send_cq[i]; + chanp->ch_qp.qp_recv_cq = recv_cq[i]; + chanp->ch_current_state = IBT_STATE_RESET; + mutex_init(&chanp->ch_cm_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&chanp->ch_cm_cv, NULL, CV_DEFAULT, NULL); + + retval = ibt_initialize_qp(chanp, &qp_modify_attr); + if (retval != IBT_SUCCESS) { + int j; + + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel_range:" + " Failed to Initialize QP: %d", retval); + + /* Free the QP as we failed to initialize it. */ + (void) ibt_free_qp(chanp); + for (j = 0; j < i; j++) { + chanp = ud_chan_p[j]; + (void) ibt_free_qp(chanp); + } + goto fail; + } + + /* + * The IBTA spec does not include the signal type or PD on a QP + * query operation. In order to implement the "CLONE" feature + * we need to cache these values. 
+ */ + chanp->ch_qp.qp_flags = qp_attr.qp_flags; + chanp->ch_qp.qp_pd_hdl = qp_attr.qp_pd_hdl; + } + + + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel_range(%p): SUCCESS"); + + atomic_add_32(&hca_hdl->ha_qp_cnt, n); + + retval = IBT_SUCCESS; + +fail: + kmem_free(ibc_send_cq, sizeof (ibc_cq_hdl_t) << log2); + kmem_free(ibc_recv_cq, sizeof (ibc_cq_hdl_t) << log2); + kmem_free(ibc_qp_hdl_p, sizeof (ibc_qp_hdl_t) << log2); + if (retval != IBT_SUCCESS) { + for (i = 0; i < 1 << log2; i++) { + kmem_free(ud_chan_p[i], sizeof (ibtl_channel_t)); + ud_chan_p[i] = NULL; + } + IBTF_DPRINTF_L2(ibtl_chan, "ibt_alloc_ud_channel_range(%p): " + "failed: %d", retval); + } + return (retval); +} + + +/* + * Function: * ibt_query_ud_channel * Input: * ud_chan A previously allocated UD channel handle. @@ -592,6 +815,8 @@ ud_chan_attrs->ud_flags = ud_chan->ch_qp.qp_flags; + ud_chan_attrs->ud_query_fc = qp_attr.qp_query_fexch; + return (retval); }
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_cq.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_cq.c Thu Jul 29 22:10:26 2010 -0700 @@ -51,28 +51,11 @@ ibt_alloc_cq_sched(ibt_hca_hdl_t hca_hdl, ibt_cq_sched_attr_t *attr, ibt_sched_hdl_t *sched_hdl_p) { - ibc_cq_handler_attr_t handler_attrs; - ibt_cq_priority_t priority; - IBTF_DPRINTF_L3(ibtf_cq, "ibt_alloc_cq_sched(%p, %p, %p)", hca_hdl, attr, sched_hdl_p); - /* Validate and Convert the IBT CQ priority */ - priority = attr->cqs_priority; - - if ((priority < IBT_CQ_DEFAULT) || (priority > IBT_CQ_PRI_16)) { - return (IBT_CQ_INVALID_PRIORITY); - } - - - /* - * Do we need to check for valid range for load ? What's the valid - * range? - */ - *sched_hdl_p = NULL; /* Function not implemented fully yet */ - return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_alloc_cq_sched( - IBTL_HCA2CIHCA(hca_hdl), attr->cqs_flags, &handler_attrs)); + IBTL_HCA2CIHCA(hca_hdl), attr, sched_hdl_p)); } @@ -84,20 +67,13 @@ * load - CQ load being removed. */ ibt_status_t -ibt_free_cq_sched(ibt_hca_hdl_t hca_hdl, ibt_sched_hdl_t sched_hdl, - uint_t load) +ibt_free_cq_sched(ibt_hca_hdl_t hca_hdl, ibt_sched_hdl_t sched_hdl) { - ibt_cq_handler_id_t handler_id = 0; + IBTF_DPRINTF_L3(ibtf_cq, "ibt_free_cq_sched(%p, %p)", + hca_hdl, sched_hdl); - IBTF_DPRINTF_L3(ibtf_cq, "ibt_free_cq_sched(%p, %d, %p)", - hca_hdl, sched_hdl, load); - - /* - * Function not fully implemented should get handler ID from - * sched_hdl. - */ return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_free_cq_sched( - IBTL_HCA2CIHCA(hca_hdl), handler_id)); + IBTL_HCA2CIHCA(hca_hdl), sched_hdl)); } @@ -282,3 +258,17 @@ { return (ibt_cq->cq_clnt_private); } + +/* + * ibt_query_cq_handler_id - Retrieves the attributes of a cq_handler_id. 
+ */ +ibt_status_t +ibt_query_cq_handler_id(ibt_hca_hdl_t hca_hdl, + ibt_cq_handler_id_t hid, ibt_cq_handler_attr_t *attrs) +{ + IBTF_DPRINTF_L3(ibtf_cq, "ibt_query_cq_handler(%p, %d, %p)", + hca_hdl, hid, attrs); + + return (IBTL_HCA2CIHCAOPS_P(hca_hdl)->ibc_query_cq_handler_id( + IBTL_HCA2CIHCA(hca_hdl), hid, attrs)); +}
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_handlers.c Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/ib/ibtl/impl/ibtl.h> @@ -211,6 +210,7 @@ case IBT_ERROR_INVALID_REQUEST_QP: case IBT_ERROR_ACCESS_VIOLATION_QP: case IBT_EVENT_EMPTY_QP: + case IBT_FEXCH_ERROR: ibtl_qp = event_p->ev_qp_hdl; if (ibtl_qp == NULL) { IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Thu Jul 29 22:10:26 2010 -0700 @@ -156,10 +156,6 @@ } -/* - * The IBTF Module is never unloaded. Actually there is no need of this - * routine, but provided just in case. - */ int _fini(void) { @@ -536,7 +532,7 @@ IBTF_DPRINTF_L2(ibtf, "ibc_attach(%p, %p)", ibc_hdl_p, info_p); /* Validate the Transport API version */ - if (info_p->hca_ci_vers != IBCI_V3) { + if (info_p->hca_ci_vers != IBCI_V4) { IBTF_DPRINTF_L1(ibtf, "ibc_attach: Invalid IB CI Version '%d'", info_p->hca_ci_vers); return (IBC_FAILURE); @@ -595,7 +591,7 @@ hca_devp->hd_ibc_hca_hdl = info_p->hca_handle; hca_devp->hd_ibc_ops = info_p->hca_ops; hca_devp->hd_hca_attr = info_p->hca_attr; - hca_devp->hd_hca_dip = info_p->hca_dip; + hca_devp->hd_hca_dip = info_p->hca_attr->hca_dip; status = ibtl_init_hca_portinfo(hca_devp); if (status != IBT_SUCCESS) { @@ -608,7 +604,7 @@ } /* Register the with MPxIO as PHCI */ - if (ibtl_ibnex_phci_register(info_p->hca_dip) != IBT_SUCCESS) { + if (ibtl_ibnex_phci_register(hca_devp->hd_hca_dip) != IBT_SUCCESS) { mutex_exit(&ibtl_clnt_list_mutex); IBTF_DPRINTF_L1(ibtf, "ibc_attach: MPxIO register failed"); kmem_free(hca_devp, sizeof (ibtl_hca_devinfo_t) +
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_mem.c Thu Jul 29 22:10:26 2010 -0700 @@ -892,3 +892,23 @@ return (IBTL_HCA2CIHCAOPS_P(hca)->ibc_deregister_fmr( IBTL_HCA2CIHCA(hca), mr_hdl)); } + +/* + * ibt_register_dma_mr + */ +ibt_status_t +ibt_register_dma_mr(ibt_hca_hdl_t hca, ibt_pd_hdl_t pd, + ibt_dmr_attr_t *mem_attr, ibt_mr_hdl_t *mr_hdl_p, ibt_mr_desc_t *mem_desc) +{ + ibt_status_t status; + + IBTF_DPRINTF_L3(ibtl_mem, "ibt_register_dma_mr(%p, %p, %p)", + hca, pd, mem_attr); + + status = IBTL_HCA2CIHCAOPS_P(hca)->ibc_register_dma_mr( + IBTL_HCA2CIHCA(hca), pd, mem_attr, NULL, mr_hdl_p, mem_desc); + if (status == IBT_SUCCESS) { + atomic_inc_32(&hca->ha_mr_cnt); + } + return (status); +}
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_qp.c Thu Jul 29 22:10:26 2010 -0700 @@ -138,14 +138,12 @@ } /* Get CI CQ handles */ - if ((qp_attrp->qp_scq_hdl == NULL) || (qp_attrp->qp_rcq_hdl == NULL)) { - IBTF_DPRINTF_L2(ibtf_qp, "ibt_alloc_qp: Invalid CQ Handle"); - *ibt_qp_p = NULL; - return (IBT_CQ_HDL_INVALID); - } - qp_attrp->qp_ibc_scq_hdl = qp_attrp->qp_scq_hdl->cq_ibc_cq_hdl; - qp_attrp->qp_ibc_rcq_hdl = qp_attrp->qp_rcq_hdl->cq_ibc_cq_hdl; + qp_attrp->qp_ibc_scq_hdl = (qp_attrp->qp_scq_hdl == NULL) ? NULL : + qp_attrp->qp_scq_hdl->cq_ibc_cq_hdl; + qp_attrp->qp_ibc_rcq_hdl = (qp_attrp->qp_rcq_hdl == NULL) ? NULL : + qp_attrp->qp_rcq_hdl->cq_ibc_cq_hdl; + /* Get CI SRQ handle */ if ((qp_attrp->qp_alloc_flags & IBT_QP_USES_SRQ) && (qp_attrp->qp_srq_hdl != NULL)) qp_attrp->qp_ibc_srq_hdl =
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c Thu Jul 29 22:10:26 2010 -0700 @@ -225,11 +225,15 @@ if ((ibt_get_all_part_attr(&attr_list, &nparts) != IBT_SUCCESS) || (nparts == 0)) { + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibd_insts: Failed to " + "IB Part List - %d", nparts); ibds->ibcm_arp_ibd_alloc = 0; ibds->ibcm_arp_ibd_cnt = 0; ibds->ibcm_arp_ip = NULL; return; } + IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_insts: Found %d IB Part List", + nparts); ibds->ibcm_arp_ibd_alloc = nparts; ibds->ibcm_arp_ibd_cnt = 0; @@ -247,6 +251,14 @@ ipp->ip_hca_guid = attr->pa_hca_guid; ipp->ip_port_gid = port_gid; ibds->ibcm_arp_ibd_cnt++; + + IBTF_DPRINTF_L4(cmlog, "PartAttr: p-linkid %lX, " + "d-linkid %lX, pkey 0x%lX", ipp->ip_linkid, + attr->pa_dlinkid, ipp->ip_pkey); + IBTF_DPRINTF_L4(cmlog, "hca_guid 0x%llX, " + "port_gid %llX \n attr-port_guid %llX", + ipp->ip_hca_guid, ipp->ip_port_gid.gid_guid, + attr->pa_port_guid); } attr++; } @@ -296,12 +308,13 @@ bzero(&lifn, sizeof (struct lifnum)); lifn.lifn_family = family_loc; + lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; err = ibcm_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn); if (err != 0) return (err); - IBTF_DPRINTF_L4(cmlog, "ibcm_do_lifconf: Family %d, lifn_count %d", + IBTF_DPRINTF_L3(cmlog, "ibcm_do_lifconf: Family %d, lifn_count %d", family_loc, lifn.lifn_count); /* * Pad the interface count to account for additional interfaces that @@ -314,6 +327,7 @@ lifcp->lifc_family = family_loc; lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq); lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP); + lifcp->lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES; err = ibcm_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp); if (err != 0) { @@ -329,7 +343,7 @@ datalink_id_t linkid; int i; - IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: linkname = %s\n", linkname); + IBTF_DPRINTF_L4(cmlog, 
"ibcm_arp_lookup: linkname = %s", linkname); /* * If at first we don't succeed, try again, just in case it is in @@ -339,8 +353,8 @@ */ if (dls_mgmt_get_linkid(linkname, &linkid) != 0) { if (dls_devnet_macname2linkid(linkname, &linkid) != 0) { - IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: could not " - "get linkid from linkname\n"); + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_lookup: could not " + "get linkid from linkname (%s)", linkname); return (NULL); } } @@ -350,7 +364,8 @@ return (&ibds->ibcm_arp_ip[i]); } - IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: returning NULL\n"); + IBTF_DPRINTF_L2(cmlog, "ibcm_arp_lookup: returning NULL for " + "linkname (%s)", linkname); return (NULL); } @@ -364,8 +379,13 @@ int i, nifs, naddr = 0; uint_t bufsize; struct lifconf lifc; - struct lifreq *lifrp; + struct lifreq *lifrp, lifr_copy; ibcm_arp_ip_t *ipp; + lifgroupinfo_t lifgr; + int err; + char ifname[LIFNAMSIZ + 1]; + uint64_t ifflags = 0; + zoneid_t ifzoneid; if (ibcm_do_lifconf(&lifc, &bufsize, family_loc) != 0) return (B_FALSE); @@ -376,12 +396,95 @@ family_loc, nifs); for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) { + if (lifrp->lifr_type != IFT_IB) continue; - if ((ipp = ibcm_arp_lookup(ibds, lifrp->lifr_name)) == NULL) + IBTF_DPRINTF_L4(cmlog, "\nInterface# : %d", i); + IBTF_DPRINTF_L4(cmlog, "lifr_name : %s, lifr_family :%X, " + "lifr_type : 0x%lX", lifrp->lifr_name, + lifrp->lifr_addr.ss_family, lifrp->lifr_type); + + (void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ); + + /* Get ZoneId. */ + lifr_copy = *lifrp; + ifzoneid = 0; + err = ibcm_do_ip_ioctl(SIOCGLIFZONE, sizeof (struct lifreq), + &lifr_copy); + if (err != 0) { + IBTF_DPRINTF_L2(cmlog, "IFZONE ioctl Failed: err = %d", + err); + } else { + IBTF_DPRINTF_L4(cmlog, "lifr_zoneid : 0x%X", + lifr_copy.lifr_zoneid); + ifzoneid = lifr_copy.lifr_zoneid; + } + + /* Get IfIndex. 
*/ + lifr_copy = *lifrp; + err = ibcm_do_ip_ioctl(SIOCGLIFINDEX, sizeof (struct lifreq), + &lifr_copy); + if (err != 0) { + IBTF_DPRINTF_L2(cmlog, "IFINDEX ioctl Failed: err = %d", + err); + } else + IBTF_DPRINTF_L4(cmlog, "lifr_index : 0x%X", + lifr_copy.lifr_index); + + /* Get Interface flags. */ + lifr_copy = *lifrp; + err = ibcm_do_ip_ioctl(SIOCGLIFFLAGS, sizeof (struct lifreq), + &lifr_copy); + if (err != 0) { + IBTF_DPRINTF_L2(cmlog, "IFFLAGS ioctl Failed: err = %d", + err); + } else { + ifflags = lifr_copy.lifr_flags; + IBTF_DPRINTF_L4(cmlog, "lifr_flags : 0x%llX", + ifflags); + } + + lifr_copy = *lifrp; + err = ibcm_do_ip_ioctl(SIOCGLIFGROUPNAME, + sizeof (struct lifreq), &lifr_copy); + if (err != 0) { + IBTF_DPRINTF_L3(cmlog, "IFGroupName ioctl Failed: " + "err = %d", err); + } + + if (lifr_copy.lifr_groupname[0] != '\0') { + IBTF_DPRINTF_L4(cmlog, "lifr_groupname : %s", + lifr_copy.lifr_groupname); + (void) strlcpy(lifgr.gi_grname, + lifr_copy.lifr_groupname, LIFGRNAMSIZ); + err = ibcm_do_ip_ioctl(SIOCGLIFGROUPINFO, + sizeof (struct lifgroupinfo), &lifgr); + if (err != 0) { + IBTF_DPRINTF_L2(cmlog, "IFGroupINFO ioctl " + "Failed: err = %d", err); + } else { + IBTF_DPRINTF_L4(cmlog, "lifgroupinfo details"); + IBTF_DPRINTF_L4(cmlog, "grname : %s, grifname :" + " %s, m4ifname : %s, m6ifname : %s", + lifgr.gi_grname, lifgr.gi_grifname, + lifgr.gi_m4ifname, lifgr.gi_m6ifname); + IBTF_DPRINTF_L4(cmlog, "gi_bcifname : %s", + lifgr.gi_bcifname); + IBTF_DPRINTF_L4(cmlog, "gi_v4 %d, gi_v6 %d, " + "gi_nv4 %d, gi_nv6 %d, gi_mactype %d", + lifgr.gi_v4, lifgr.gi_v6, lifgr.gi_nv4, + lifgr.gi_nv6, lifgr.gi_mactype); + + (void) strlcpy(ifname, lifgr.gi_bcifname, + LIFNAMSIZ); + } + } + + if ((ipp = ibcm_arp_lookup(ibds, ifname)) == NULL) continue; + ipp->ip_zoneid = ifzoneid; /* Copy back the zoneid info */ switch (lifrp->lifr_addr.ss_family) { case AF_INET: ipp->ip_inet_family = AF_INET;
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_path.c Thu Jul 29 22:10:26 2010 -0700 @@ -3767,8 +3767,9 @@ dgid1.gid_prefix = dgid1.gid_guid = 0; sgid.gid_prefix = sgid.gid_guid = 0; - retval = ibcm_arp_get_ibaddr(getzoneid(), p_arg->attr.ipa_src_ip, - p_arg->attr.ipa_dst_ip[0], &sgid, &dgid1, &src_ip_p); + retval = ibcm_arp_get_ibaddr(p_arg->attr.ipa_zoneid, + p_arg->attr.ipa_src_ip, p_arg->attr.ipa_dst_ip[0], &sgid, + &dgid1, &src_ip_p); if (retval) { IBTF_DPRINTF_L2(cmlog, "ibcm_process_get_ip_paths: " "ibcm_arp_get_ibaddr() failed: %d", retval); @@ -3806,7 +3807,7 @@ if (p_arg->attr.ipa_ndst > 1) { /* Get DGID for all specified Dest IP Addr */ for (; i < p_arg->attr.ipa_ndst; i++) { - retval = ibcm_arp_get_ibaddr(getzoneid(), + retval = ibcm_arp_get_ibaddr(p_arg->attr.ipa_zoneid, p_arg->attr.ipa_src_ip, p_arg->attr.ipa_dst_ip[i], NULL, &dgid2, NULL); if (retval) { @@ -4299,8 +4300,9 @@ /* If optional attributes are specified, validate them. */ if (attrp) { /* Get SGID and DGID for the specified input ip-addr */ - retval = ibcm_arp_get_ibaddr(getzoneid(), attrp->apa_src_ip, - attrp->apa_dst_ip, &new_sgid, &new_dgid, NULL); + retval = ibcm_arp_get_ibaddr(attrp->apa_zoneid, + attrp->apa_src_ip, attrp->apa_dst_ip, &new_sgid, + &new_dgid, NULL); if (retval) { IBTF_DPRINTF_L2(cmlog, "ibt_get_ip_alt_path: " "ibcm_arp_get_ibaddr() failed: %d", retval);
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_ti.c Thu Jul 29 22:10:26 2010 -0700 @@ -5927,6 +5927,163 @@ /* * Function: + * ibt_lid_to_node_info() + * Input: + * lid Identifies the IB Node and port for which to obtain + * Node information. + * Output: + * node_info_p A pointer to an ibt_node_info_t structure (allocated + * by the caller) in which to return the node information. + * Returns: + * IBT_SUCCESS + * IBT_INVALID_PARAM + * IBT_NODE_RECORDS_NOT_FOUND + * IBT_NO_HCAS_AVAILABLE + * Description: + * Retrieve Node Information for the specified LID. + */ +ibt_status_t +ibt_lid_to_node_info(ib_lid_t lid, ibt_node_info_t *node_info_p) +{ + ibt_status_t retval; + ibcm_hca_info_t *hcap; + uint8_t i, j; + ib_guid_t *guid_array = NULL; + uint_t num_hcas = 0; + + + IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info(0x%lX, %p)", + lid, node_info_p); + + if ((lid == 0) || (node_info_p == NULL)) { + IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: " + "Lid is zero, or node_info_p is NULL."); + return (IBT_INVALID_PARAM); + } + + /* Get the number of HCAs and their GUIDs */ + num_hcas = ibt_get_hca_list(&guid_array); + IBTF_DPRINTF_L4(cmlog, "ibt_lid_to_node_info: ibt_get_hca_list " + "returned %d hcas", num_hcas); + + if (num_hcas == 0) { + IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: " + "NO HCA's Found on this system"); + return (IBT_NO_HCAS_AVAILABLE); + } + + for (i = 0; i < num_hcas; i++) { + hcap = ibcm_find_hca_entry(guid_array[i]); + if (hcap == NULL) { + IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: " + "HCA(%llX) info not found", guid_array[i]); + retval = IBT_NO_HCAS_AVAILABLE; + continue; + } + + for (j = 0; j < hcap->hca_num_ports; j++) { + uint8_t port; + ibmf_saa_handle_t saa_handle; + uint_t num_rec; + size_t len; + void *res_p; + sa_node_record_t nr_req, *nr_resp; + + port = j + 1; + + /* Get SA Access Handle. 
*/ + saa_handle = ibcm_get_saa_handle(hcap, port); + if (saa_handle == NULL) { + IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: " + "Port %d of HCA (%llX) is NOT ACTIVE", + port, guid_array[i]); + retval = IBT_NODE_RECORDS_NOT_FOUND; + continue; + } + + /* Retrieve Node Records from SA Access. */ + bzero(&nr_req, sizeof (sa_node_record_t)); + + nr_req.LID = lid; /* LID */ + + retval = ibcm_get_node_rec(saa_handle, &nr_req, + SA_NODEINFO_COMPMASK_NODELID, &res_p, &len); + if (retval == IBT_NODE_RECORDS_NOT_FOUND) { + IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: " + "failed (%d) to get Node records", retval); + continue; + } else if (retval != IBT_SUCCESS) { + IBTF_DPRINTF_L2(cmlog, "ibt_lid_to_node_info: " + "failed (%d) to get Node records", retval); + ibcm_dec_hca_acc_cnt(hcap); + goto lid_to_ni_exit; + } + + num_rec = len/sizeof (sa_node_record_t); + nr_resp = (sa_node_record_t *)(uchar_t *)res_p; + + /* Validate the returned number of records. */ + if ((nr_resp != NULL) && (num_rec > 0)) { + + IBCM_DUMP_NODE_REC(nr_resp); + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS( + *node_info_p)) + + node_info_p->n_sys_img_guid = + nr_resp->NodeInfo.SystemImageGUID; + node_info_p->n_node_guid = + nr_resp->NodeInfo.NodeGUID; + node_info_p->n_port_guid = + nr_resp->NodeInfo.PortGUID; + node_info_p->n_dev_id = + nr_resp->NodeInfo.DeviceID; + node_info_p->n_revision = + nr_resp->NodeInfo.Revision; + node_info_p->n_vendor_id = + nr_resp->NodeInfo.VendorID; + node_info_p->n_num_ports = + nr_resp->NodeInfo.NumPorts; + node_info_p->n_port_num = + nr_resp->NodeInfo.LocalPortNum; + node_info_p->n_node_type = + nr_resp->NodeInfo.NodeType; + (void) strncpy(node_info_p->n_description, + (char *)&nr_resp->NodeDescription, 64); + + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS( + *node_info_p)) + + /* + * Deallocate the memory allocated by SA for + * 'nr_resp'. 
+ */ + ibcm_dec_hca_acc_cnt(hcap); + kmem_free(nr_resp, len); + retval = IBT_SUCCESS; + + goto lid_to_ni_exit; + } else { + retval = IBT_NODE_RECORDS_NOT_FOUND; + IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: " + "Node Records NOT found - LID 0x%lX", + lid); + } + } + ibcm_dec_hca_acc_cnt(hcap); + } + +lid_to_ni_exit: + if (guid_array) + ibt_free_hca_list(guid_array, num_hcas); + + IBTF_DPRINTF_L3(cmlog, "ibt_lid_to_node_info: done. Status %d", retval); + + return (retval); +} + +/* + * Function: * ibt_get_companion_port_gids() * Description: * Get list of GID's available on a companion port(s) of the specified @@ -6336,64 +6493,105 @@ /* RDMA IP CM Support routines */ ibt_status_t -ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, ibt_ip_addr_t *src_ip) +ibt_get_src_ip(ibt_srcip_attr_t *sattr, ibt_srcip_info_t **src_info_p, + uint_t *entries_p) { + ibt_srcip_info_t *s_ip; ibcm_arp_ip_t *ipp; ibcm_arp_ibd_insts_t ibds; - int i; - boolean_t found = B_FALSE; + uint8_t i, j; + uint_t count; ibt_status_t retval = IBT_SUCCESS; - IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%llX:%llX, %X, %p)", - gid.gid_prefix, gid.gid_guid, pkey, src_ip); - - if (gid.gid_prefix == 0 || gid.gid_guid == 0) { + IBTF_DPRINTF_L4(cmlog, "ibt_get_src_ip(%p, %p, %p)", + sattr, src_info_p, entries_p); + + if (sattr == NULL || entries_p == NULL) { + IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid I/P Args."); + return (IBT_INVALID_PARAM); + } + + if (sattr->sip_gid.gid_prefix == 0 || sattr->sip_gid.gid_guid == 0) { IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: Invalid GID."); return (IBT_INVALID_PARAM); } - if (src_ip == NULL) { - IBTF_DPRINTF_L3(cmlog, "ibt_get_src_ip: ERROR: src_ip NULL"); - return (IBT_INVALID_PARAM); - } - - retval = ibcm_arp_get_ibds(&ibds, AF_UNSPEC); + /* TBD: Zoneid */ + retval = ibcm_arp_get_ibds(&ibds, sattr->sip_family); if (retval != IBT_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibt_get_src_ip: ibcm_arp_get_ibds " "failed to get IBD Instances: ret 0x%x", retval); goto get_src_ip_end; } + 
count = 0; for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt; i++, ipp++) { if (ipp->ip_inet_family == AF_UNSPEC) continue; - if (ipp->ip_port_gid.gid_prefix == gid.gid_prefix && - ipp->ip_port_gid.gid_guid == gid.gid_guid) { - if (pkey) { - if (ipp->ip_pkey == pkey) { - found = B_TRUE; - break; - } else - continue; - } - found = B_TRUE; + if (ipp->ip_port_gid.gid_prefix == sattr->sip_gid.gid_prefix && + ipp->ip_port_gid.gid_guid == sattr->sip_gid.gid_guid) { + if ((sattr->sip_pkey) && + (ipp->ip_pkey != sattr->sip_pkey)) + continue; + + if ((sattr->sip_zoneid != ALL_ZONES) && + (sattr->sip_zoneid != ipp->ip_zoneid)) + continue; + + count++; break; } } - if (found == B_FALSE) { - retval = IBT_SRC_IP_NOT_FOUND; + if (count) { + /* + * Allocate memory for return buffer, to be freed by + * ibt_free_srcip_info(). + */ + s_ip = kmem_alloc((count * sizeof (ibt_srcip_info_t)), + KM_SLEEP); + + *src_info_p = s_ip; + *entries_p = count; + + j = 0; + for (i = 0, ipp = ibds.ibcm_arp_ip; i < ibds.ibcm_arp_ibd_cnt; + i++, ipp++) { + if (ipp->ip_inet_family == AF_UNSPEC) + continue; + if ((ipp->ip_port_gid.gid_prefix == + sattr->sip_gid.gid_prefix) && + (ipp->ip_port_gid.gid_guid == + sattr->sip_gid.gid_guid)) { + if ((sattr->sip_pkey) && + (ipp->ip_pkey != sattr->sip_pkey)) + continue; + + if ((sattr->sip_zoneid != ALL_ZONES) && + (sattr->sip_zoneid != ipp->ip_zoneid)) + continue; + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_ip)) + s_ip[j].ip_addr.family = ipp->ip_inet_family; + _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_ip)) + if (s_ip[j].ip_addr.family == AF_INET) { + bcopy(&ipp->ip_cm_sin.sin_addr, + &s_ip[j].ip_addr.un.ip4addr, + sizeof (in_addr_t)); + } else if (s_ip[j].ip_addr.family == AF_INET6) { + bcopy(&ipp->ip_cm_sin6.sin6_addr, + &s_ip[j].ip_addr.un.ip6addr, + sizeof (in6_addr_t)); + /* TBD: scope_id */ + } + IBCM_PRINT_IP("ibt_get_src_ip", + &s_ip[j].ip_addr); + j++; + } + } } else { - src_ip->family = ipp->ip_inet_family; - if (src_ip->family == AF_INET) { - 
bcopy(&ipp->ip_cm_sin.sin_addr, &src_ip->un.ip4addr, - sizeof (in_addr_t)); - } else if (src_ip->family == AF_INET6) { - bcopy(&ipp->ip_cm_sin6.sin6_addr, &src_ip->un.ip6addr, - sizeof (in6_addr_t)); - } - IBCM_PRINT_IP("ibt_get_src_ip", src_ip); + retval = IBT_SRC_IP_NOT_FOUND; } get_src_ip_end: @@ -6401,6 +6599,28 @@ return (retval); } +/* + * ibt_free_srcip_info() + * Free the memory allocated by successful ibt_get_src_ip() + * + * src_info Pointer returned by ibt_get_src_ip(). + * + * entries The number of ibt_ip_addr_t entries to free. + */ +void +ibt_free_srcip_info(ibt_srcip_info_t *src_info, uint_t entries) +{ + IBTF_DPRINTF_L3(cmlog, "ibt_free_srcip_info: " + "Free <%d> entries from 0x%p", entries, src_info); + + if ((src_info != NULL) && (entries > 0)) + kmem_free(src_info, entries * sizeof (ibt_srcip_info_t)); + else + IBTF_DPRINTF_L2(cmlog, "ibt_free_srcip_info: " + "ERROR: NULL buf pointer or ZERO length specified."); +} + + ib_svc_id_t ibt_get_ip_sid(uint8_t protocol_num, in_port_t dst_port) {
--- a/usr/src/uts/common/io/warlock/hermon.wlcmd Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/warlock/hermon.wlcmd Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ # CDDL HEADER END # # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. +# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. # one hermon_state_s @@ -63,6 +62,7 @@ root hermon_ci_reregister_mr root hermon_ci_reregister_buf root hermon_ci_sync_mr +root hermon_ci_register_dma_mr root hermon_ci_alloc_mw root hermon_ci_free_mw root hermon_ci_query_mw @@ -93,6 +93,14 @@ root hermon_ci_deregister_fmr root hermon_ci_alloc_io_mem root hermon_ci_free_io_mem +root hermon_ci_query_cq_handler_id +root hermon_ci_not_supported + +### Hermon misc. (currently unused) +root hermon_config_int_mod +root hermon_hw_health_check +root hermon_rsrc_alloc_uarhdl +root hermon_sense_port_post ### Hermon Firmware commands (currently unused) root hermon_sync_tpt_cmd_post @@ -105,20 +113,23 @@ ### Hermon Event Queue function pointers add hermon_eq_poll/eqfunction targets hermon_cq_handler \ - hermon_cq_err_handler hermon_comm_estbl_handler hermon_eq_arm \ + hermon_cq_err_handler hermon_comm_estbl_handler \ hermon_port_state_change_handler hermon_invreq_local_wq_err_handler \ hermon_local_acc_vio_wq_err_handler \ hermon_path_mig_handler hermon_path_mig_err_handler \ hermon_local_wq_cat_err_handler hermon_sendq_drained_handler \ - hermon_eq_overflow_handler hermon_cmd_complete_handler \ + hermon_cmd_complete_handler hermon_fexch_error_handler \ hermon_srq_last_wqe_reached_handler hermon_catastrophic_handler \ - hermon_ecc_detection_handler hermon_no_eqhandler hermon_eq_demux + hermon_no_eqhandler hermon_eq_demux ### Hermon agent callbacks root hermon_agent_handle_req root hermon_agent_request_cb root hermon_agent_response_cb +### Hermon interrupt allocation callback +root hermon_intr_cb_handler + ### Hermon kmem_cache constructors and destructors 
root hermon_rsrc_cqhdl_constructor root hermon_rsrc_cqhdl_destructor @@ -162,11 +173,9 @@ root hermon_qpn_avl_compare ### Hermon FMR processing -root hermon_fmr_avl_compare -root hermon_fmr_processing add hermon_sw_fmr_s::fmr_flush_function targets warlock_dummy -### Hermon FMR processing +### Hermon FMA processing root hermon_inter_err_chk add bus_ops::bus_add_eventcall targets warlock_dummy
--- a/usr/src/uts/common/io/warlock/ibcm.wlcmd Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/warlock/ibcm.wlcmd Thu Jul 29 22:10:26 2010 -0700 @@ -18,8 +18,7 @@ # # CDDL HEADER END # -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. +# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. # # ibcm api entry points @@ -62,13 +61,12 @@ root ibt_get_ip_sid root ibt_release_ip_sid root ibt_get_src_ip +root ibt_free_srcip_info +root ibt_lid_to_node_info root ibt_ofuvcm_get_req_data root ibt_ofuvcm_proceed -root ibcm_arp_get_srcip_plist -root ibcm_arp_get_ibd_insts_cb - # callback entry points from ibmf root ibcm_recv_cb root ibcm_post_req_complete
--- a/usr/src/uts/common/io/warlock/ibtl.wlcmd Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/warlock/ibtl.wlcmd Thu Jul 29 22:10:26 2010 -0700 @@ -57,6 +57,7 @@ root ibt_deregister_fmr root ibt_enable_cq_notify root ibt_set_cq_handler +root ibt_query_cq_handler_id root ibt_alloc_qp root ibt_initialize_qp root ibt_alloc_special_qp @@ -115,6 +116,7 @@ root ibt_query_rc_channel root ibt_modify_rc_channel root ibt_alloc_ud_channel +root ibt_alloc_ud_channel_range root ibt_query_ud_channel root ibt_modify_ud_channel root ibt_recover_ud_channel @@ -156,6 +158,7 @@ root ibt_get_part_attr root ibt_get_all_part_attr root ibt_free_part_attr +root ibt_register_dma_mr # IBTF CI api entry points root ibc_init @@ -252,6 +255,7 @@ add ibc_operations_s::ibc_alloc_mw targets warlock_dummy add ibc_operations_s::ibc_alloc_pd targets warlock_dummy add ibc_operations_s::ibc_alloc_qp targets warlock_dummy +add ibc_operations_s::ibc_alloc_qp_range targets warlock_dummy add ibc_operations_s::ibc_alloc_special_qp targets warlock_dummy add ibc_operations_s::ibc_attach_mcg targets warlock_dummy add ibc_operations_s::ibc_ci_data_in targets warlock_dummy @@ -275,6 +279,7 @@ add ibc_operations_s::ibc_post_send targets warlock_dummy add ibc_operations_s::ibc_query_ah targets warlock_dummy add ibc_operations_s::ibc_query_cq targets warlock_dummy +add ibc_operations_s::ibc_query_cq_handler_id targets warlock_dummy add ibc_operations_s::ibc_query_hca_ports targets warlock_dummy add ibc_operations_s::ibc_query_mr targets warlock_dummy add ibc_operations_s::ibc_query_mw targets warlock_dummy @@ -306,6 +311,7 @@ add ibc_operations_s::ibc_free_io_mem targets warlock_dummy add ibc_operations_s::ibc_map_mem_iov targets warlock_dummy add ibc_operations_s::ibc_unmap_mem_iov targets warlock_dummy +add ibc_operations_s::ibc_register_dma_mr targets warlock_dummy add ibt_clnt_modinfo_s::mi_async_handler targets warlock_dummy add ibtl_handlers.c:ibtl_async_client_call/async_handler targets 
warlock_dummy add ibtl_handlers.c:ibtl_cq_handler_call/cq_handler targets warlock_dummy
--- a/usr/src/uts/common/io/warlock/tavor.wlcmd Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/io/warlock/tavor.wlcmd Thu Jul 29 22:10:26 2010 -0700 @@ -1,6 +1,5 @@ # -# Copyright 2010 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. +# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. # one tavor_state_s @@ -73,6 +72,8 @@ root tavor_ci_unmap_mem_iov root tavor_ci_alloc_io_mem root tavor_ci_free_io_mem +root tavor_ci_register_dma_mr +root tavor_ci_not_supported ### Tavor Firmware commands (currently unused) root tavor_sync_tpt_cmd_post
--- a/usr/src/uts/common/rpc/rpcib.c Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/rpc/rpcib.c Thu Jul 29 22:10:26 2010 -0700 @@ -729,6 +729,7 @@ cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP); cq->rib_hca = hca; + bzero(&cq_attr, sizeof (cq_attr)); cq_attr.cq_size = cq_size; cq_attr.cq_flags = IBT_CQ_NO_FLAGS; status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon.h Thu Jul 29 22:10:26 2010 -0700 @@ -64,6 +64,7 @@ #include <sys/ib/adapters/hermon/hermon_cmd.h> #include <sys/ib/adapters/hermon/hermon_cq.h> #include <sys/ib/adapters/hermon/hermon_event.h> +#include <sys/ib/adapters/hermon/hermon_fcoib.h> #include <sys/ib/adapters/hermon/hermon_ioctl.h> #include <sys/ib/adapters/hermon/hermon_misc.h> #include <sys/ib/adapters/hermon/hermon_mr.h> @@ -99,7 +100,7 @@ #define HERMON_ONCLOSE_FLASH_INPROGRESS (1 << 0) -#define HERMON_MSIX_MAX 8 /* max # of interrupt vectors */ +#define HERMON_MSIX_MAX 256 /* max # of interrupt vectors */ /* * VPD header size - or more rightfully, the area of interest for fwflash @@ -139,17 +140,16 @@ /* * Macro used to output HCA warning messages. Note: HCA warning messages * are only generated when an unexpected condition has been detected. This - * can be the result of a software bug or some other problem, but it is more - * often an indication that the HCA firmware (and/or hardware) has done - * something unexpected. This warning message means that the driver state - * in unpredictable and that shutdown/restart is suggested. + * can be the result of a software bug or some other problem. Previously + * this was used for hardware errors, but those now use HERMON_FMANOTE + * instead, indicating that the driver state is more likely in an + * unpredictable state, and that shutdown/restart is suggested. + * + * HERMON_WARNING messages are not considered important enough to print + * to the console, just to the message log. */ #define HERMON_WARNING(state, string) \ - cmn_err(CE_WARN, "hermon%d: %s\n", (state)->hs_instance, string) - - -#define HERMON_NOTE(state, string) \ - cmn_err(CE_CONT, "hermon%d: %s\n", (state)->hs_instance, string) + cmn_err(CE_CONT, "!hermon%d: %s\n", (state)->hs_instance, string) /* * Macro used to set attach failure messages. 
Also, the attach message buf @@ -352,27 +352,17 @@ struct hermon_state_s { dev_info_t *hs_dip; int hs_instance; -int hs_debug; /* for debug, a way of tracing */ -uint32_t hs_debug_lev; /* for controlling prints, a bit mask */ - /* see hermon.c for setting it */ + /* PCI device, vendor, and revision IDs */ uint16_t hs_vendor_id; uint16_t hs_device_id; uint8_t hs_revision_id; -struct hermon_hw_qpc_s hs_debug_qpc; -struct hermon_hw_cqc_s hs_debug_cqc; -struct hermon_hw_eqc_s hs_debug_eqc; - - hermon_hw_sm_perfcntr_t hs_debug_perf; - - /* * DMA information for the InfiniHost Context Memory (ICM), * ICM Auxiliary allocation and the firmware. Also, record * of ICM and ICMA sizes, in bytes. */ - /* JBDB -- store here hs_icm_table, with hs_icm_dma in */ uint64_t hs_icm_sz; hermon_icm_table_t *hs_icm; @@ -389,9 +379,17 @@ ddi_intr_handle_t hs_intrmsi_hdl[HERMON_MSIX_MAX]; uint_t hs_intrmsi_pri; int hs_intrmsi_cap; + ddi_cb_handle_t hs_intr_cb_hdl; - /* assign EQs to CQs in a round robin fashion */ - uint_t hs_eq_dist; /* increment when used */ + /* Do not use reserved EQs */ + uint_t hs_rsvd_eqs; + uint_t hs_cq_erreqnum; + + /* cq_sched data */ + kmutex_t hs_cq_sched_lock; + hermon_cq_sched_t *hs_cq_sched_array; + hermon_cq_sched_t hs_cq_sched_default; + uint_t hs_cq_sched_array_size; /* hermon HCA name and HCA part number */ char hs_hca_name[64]; @@ -555,9 +553,6 @@ */ hermon_pdhdl_t hs_pdhdl_internal; hermon_eqhdl_t hs_eqhdl[HERMON_NUM_EQ]; - hermon_cqhdl_t *hs_cqhdl; - hermon_qphdl_t *hs_qphdl; - hermon_srqhdl_t *hs_srqhdl; kmutex_t hs_dbr_lock; /* lock for dbr mgmt */ /* linked list of kernel dbr resources */ @@ -706,6 +701,10 @@ mod_hash_t *hs_fm_test_hash; /* testset */ mod_hash_t *hs_fm_id_hash; /* testid */ #endif + /* FCoIB data */ + hermon_fcoib_t hs_fcoib; + boolean_t hs_fcoib_may_be_running; /* cq_poll test */ + /* * Hermon fastreboot support. To sw-reset Hermon HCA, the driver * needs to save/restore MSI-X tables and PBA. 
Those members are @@ -889,7 +888,7 @@ #define HERMON_ICM_SPLIT 64 #define HERMON_ICM_SPAN 4096 -#define hermon_bitmap(bitmap, dma_info, icm_table, split_index) \ +#define hermon_bitmap(bitmap, dma_info, icm_table, split_index, num_to_hdl) \ bitmap = (icm_table)->icm_bitmap[split_index]; \ if (bitmap == NULL) { \ _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(icm_table))) \ @@ -901,6 +900,12 @@ (icm_table)->icm_dma[split_index] = \ kmem_zalloc(num_spans * sizeof (hermon_dma_info_t), \ KM_SLEEP); \ + if (num_to_hdl) { \ + ASSERT((icm_table)->num_to_hdl[split_index] == NULL); \ + (icm_table)->num_to_hdl[split_index] = \ + kmem_zalloc(num_spans * \ + sizeof (void **), KM_SLEEP); \ + } \ } \ dma_info = (icm_table)->icm_dma[split_index] @@ -934,9 +939,10 @@ uint32_t rsrc_mask; uint16_t log_num_entries; uint16_t log_object_size; - /* two arrays of pointers, each pointer points to arrays */ + /* three arrays of pointers, each pointer points to arrays */ uint8_t *icm_bitmap[HERMON_ICM_SPLIT]; hermon_dma_info_t *icm_dma[HERMON_ICM_SPLIT]; + void ***num_to_hdl[HERMON_ICM_SPLIT]; /* qp/cq/srq */ }; /* * Split the rsrc index into three pieces: @@ -959,6 +965,10 @@ uint32_t icm_index1, uint32_t icm_index2); void hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type, uint32_t icm_index1, uint32_t icm_index2); +void *hermon_icm_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type, + uint32_t idx); +void hermon_icm_set_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type, + uint32_t idx, void *hdl); int hermon_device_mode(hermon_state_t *state); /* Defined in hermon_umap.c */
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cmd.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cmd.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_CMD_H @@ -149,10 +148,13 @@ #define SW2HW_SRQ 0x35 #define HW2SW_SRQ 0x36 #define QUERY_SRQ 0x37 -/* new in hermon, replaces part of modifyMPT */ +/* new in hermon, replaces part of modify MPT */ #define RESIZE_SRQ 0X44 /* new in hermon, set limit water mark */ #define ARM_RQ 0X40 +/* new in hermon (PRM 0.36) configure interrupt moderation */ +#define CONFIG_INT_MOD 0X45 +#define HW_HEALTH_CHECK 0X50 /* Multicast Group Commands */ #define READ_MGM 0x25 @@ -167,6 +169,14 @@ #define DIAG_RPRT 0x30 #define CMD_NOP 0x31 +#define SET_VLAN_FLTR 0x47 +#define SET_MCAST_FLTR 0x48 + +#define CONFIG_FC 0x4A +#define QUERY_FC 0x4B +#define HEART_BEAT_RQ 0x4C + +#define SENSE_PORT 0x4D /* ICM and related commands - w/out LAM commands from Arbel */ #define RUN_FW 0xFF6 @@ -182,7 +192,6 @@ * Commands mentioned but not defined in PRM v35 * REL_ICM_AUX * INIT_VM - * HEART_BEAT_RQ */ /* @@ -731,6 +740,7 @@ * commands: * QUERY_DEV_LIM/CAP, QUERY_FW, QUERY_ADAPTER, QUERY_HCA, QUERY_MPT, * QUERY_EQ, QUERY_CQ, and QUERY_QP. + * New with FCoIB, QUERY_FC */ int hermon_cmn_query_cmd_post(hermon_state_t *state, uint_t opcode, uint_t opmod, uint_t queryindx, void *query, uint_t size, uint_t sleepflag); @@ -800,6 +810,12 @@ uint_t qptype, uint_t sleepflag, uint_t opmod); /* + * Get FEXCH HEART BEAT + */ +int hermon_get_heart_beat_rq_cmd_post(hermon_state_t *state, uint_t qpindx, + uint64_t *outparm); + +/* * MGID_HASH, READ_MGM, and WRITE_MGM - used for manipulation of the * hardware resource tables for multicast groups. 
* NOTE: for intial implementation these functions retain their original @@ -833,7 +849,6 @@ * to resize the SRQ, by passing the new information in the same format as * the original srqc, which the HCA will update appropriately */ - int hermon_resize_srq_cmd_post(hermon_state_t *state, hermon_hw_srqc_t *srq, uint_t srqnum, uint_t sleepflag); @@ -842,12 +857,40 @@ */ int hermon_nop_post(hermon_state_t *state, uint_t interval, uint_t sleep); int hermon_setdebug_post(hermon_state_t *state); + /* * READ_MTT - used to read an mtt entry at address. */ int hermon_read_mtt_cmd_post(hermon_state_t *state, uint64_t mtt_addr, hermon_hw_mtt_t *mtt); +/* + * SENSE_PORT - used to send protocol running on a port + */ +int hermon_sense_port_post(hermon_state_t *state, uint_t portnum, + uint32_t *protocol); + +/* + * CONFIG_FC - used to do either a basic config passing in + * *hermon_hw_config_fc_basic_s, or config the N_Port table. + * passing in pointer to an array of 32-bit id's + * Note that either one needs to be cast to void * + */ +int hermon_config_fc_cmd_post(hermon_state_t *state, void *cfginfo, int enable, + int selector, int n_ports, int portnum, uint_t sleepflag); + +/* + * CONFIG_INT_MOD - used to configure INTERRUPT moderation + */ +int hermon_config_int_mod(hermon_state_t *state, uint_t min_delay, + uint_t vector); + +/* + * HW_HEALTH_CHECK - tests state of the HCA + * if command fails, *health is invalid/undefined + */ +int hermon_hw_health_check(hermon_state_t *state, int *health); + #ifdef __cplusplus } #endif
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cq.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cq.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_CQ_H @@ -57,18 +56,10 @@ * The following defines the default number of Completion Queues. This * is controllable via the "hermon_log_num_cq" configuration variable. * We also have a define for the minimum size of a CQ. CQs allocated - * with size 0, 1, 2, or 3 will always get back a CQ of size 4. - */ -#define HERMON_NUM_CQ_SHIFT 0x10 -/* - * #define HERMON_CQ_MIN_SIZE 0x3 + * with size "less than a page" will always get back a page. */ +#define HERMON_NUM_CQ_SHIFT 0x12 -/* - * #define HERMON_CQ_MIN_SIZE 0xFF testing, try min 1 page - */ - -/* page div 32 (cqe size) minus 1, for min size */ #define HERMON_CQ_MIN_SIZE ((PAGESIZE / 32) - 1) /* @@ -97,7 +88,7 @@ * the associated QP. */ #define HERMON_CQE_SND_NOP 0x0 -#define HERMON_CQE_SND_SND_INV 0x1 +#define HERMON_CQE_SND_SEND_INV 0x1 #define HERMON_CQE_SND_RDMAWR 0x8 #define HERMON_CQE_SND_RDMAWR_IMM 0x9 #define HERMON_CQE_SND_SEND 0xA @@ -116,7 +107,7 @@ #define HERMON_CQE_RCV_RDMAWR_IMM 0x00 #define HERMON_CQE_RCV_SEND 0x01 #define HERMON_CQE_RCV_SEND_IMM 0x02 -#define HERMON_CQE_RCV_SND_INV 0x03 +#define HERMON_CQE_RCV_SEND_INV 0x03 #define HERMON_CQE_RCV_ERROR_CODE 0x1E #define HERMON_CQE_RCV_RESIZE_CODE 0x16 @@ -124,25 +115,56 @@ /* Define for maximum CQ number mask (CQ number is 24 bits) */ #define HERMON_CQ_MAXNUMBER_MSK 0xFFFFFF +/* + * CQ Sched Management + * + * Each hermon_cq_sched struct defines a range of cq handler_id's + * assigned to the cq_sched instance. Also, the "next_alloc" + * member is used to allocate handler_id's in a round robin fashion. 
+ * + * Valid cq handler_id's are in the range of 1 to hs_intrmsi_allocd. + * They are indexes into the hs_intrmsi_hdl array. + */ +#define HERMON_CQH_MAX 32 +typedef struct hermon_cq_sched_s { + char cqs_name[HERMON_CQH_MAX]; + uint_t cqs_start_hid; + uint_t cqs_len; + uint_t cqs_next_alloc; + uint_t cqs_desired; + uint_t cqs_minimum; + uint_t cqs_refcnt; /* could be alloc'ed more than once */ +} hermon_cq_sched_t; /* * new EQ mgmt - per domain (when it gets there). - * The first N are for CQ Completions. Following that are: + * The first hs_rsvd_eqs are reserved by the firmware. + * The next hs_intrmsi_allocd are for CQ Completions. + * Each of these "completion" EQs has a unique interrupt vector. + * The EQs following that are: * * 1 for CQ Errors * 1 for Asyncs and Command Completions, and finally * 1 for All Other events. * - * hs_intrmsi_allocd is the N in the above. + * share the last of the interrupt vectors. */ +#define HERMON_CQSCHED_NEXT_HID(cq_schedp) \ + ((atomic_inc_uint_nv(&(cq_schedp)->cqs_next_alloc) % \ + (cq_schedp)->cqs_len) + (cq_schedp)->cqs_start_hid) -#define HERMON_CQ_EQNUM_GET(state) \ - (state->hs_devlim.num_rsvd_eq + \ - (atomic_inc_uint_nv(&state->hs_eq_dist) % \ - state->hs_intrmsi_allocd)) +#define HERMON_HID_TO_EQNUM(state, hid) \ + ((state)->hs_rsvd_eqs + (hid) - 1) + +#define HERMON_HID_VALID(state, hid) \ + ((uint_t)((hid) - 1) < (state)->hs_intrmsi_allocd) + +#define HERMON_EQNUM_TO_HID(state, eqnum) \ + ((eqnum) - (state)->hs_rsvd_eqs + 1) #define HERMON_CQ_ERREQNUM_GET(state) \ - (state->hs_devlim.num_rsvd_eq + state->hs_intrmsi_allocd) + (state)->hs_cq_erreqnum + /* * The following defines are used for Hermon CQ error handling. Note: For * CQEs which correspond to error events, the Hermon device requires some @@ -151,8 +173,6 @@ * code (above), doorbell count, and whether a error completion is for a * send or receive work request. 
*/ - - #define HERMON_CQE_ERR_STATUS_SHIFT 0 #define HERMON_CQE_ERR_STATUS_MASK 0xFF #define HERMON_CQE_ERR_DBDCNT_MASK 0xFFFF @@ -223,7 +243,6 @@ struct hermon_qalloc_info_s cq_cqinfo; }; _NOTE(READ_ONLY_DATA(hermon_sw_cq_s::cq_cqnum - hermon_sw_cq_s::cq_eqnum hermon_sw_cq_s::cq_erreqnum hermon_sw_cq_s::cq_cqcrsrcp hermon_sw_cq_s::cq_rsrcp @@ -235,6 +254,7 @@ hermon_sw_cq_s::cq_cqinfo)) _NOTE(MUTEX_PROTECTS_DATA(hermon_sw_cq_s::cq_lock, hermon_sw_cq_s::cq_buf + hermon_sw_cq_s::cq_eqnum hermon_sw_cq_s::cq_mrhdl hermon_sw_cq_s::cq_refcnt hermon_sw_cq_s::cq_is_special @@ -257,6 +277,9 @@ ibt_cq_notify_flags_t flags); int hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cqhdl, ibt_wc_t *wc_p, uint_t num_wc, uint_t *num_polled); +int hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr, + hermon_cq_sched_t **cq_sched_pp); +int hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp); int hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); int hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq, @@ -266,6 +289,8 @@ hermon_cqhdl_t hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum); void hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp); void hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq); +int hermon_cq_sched_init(hermon_state_t *state); +void hermon_cq_sched_fini(hermon_state_t *state); #ifdef __cplusplus }
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_event.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_event.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_EVENT_H @@ -116,7 +115,7 @@ #define HERMON_EVT_PATH_MIGRATED 0x01 #define HERMON_EVT_COMM_ESTABLISHED 0x02 #define HERMON_EVT_SEND_QUEUE_DRAINED 0x03 -#define HERMON_EVT_SRQ_LAST_WQE_REACHED 0x13 +#define HERMON_EVT_SRQ_LAST_WQE_REACHED 0x13 #define HERMON_EVT_SRQ_LIMIT 0x14 /* QP Affiliated Asynch Event */ #define HERMON_EVT_CQ_ERRORS 0x04 /* overrun, protection */ @@ -127,6 +126,9 @@ #define HERMON_EVT_LOCAL_ACC_VIO_WQ_ERROR 0x11 #define HERMON_EVT_SRQ_CATASTROPHIC_ERROR 0x12 #define HERMON_EVT_SPOOF_FAIL 0x16 /* enet only */ +/* FEXCH Errors (QP Affiliated) */ +#define HERMON_EVT_FEXCH_ERROR 0x0B + /* Unaffiliated Asynch Events/Errors */ #define HERMON_EVT_PORT_STATE_CHANGE 0x09 #define HERMON_EVT_GPIO 0x15 @@ -134,11 +136,6 @@ #define HERMON_EVT_COMMAND_INTF_COMP 0x0A /* Miscellaneous */ #define HERMON_EVT_LOCAL_CAT_ERROR 0x08 -/* LEGACY - no longer supported */ -#define HERMON_EVT_WQE_PG_FAULT 0x0B -#define HERMON_EVT_UNSUPPORTED_PG_FAULT 0x0C -#define HERMON_EVT_ECC_DETECTION 0x0E -#define HERMON_EVT_EQ_OVERFLOW 0x0F #define HERMON_EVT_MSK_COMPLETION \ @@ -152,10 +149,10 @@ (1 << HERMON_EVT_SEND_QUEUE_DRAINED) #define HERMON_EVT_MSK_SRQ_LAST_WQE_REACHED \ (1 << HERMON_EVT_SRQ_LAST_WQE_REACHED) -#define HERMON_EVT_MSK_SRQ_LIMIT \ +#define HERMON_EVT_MSK_SRQ_LIMIT \ (1 << HERMON_EVT_SRQ_LIMIT) -#define HERMON_EVT_MSK_CQ_ERRORS \ +#define HERMON_EVT_MSK_CQ_ERRORS \ (1 << HERMON_EVT_CQ_ERRORS) #define HERMON_EVT_MSK_LOCAL_WQ_CAT_ERROR \ (1 << HERMON_EVT_LOCAL_WQ_CAT_ERROR) @@ -169,37 +166,29 @@ (1 << HERMON_EVT_LOCAL_ACC_VIO_WQ_ERROR) #define 
HERMON_EVT_MSK_SRQ_CATASTROPHIC_ERROR \ (1 << HERMON_EVT_SRQ_CATASTROPHIC_ERROR) -#define HERMON_EVT_MSK_SPOOF_FAIL \ +#define HERMON_EVT_MSK_SPOOF_FAIL \ (1 << HERMON_EVT_SPOOF_FAIL) -#define HERMON_EVT_MSK_PORT_STATE_CHANGE \ +#define HERMON_EVT_MSK_FEXCH_ERROR \ + (1 << HERMON_EVT_FEXCH_ERROR) + +#define HERMON_EVT_MSK_PORT_STATE_CHANGE \ (1 << HERMON_EVT_PORT_STATE_CHANGE) -#define HERMON_EVT_MSK_GPIO \ +#define HERMON_EVT_MSK_GPIO \ (1 << HERMON_EVT_GPIO) -#define HERMON_EVT_MSK_COMMAND_INTF_COMP \ +#define HERMON_EVT_MSK_COMMAND_INTF_COMP \ (1 << HERMON_EVT_COMMAND_INTF_COMP) #define HERMON_EVT_MSK_LOCAL_CAT_ERROR \ (1 << HERMON_EVT_LOCAL_CAT_ERROR) -#define HERMON_EVT_MSK_WQE_PG_FAULT \ - (1 << HERMON_EVT_WQE_PG_FAULT) -#define HERMON_EVT_MSK_UNSUPPORTED_PG_FAULT \ - (1 << HERMON_EVT_UNSUPPORTED_PG_FAULT) -#define HERMON_EVT_MSK_ECC_DETECTION \ - (1 << HERMON_EVT_ECC_DETECTION) - #define HERMON_EVT_NO_MASK 0 -/* - * WAS in Tavor & Arbel, but now two bits - 0x1000 and 0x0800 (0x0B & 0x00C) - * are no longer supported, so the catchall will be just 0x0040 (0x06) - * Loc QPC cat - * #define HERMON_EVT_CATCHALL_MASK 0x1840 - */ +/* For now, "catchall" is just HERMON_EVT_LOCAL_QPC_CAT_ERROR. */ #define HERMON_EVT_CATCHALL_MASK 0x0040 + /* * The last defines are used by hermon_eqe_sync() to indicate whether or not * to force a DMA sync. The case for forcing a DMA sync on a EQE comes from @@ -270,9 +259,8 @@ * Specifically, it has a consumer index and a lock to ensure single threaded * access to it. It has pointers to the various resources allocated for the * event queue, i.e. an EQC resource and the memory for the event queue - * itself. It has flags to indicate whether the EQ requires ddi_dma_sync() - * ("eq_sync") or to indicate which type of event class(es) the EQ has been - * mapped to (eq_evttypemask). + * itself. It has flags to indicate which type of event class(es) the EQ + * has been mapped to (eq_evttypemask). 
* * It also has a pointer to the associated MR handle (for the mapped queue * memory) and a function pointer that points to the handler that should @@ -288,11 +276,10 @@ uint32_t eq_consindx; uint32_t eq_eqnum; hermon_hw_eqe_t *eq_buf; + uint32_t *eq_doorbell; hermon_mrhdl_t eq_mrhdl; uint32_t eq_bufsz; uint32_t eq_log_eqsz; - uint32_t eq_nexteqe; - uint_t eq_sync; uint_t eq_evttypemask; hermon_rsrc_t *eq_eqcrsrcp; hermon_rsrc_t *eq_rsrcp; @@ -310,6 +297,7 @@ uint32_t eq_param); void hermon_eq_overflow_handler(hermon_state_t *state, hermon_eqhdl_t eq, hermon_hw_eqe_t *eqe); +void hermon_eq_reset_uar_baseaddr(hermon_state_t *state); #ifdef __cplusplus }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_fcoib.h Thu Jul 29 22:10:26 2010 -0700 @@ -0,0 +1,104 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ */ + +#ifndef _SYS_IB_ADAPTERS_HERMON_FCOIB_H +#define _SYS_IB_ADAPTERS_HERMON_FCOIB_H + +/* + * hermon_fcoib.h + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct hermon_fcoib_qp_s { + hermon_rsrc_t hfc_qp_rsrc; + vmem_t *hfc_qp_vmp; +} hermon_fcoib_qp_t; + +typedef struct hermon_fcoib_s { + uint8_t hfc_log2_max_port_ids_queried; + uint8_t hfc_log2_max_fexch_queried; + uint8_t hfc_log2_max_rfci_queried; + kmutex_t hfc_lock; + hermon_rsrc_t *hfc_mpt_rsrc; /* FEXCH MPTs for all ports */ + hermon_rsrc_t *hfc_mtt_rsrc; /* FEXCH MTTs for all MPTs */ + hermon_rsrc_t *hfc_fexch_rsrc; /* FEXCH QPs for all ports */ + hermon_rsrc_t *hfc_rfci_rsrc; /* RFCI QPs for all ports */ + uint8_t hfc_nports; /* #HCA ports */ + uint8_t hfc_port_enabled[HERMON_MAX_PORTS]; + uint_t hfc_mpts_per_port; + uint_t hfc_mtts_per_mpt; + uint_t hfc_fexch_qps_per_port; + uint_t hfc_rfci_qps_per_port; + vmem_t *hfc_rfci_vmemp[HERMON_MAX_PORTS]; + vmem_t *hfc_fexch_vmemp[HERMON_MAX_PORTS]; + uintptr_t hfc_vmemstart; + uint32_t *hfc_n_port_ids[HERMON_MAX_PORTS]; + + /* Convenient, but redundant values */ + uint32_t hfc_mpt_base[HERMON_MAX_PORTS]; + uint32_t hfc_mtt_base[HERMON_MAX_PORTS]; + uint32_t hfc_fexch_base[HERMON_MAX_PORTS]; + uint32_t hfc_rfci_base[HERMON_MAX_PORTS]; +} hermon_fcoib_t; + +_NOTE(DATA_READABLE_WITHOUT_LOCK(hermon_fcoib_s::hfc_fexch_rsrc + hermon_fcoib_s::hfc_nports + hermon_fcoib_s::hfc_mpts_per_port + hermon_fcoib_s::hfc_mtts_per_mpt + hermon_fcoib_s::hfc_fexch_qps_per_port + hermon_fcoib_s::hfc_rfci_qps_per_port + hermon_fcoib_s::hfc_mpt_base + hermon_fcoib_s::hfc_mtt_base + hermon_fcoib_s::hfc_fexch_base + hermon_fcoib_s::hfc_rfci_base)) + +int hermon_fcoib_set_id(hermon_state_t *state, int port, uint32_t rfci_qpn, + uint32_t src_id); +int hermon_fcoib_get_id_idx(hermon_state_t *state, int port, + ibt_fc_attr_t *fcp); +int hermon_fcoib_check_exch_base_off(hermon_state_t *state, int port, + ibt_fc_attr_t *fcp); +uint_t 
hermon_fcoib_qpnum_from_fexch(hermon_state_t *state, int port, + uint16_t fexch); +int hermon_fcoib_is_fexch_qpn(hermon_state_t *state, uint_t qpnum); +uint32_t hermon_fcoib_qpn_to_mkey(hermon_state_t *state, uint_t qpnum); +int hermon_fcoib_fexch_mkey_init(hermon_state_t *state, hermon_pdhdl_t pd, + uint8_t port, uint32_t qp_indx, uint_t sleep); +int hermon_fcoib_fexch_mkey_fini(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t qpnum, uint_t sleep); +uint32_t hermon_fcoib_fexch_relative_qpn(hermon_state_t *state, uint8_t port, + uint32_t qpnum); +int hermon_fcoib_init(hermon_state_t *state); +void hermon_fcoib_fini(hermon_state_t *state); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_IB_ADAPTERS_HERMON_FCOIB_H */
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_fm.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_fm.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_FM_H @@ -119,10 +118,13 @@ * At each place where the planned FMA error matrix specifies that * an ereport will be generated, for now there is a HERMON_FMANOTE() * call generating an appropriate message string. + * + * This has been revised since it has been realized that FMA is only + * to be used for hardware errors. HERMON_FMANOTE() is used to report + * errors that are likely to be hardware, but possibly are not. */ - #define HERMON_FMANOTE(state, string) \ - cmn_err(CE_NOTE, "hermon%d: Device Error: %s", \ + cmn_err(CE_WARN, "hermon%d: Device Error: %s", \ (state)->hs_instance, string) /* CQE Syndrome errors - see hermon_cq.c */
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_hw.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_hw.h Thu Jul 29 22:10:26 2010 -0700 @@ -127,7 +127,7 @@ #define HERMON_HCR_CMD_OPMOD_SHFT 12 /* - * Arbel "QUERY_DEV_LIM" command - Hermon, "QUERY_DEV_CAP" - Same hex code + * Arbel/tavor "QUERY_DEV_LIM" == Hermon "QUERY_DEV_CAP" - Same hex code * same function as tavor/arbel QUERY_DEV_LIM, just renamed (whatever). * The QUERY_DEV_LIM command returns the device limits and capabilities * supported by the Hermon device. This command must be run before @@ -175,8 +175,8 @@ uint32_t :2; uint32_t log_rsvd_dmpt :4; uint32_t :4; - uint32_t log_max_mrw_sz :8; - uint32_t :4; + uint32_t log_max_mrw_sz :7; + uint32_t :5; uint32_t log_rsvd_mtt :4; uint32_t log_max_ra_glob :6; @@ -196,22 +196,25 @@ uint32_t num_ports :4; uint32_t :12; uint32_t ca_ack_delay :5; - uint32_t :11; - - uint32_t mod_wr_srq :1; - uint32_t :31; - + uint32_t cqmep :3; /* cq moderation policies */ uint32_t :4; - uint32_t :12; + uint32_t :1; + uint32_t :3; + + uint32_t mod_wr_srq :1; /* resize SRQ supported */ + uint32_t :31; + + uint32_t :16; uint32_t stat_rate_sup :16; + uint32_t :8; uint32_t :4; - uint32_t :12; + uint32_t :4; uint32_t :8; uint32_t log_max_msg :5; uint32_t :3; - uint32_t rc :1; + uint32_t rc :1; /* 0x44 */ uint32_t uc :1; uint32_t ud :1; uint32_t xrc :1; @@ -222,21 +225,39 @@ uint32_t pkey_v :1; uint32_t qkey_v :1; uint32_t vmm :1; - uint32_t :5; + uint32_t fcoe :1; + uint32_t dpdp :1; /* dual port diff protocol */ + uint32_t raw_etype :1; + uint32_t raw_ipv4 :1; + uint32_t blh :1; /* big LSO header, bit in WQE */ uint32_t mem_win :1; uint32_t apm :1; uint32_t atomic :1; uint32_t raw_multi :1; uint32_t avp :1; uint32_t ud_multi :1; - uint32_t :2; + uint32_t udm_ipv4 :1; + uint32_t dif :1; /* DIF supported */ uint32_t pg_on_demand :1; uint32_t router :1; - uint32_t :6; - - uint32_t :32; - - uint32_t log_max_bf_page :6; + uint32_t l2mc :1; /* lev 
2 enet multicast */ + uint32_t :1; + uint32_t ud_swp :1; /* sw parse for UD xport */ + uint32_t ipv6_ex :1; /* offload w/ IPV6 ext hdrs */ + uint32_t lle :1; /* low latency enet */ + uint32_t fcoe_t11 :1; /* fcoenet T11 frame support */ + + /* 0x40 */ + uint32_t eth_uc_lb :1; /* enet unicast loopback */ + uint32_t :3; + uint32_t hdr_split :1; + uint32_t hdr_lookahead :1; + uint32_t :2; + uint32_t rss_udp :1; + uint32_t :7; + uint32_t :16; + + uint32_t log_max_bf_page :6; /* 0x4c */ uint32_t :2; uint32_t log_max_bf_req_ppg :6; uint32_t :2; @@ -244,30 +265,37 @@ uint32_t :10; uint32_t blu_flm :1; - uint32_t log_pg_sz :8; + uint32_t log_pg_sz :8; /* 0x48 */ uint32_t :8; uint32_t log_max_uar_sz :6; uint32_t :6; uint32_t num_rsvd_uar :4; - uint32_t max_desc_sz_rq :16; + uint32_t max_desc_sz_rq :16; /* 0x54 */ uint32_t max_sg_rq :8; uint32_t :8; - uint32_t max_desc_sz_sq :16; + uint32_t max_desc_sz_sq :16; /* 0x50 */ uint32_t max_sg_sq :8; uint32_t :8; - uint32_t rsvd_fcoib[2]; - - uint32_t log_max_srcd :4; - uint32_t :8; - uint32_t num_rsvd_srcds :4; + + uint32_t rsvd_fcoib; /* 0x5C */ + + uint32_t :1; /* 0x58 */ + uint32_t fexch_base_mpt :7; /* FC exch base mpt num */ + uint32_t fcp_ud_base_qp :16; /* RC UD base qp num */ + uint32_t fexch_base_qp :8; /* FC exch base qp num */ + + + uint32_t log_max_xrcd :5; /* 0x64 */ + uint32_t :7; + uint32_t num_rsvd_xrcds :4; uint32_t log_max_pd :5; uint32_t :7; uint32_t num_rsvd_pd :4; - uint32_t log_max_mcg :8; + uint32_t log_max_mcg :8; /* 0x60 */ uint32_t num_rsvd_mcg :4; uint32_t :4; uint32_t log_max_qp_mcg :8; @@ -275,19 +303,19 @@ uint32_t rsrv2[6]; - uint32_t altc_entry_sz :16; + uint32_t altc_entry_sz :16; /* 0x84 */ uint32_t aux_entry_sz :16; - uint32_t qpc_entry_sz :16; + uint32_t qpc_entry_sz :16; /* 0x80 */ uint32_t rdmardc_entry_sz :16; - uint32_t cmpt_entry_sz :16; + uint32_t cmpt_entry_sz :16; /* 0x8C */ uint32_t srq_entry_sz :16; - uint32_t cqc_entry_sz :16; + uint32_t cqc_entry_sz :16; /* 0x88 */ uint32_t 
eqc_entry_sz :16; - uint32_t bmme :1; + uint32_t bmme :1; /* 0x94 */ uint32_t win_type :1; uint32_t mps :1; uint32_t bl :1; @@ -301,13 +329,13 @@ uint32_t fast_reg_wr :1; uint32_t :20; - uint32_t dmpt_entry_sz :16; + uint32_t dmpt_entry_sz :16; /* 0x90 */ uint32_t mtt_entry_sz :16; uint32_t :32; uint32_t rsv_lkey; - + /* 0xA0 */ uint64_t max_icm_size; uint32_t rsrv3[22]; @@ -349,8 +377,8 @@ uint32_t log_max_eq :4; uint32_t log_rsvd_mtt :4; - uint32_t :4; - uint32_t log_max_mrw_sz :8; + uint32_t :5; + uint32_t log_max_mrw_sz :7; uint32_t :4; uint32_t log_rsvd_dmpt :4; uint32_t :2; @@ -373,37 +401,56 @@ uint32_t log_max_ra_glob :6; uint32_t :31; - uint32_t mod_wr_srq :1; - - uint32_t :11; + uint32_t mod_wr_srq :1; /* resize SRQ supported */ + + uint32_t :3; + uint32_t :1; + uint32_t :4; + uint32_t cqmep :3; /* cq moderation policies */ uint32_t ca_ack_delay :5; - /* PRM 0.35, stuff moved to per port info */ uint32_t :12; uint32_t num_ports :4; uint32_t :3; uint32_t log_max_msg :5; uint32_t :8; - uint32_t :12; + uint32_t :4; uint32_t :4; + uint32_t :8; uint32_t stat_rate_sup :16; - uint32_t :12; - uint32_t :4; - - uint32_t :32; - - uint32_t :6; + uint32_t :16; + + uint32_t :16; /* 0x40 */ + uint32_t :7; + uint32_t rss_udp :1; + uint32_t :2; + uint32_t hdr_lookahead :1; + uint32_t hdr_split :1; + uint32_t :3; + uint32_t eth_uc_lb :1; /* enet unicast loopback */ + /* 0x44 */ + uint32_t fcoe_t11 :1; /* fcoenet T11 frame support */ + uint32_t lle :1; /* low latency enet */ + uint32_t ipv6_ex :1; /* offload w/ IPV6 ext hdrs */ + uint32_t ud_swp :1; /* sw parse for UD xport */ + uint32_t :1; + uint32_t l2mc :1; /* lev 2 enet multicast */ uint32_t router :1; uint32_t pg_on_demand :1; - uint32_t :2; + uint32_t dif :1; /* DIF supported */ + uint32_t udm_ipv4 :1; uint32_t ud_multi :1; uint32_t avp :1; uint32_t raw_multi :1; uint32_t atomic :1; uint32_t apm :1; uint32_t mem_win :1; - uint32_t :5; + uint32_t blh :1; /* big LSO header, bit in WQE */ + uint32_t raw_ipv4 :1; + 
uint32_t raw_etype :1; + uint32_t dpdp :1; /* dual port diff protocol */ + uint32_t fcoe :1; uint32_t vmm :1; uint32_t qkey_v :1; uint32_t pkey_v :1; @@ -416,13 +463,13 @@ uint32_t uc :1; uint32_t rc :1; - uint32_t num_rsvd_uar :4; + uint32_t num_rsvd_uar :4; /* 0x48 */ uint32_t :6; uint32_t log_max_uar_sz :6; uint32_t :8; uint32_t log_pg_sz :8; - uint32_t blu_flm :1; + uint32_t blu_flm :1; /* 0x4c */ uint32_t :10; uint32_t log_bf_reg_sz :5; uint32_t :2; @@ -430,47 +477,53 @@ uint32_t :2; uint32_t log_max_bf_page :6; - uint32_t :8; + uint32_t :8; /* 0x50 */ uint32_t max_sg_sq :8; uint32_t max_desc_sz_sq :16; - uint32_t :8; + uint32_t :8; /* 0x54 */ uint32_t max_sg_rq :8; uint32_t max_desc_sz_rq :16; - uint32_t rsvd_fcoib[2]; - - uint32_t :8; + /* 0x58 */ + uint32_t fexch_base_qp :8; /* FC exch base qp num */ + uint32_t fcp_ud_base_qp :16; /* RC UD base qp num */ + uint32_t fexch_base_mpt :7; /* FC exch base mpt num */ + uint32_t :1; + + uint32_t rsvd_fcoib; /* 0x5C */ + + uint32_t :8; /* 0x60 */ uint32_t log_max_qp_mcg :8; uint32_t :4; uint32_t num_rsvd_mcg :4; uint32_t log_max_mcg :8; - uint32_t num_rsvd_pd :4; + uint32_t num_rsvd_pd :4; /* 0x64 */ uint32_t :7; uint32_t log_max_pd :5; - uint32_t num_rsvd_srcds :4; - uint32_t :8; - uint32_t log_max_srcd :4; + uint32_t num_rsvd_xrcds :4; + uint32_t :7; + uint32_t log_max_xrcd :5; uint32_t rsrv2[6]; - uint32_t rdmardc_entry_sz :16; + uint32_t rdmardc_entry_sz :16; /* 0x80 */ uint32_t qpc_entry_sz :16; - uint32_t aux_entry_sz :16; + uint32_t aux_entry_sz :16; /* 0x84 */ uint32_t altc_entry_sz :16; - uint32_t eqc_entry_sz :16; + uint32_t eqc_entry_sz :16; /* 0x88 */ uint32_t cqc_entry_sz :16; - uint32_t srq_entry_sz :16; + uint32_t srq_entry_sz :16; /* 0x8C */ uint32_t cmpt_entry_sz :16; - uint32_t mtt_entry_sz :16; + uint32_t mtt_entry_sz :16; /* 0x90 */ uint32_t dmpt_entry_sz :16; - uint32_t :20; + uint32_t :20; /* 0x94 */ uint32_t fast_reg_wr :1; uint32_t reserved_lkey :1; uint32_t win_type2 :1; @@ -489,7 +542,7 @@ 
uint32_t :32; uint64_t max_icm_size; - + /* 0xA0 */ uint32_t rsrv3[22]; }; #endif @@ -520,16 +573,16 @@ uint32_t cmd_intf_rev :16; uint32_t :16; - uint32_t fw_day :8; + uint32_t fw_day :8; uint32_t fw_month :8; - uint32_t fw_year :16; + uint32_t fw_year :16; uint32_t :1; - uint32_t ccq :1; + uint32_t ccq :1; /* currently not def'd */ uint32_t :6; - uint32_t fw_sec :8; - uint32_t fw_min :8; - uint32_t fw_hour :8; + uint32_t fw_sec :8; + uint32_t fw_min :8; + uint32_t fw_hour :8; uint32_t rsrv0[2]; @@ -547,9 +600,16 @@ uint32_t error_buf_sz; - uint32_t rsrv2[48]; + uint64_t vf_com_ch_addr; + + uint32_t :32; + + uint32_t :30; + uint32_t vf_com_ch_bar :2; + + uint32_t rsrv2[44]; }; -#else +#else /* BIG ENDIAN */ struct hermon_hw_queryfw_s { uint32_t fw_pages :16; uint32_t fw_rev_major :16; @@ -564,16 +624,16 @@ uint32_t :23; uint32_t log_max_cmd :8; - uint32_t fw_hour :8; - uint32_t fw_min :8; - uint32_t fw_sec :8; + uint32_t fw_hour :8; + uint32_t fw_min :8; + uint32_t fw_sec :8; uint32_t :6; - uint32_t ccq :1; + uint32_t ccq :1; /* currently not def'd */ uint32_t :1; - uint32_t fw_year :16; + uint32_t fw_year :16; uint32_t fw_month :8; - uint32_t fw_day :8; + uint32_t fw_day :8; uint32_t rsrv1[2]; @@ -591,7 +651,14 @@ uint32_t err_buf_bar :2; uint32_t :30; - uint32_t rsrv2[48]; + uint64_t vf_com_ch_addr; + + uint32_t vf_com_ch_bar :2; + uint32_t :30; + + uint32_t :32; + + uint32_t rsrv2[44]; }; #endif @@ -662,23 +729,27 @@ struct hermon_hw_vpm_s { uint32_t :12; uint32_t vaddr_l :20; + uint32_t vaddr_h; - uint32_t log2sz :5; + uint32_t log2sz :5; /* in 4KB pages */ uint32_t :7; uint32_t paddr_l :20; + uint32_t paddr_h; }; #else struct hermon_hw_vpm_s { uint32_t vaddr_h; + uint32_t vaddr_l :20; uint32_t :12; uint32_t paddr_h; + uint32_t paddr_l :20; uint32_t :7; - uint32_t log2sz :5; + uint32_t log2sz :5; /* in 4KB pages */ }; #endif @@ -751,7 +822,7 @@ uint32_t rsrv6[2]; } hermon_hw_qp_ee_cq_eq_rdb_t; -#else +#else /* BIG ENDIAN */ typedef struct 
hermon_hw_qp_ee_cq_eq_rdb_s { uint32_t rsrv0[4]; @@ -816,7 +887,7 @@ uint32_t mc_hash_fn :3; uint32_t :5; } hermon_multicast_param_t; -#else +#else /* BIG ENDIAN */ typedef struct hermon_multicast_param_s { uint64_t mc_baseaddr; @@ -844,6 +915,7 @@ uint64_t dmpt_baseaddr; uint32_t :32; + uint32_t log_dmpt_sz :6; uint32_t :2; uint32_t pgfault_rnr_to :5; @@ -853,7 +925,7 @@ uint64_t cmpt_baseaddr; } hermon_tpt_param_t; -#else +#else /* BIG ENDIAN */ typedef struct hermon_tpt_param_s { uint64_t dmpt_baseaddr; @@ -861,6 +933,7 @@ uint32_t pgfault_rnr_to :5; uint32_t :2; uint32_t log_dmpt_sz :6; + uint32_t :32; uint64_t mtt_baseaddr; @@ -899,6 +972,10 @@ /* * NEW for Hermon * QP Allocation Params + * NOTE: as of PRM v0.50 no longer needed (ccq not supported + * leave structure here, just in case ccq comes back ) + * but adjust the overall structure + * not to use it * */ @@ -926,12 +1003,13 @@ } hermon_qp_alloc_param_t; #endif + #ifdef _LITTLE_ENDIAN struct hermon_hw_initqueryhca_s { uint32_t :32; uint32_t :24; - uint32_t version :8; + uint32_t version :8; uint32_t :13; uint32_t log2_cacheline :3; @@ -941,9 +1019,11 @@ uint32_t udav_port_chk :1; uint32_t big_endian :1; - uint32_t :1; + uint32_t qos :1; uint32_t chsum_en :1; - uint32_t :28; + uint32_t :12; + uint32_t cqpm_short_pkt_lim :14; /* short pkt limit for qpm */ + uint32_t cqmp :2; /* cq moderation policy */ uint32_t router_qp :24; uint32_t :5; @@ -957,7 +1037,7 @@ uint32_t rsrv2[8]; - hermon_multicast_param_t multi; + hermon_multicast_param_t multi; uint32_t rsrv3[4]; @@ -967,15 +1047,22 @@ hermon_uar_param_t uar; - uint32_t rsrv5[4]; - - hermon_qp_alloc_param_t qp_alloc; - - uint32_t rsrv6[100]; /* from 0x16c to 0x2fc offsets */ + uint32_t rsrv5[36]; + + hermon_multicast_param_t enet_multi; + + uint32_t rsrv6[24]; /* to 0x24C */ + + uint32_t :32; + + uint32_t fcoe_t11 :1; /* fcoe t11 frame enable */ + uint32_t :31; + + uint32_t rsrv7[42]; /* 0x254 - 0x2FC */ }; #else /* BIG ENDIAN */ struct 
hermon_hw_initqueryhca_s { - uint32_t version :8; + uint32_t version :8; uint32_t :24; uint32_t :32; @@ -992,9 +1079,11 @@ uint32_t :5; uint32_t router_qp :24; - uint32_t :28; + uint32_t cqmp :2; /* cq moderation policy */ + uint32_t cqpm_short_pkt_lim :14; /* short pkt limit for qpm */ + uint32_t :12; uint32_t chsum_en :1; - uint32_t :1; + uint32_t qos :1; uint32_t big_endian :1; uint32_t udav_port_chk :1; @@ -1004,7 +1093,7 @@ uint32_t rsrv2[8]; - hermon_multicast_param_t multi; + hermon_multicast_param_t multi; uint32_t rsrv3[4]; @@ -1014,11 +1103,18 @@ hermon_uar_param_t uar; - uint32_t rsrv5[4]; - - hermon_qp_alloc_param_t qp_alloc; - - uint32_t rsrv6[100]; /* from 0x16c to 0x2fc offsets */ + uint32_t rsrv5[36]; + + hermon_multicast_param_t enet_multi; + + uint32_t rsrv6[24]; /* to 0x24C */ + + uint32_t :31; + uint32_t fcoe_t11 :1; /* fcoe t11 frame enable */ + + uint32_t :32; + + uint32_t rsrv7[42]; /* 0x254 - 0x2FC */ }; #endif #define HERMON_UDAV_PROTECT_DISABLED 0x0 @@ -1057,82 +1153,131 @@ * directly as was done for the previous HCAs). 
*/ +/* + * PRM 0.4X and 0.50 changed the query_port to integrate the ethernet + * stuff as well, so this is a signficant change to the structure + */ + #ifdef _LITTLE_ENDIAN struct hermon_hw_query_port_s { + /* 0x04 */ uint32_t log_max_pkey :4; /* pkey table size */ uint32_t log_max_gid :4; /* max gids / port */ - /* was max_port_width arbel: long list of values */ - uint32_t ib_port_wid :4; + uint32_t ib_port_wid :8; + /* + * Enet link speed - 0x0 10Gb XAUI, 0x01 10Gb XFI, + * 0x02 1Gb, 0xF other + */ + uint32_t eth_link_spd :4; uint32_t :4; - uint32_t :4; /* other types possibly */ + /* + * IB Link speed - bit 0 SDR, bit1 DDR, Bit 2 QDR + */ + uint32_t ib_link_spd :8; + + /* 0x00 */ + uint32_t eth_mtu :16; /* in bytes */ + /* + * IB MTU - 0x0 rsvd, 0x1=256, 0x2=512, 0x3=1024, 0x4=2048, 0x5=4096 + */ + uint32_t ib_mtu :4; uint32_t :4; /* - * 0x1=2.5G, 0x3=2.5 or 5.0G, 0x5=2.5 or 10G - * 0x7=2.5, 5.0, or 10G, others rsvd - */ - uint32_t ib_link_spd :4; - - uint32_t :4; - - uint32_t :16; /* used for other types (?) 
*/ - uint32_t ib_mtu :4; - /* - * 0x0 rsvd, 0x1=256, 0x2=512, 0x3=1024, 0x5=2048 - * 0x5=4096, others rsvd + * for next two if link down + * -> what port supports, if up + * -> what port is running */ - uint32_t :4; - uint32_t port_type :8; /* 0x00, 0x01 IB, others TBD */ - - uint32_t :32; + + uint32_t ib_link :1; + uint32_t eth_link :1; + uint32_t :1; + uint32_t vpi :1; + uint32_t :3; + uint32_t link_up :1; + + + uint32_t :32; /* 0x0C */ + /* max vl's supported (not incl vl_15) */ - uint32_t max_vl :4; + uint32_t max_vl :4; /* 0x08 */ uint32_t :4; - uint32_t :8; /* but others possibly */ + uint32_t log_max_mac :4; + uint32_t log_max_vlan :4; uint32_t :16; - uint32_t rsvd0[2]; /* but for other types */ - uint32_t rsvd1[504]; + uint32_t mac_lo; + + uint32_t mac_hi :16; + uint32_t :16; + + uint32_t rsvd1[2]; }; + #else /* BIG ENDIAN */ struct hermon_hw_query_port_s { - uint32_t port_type :8; /* 0x00, 0x01 IB, others TBD */ + /* 0x00 */ + uint32_t link_up :1; + uint32_t :3; + uint32_t vpi :1; + uint32_t :1; + /* + * for next two if link down + * -> what port supports, if up + * -> what port is running + */ + uint32_t eth_link :1; + uint32_t ib_link :1; uint32_t :4; /* - * 0x0 rsvd, 0x1=256, 0x2=512, 0x3=1024, 0x5=2048 - * 0x1=256, 0x2=512, 0x3=1024, 0x5=2048 + * IB MTU - 0x0 rsvd, 0x1=256, 0x2=512, 0x3=1024, 0x4=2048, 0x5=4096 */ uint32_t ib_mtu :4; - /* 0x5=4096, others rsvd */ - uint32_t :16; /* used for other types (?) 
*/ - - uint32_t :4; - uint32_t ib_link_spd :4; + uint32_t eth_mtu :16; /* in bytes */ + + /* 0x04 */ /* - * 0x1=2.5G, 0x3=2.5 or 5.0G, 0x5=2.5 or 10G - * 0x7=2.5, 5.0, or 10G, others rsvd + * IB Link speed - bit 0 SDR, bit1 DDR, Bit 2 QDR */ + uint32_t ib_link_spd :8; uint32_t :4; - uint32_t :4; /* other types possibly */ - uint32_t :4; - /* was max_port_width arbel: long list of values */ - uint32_t ib_port_wid :4; + /* + * Enet link speed - 0x0 10Gb XAUI, 0x01 10Gb XFI, + * 0x02 1Gb, 0xF other + */ + uint32_t eth_link_spd :4; + uint32_t ib_port_wid :8; uint32_t log_max_gid :4; /* max gids / port */ uint32_t log_max_pkey :4; /* pkey table size */ - uint32_t :16; - uint32_t :8; /* but others possibly */ + uint32_t :16; /* 0x08 */ + uint32_t log_max_vlan :4; + uint32_t log_max_mac :4; uint32_t :4; /* max vl's supported (not incl vl_15) */ uint32_t max_vl :4; - uint32_t :32; - - uint32_t rsvd0[2]; /* but for other types */ - uint32_t rsvd1[504]; + uint32_t :32; /* 0x0C */ + + uint32_t :16; + uint32_t mac_hi :16; + + uint32_t mac_lo; + + uint32_t rsvd1[2]; }; #endif +/* + * the following structure is used for IB set port + * others following are for ethernet set port + */ + +#define HERMON_HW_OPMOD_SETPORT_IB 0x0 +#define HERMON_HW_OPMOD_SETPORT_EN 0x1 +#define HERMON_HW_OPMOD_SETPORT_EXT 0x2 + + #ifdef _LITTLE_ENDIAN struct hermon_hw_set_port_s { uint32_t cap_mask; @@ -1158,20 +1303,17 @@ uint64_t node_guid; - uint32_t sniff_qpn_base :24; - uint32_t ge :1; /* glob egress sniff enabled */ - uint32_t gi :1; /* glob ingress sniff enabled */ - uint32_t qe :1; /* qp-egress sniff enable */ - uint32_t qi :1; /* qp-ingress sniff enabled */ - uint32_t :4; - - uint32_t router_qpn_base :24; - uint32_t routermode :1; - uint32_t :7; + uint32_t ingress_sniff_qpn :24; + uint32_t ingress_sniff_mode :1; + uint32_t :7; + + uint32_t egress_sniff_qpn :24; + uint32_t egress_sniff_mode :1; + uint32_t :7; uint32_t :32; - uint32_t max_guid :16; /* valid if noted above */ + uint32_t max_gid 
:16; /* valid if noted above */ uint32_t max_pkey :16; /* valid if noted above */ uint32_t rsrd0[500]; @@ -1201,19 +1343,17 @@ uint64_t node_guid; - uint32_t :7; - uint32_t routermode :1; - uint32_t router_qpn_base :24; - - uint32_t :4; - uint32_t qi :1; /* qp-ingress sniff enabled */ - uint32_t qe :1; /* qp-egress sniff enable */ - uint32_t gi :1; /* glob ingress sniff enabled */ - uint32_t ge :1; /* glob egress sniff enabled */ - uint32_t sniff_qpn_base :24; + uint32_t :7; + uint32_t egress_sniff_mode :1; + uint32_t egress_sniff_qpn :24; + + uint32_t :7; + uint32_t ingress_sniff_mode :1; + uint32_t ingress_sniff_qpn :24; + uint32_t max_pkey :16; /* valid if noted above */ - uint32_t max_guid :16; /* valid if noted above */ + uint32_t max_gid :16; /* valid if noted above */ uint32_t :32; @@ -1221,6 +1361,292 @@ }; #endif +/* + * structures for ethernet setport + * Which structure is used depends on low-16 of opmod + * Low 8 == port number, 15:8 == selector + * Or the following with port number + */ + +#define HERMON_HW_ENET_OPMOD_SELECT_GEN 0x0000 /* general params */ +#define HERMON_HW_ENET_OPMOD_SELECT_RQN 0x0100 /* rcv qpn calc */ +#define HERMON_HW_ENET_OPMOD_SELECT_MAC 0x0200 /* MAC table conf */ +#define HERMON_HW_ENET_OPMOD_SELECT_VLAN 0x0300 /* VLAN table conf */ +#define HERMON_HW_ENET_OPMOD_SELECT_PRIO 0x0400 /* Priority table */ +#define HERMON_HW_ENET_OPMOD_SELECT_GID 0x0500 /* GID Table */ + +/* + * set port for enthernet, general parameters + * Which structure + */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_en_s { + uint32_t mtu :16; + uint32_t :16; + + uint32_t v_mtu :1; + uint32_t v_pprx :1; + uint32_t v_pptx :1; + uint32_t :29; + + uint32_t :16; + uint32_t pfcrx :8; + uint32_t :7; + uint32_t pprx :1; + + uint32_t :16; + uint32_t pfctx :8; + uint32_t :7; + uint32_t pptx :1; + + uint32_t rsvd0[4]; +}; + +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_en_s { + uint32_t :29; + uint32_t v_pptx :1; + uint32_t v_pprx :1; + uint32_t v_mtu :1; 
+ + uint32_t :16; + uint32_t mtu :16; + + uint32_t pptx :1; + uint32_t :7; + uint32_t pfctx :8; + uint32_t :16; + + uint32_t pprx :1; + uint32_t :7; + uint32_t pfcrx :8; + uint32_t :16; + + uint32_t rsvd0[4]; + +}; +#endif + +/* set_port for enet, RX QPM calculations Parameters */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_en_rqpn_s { + uint32_t n_p :2; + uint32_t :6; + uint32_t n_v :3; + uint32_t :5; + uint32_t n_m :4; + uint32_t :12; + + uint32_t base_qpn :24; + uint32_t :8; + + uint32_t vlan_miss_idx :7; + uint32_t :8; + uint32_t intra_vlan_miss :1; + uint32_t no_vlan_idx :7; + uint32_t :8; + uint32_t intra_no_vlan :1; + + uint32_t mac_miss_idx :8; + uint32_t :24; + + uint32_t promisc_qpn :24; + uint32_t :7; + uint32_t en_uc_promisc :1; + + uint32_t no_vlan_prio :3; + uint32_t :29; + + uint32_t :32; + + uint32_t def_mcast_qpn :24; + uint32_t :5; + uint32_t mc_by_vlan :1; + uint32_t mc_promisc_mode :2; + + uint32_t rsvd0[4]; +}; + +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_en_rqpn_s { + uint32_t :8; + uint32_t base_qpn :24; + + uint32_t :12; + uint32_t n_m :4; + uint32_t :5; + uint32_t n_v :3; + uint32_t :6; + uint32_t n_p :2; + + uint32_t :24; + uint32_t mac_miss_idx :8; + + uint32_t intra_no_vlan :1; + uint32_t :8; + uint32_t no_vlan_idx :7; + uint32_t intra_vlan_miss :1; + uint32_t :8; + uint32_t vlan_miss_idx :7; + + uint32_t :29; + uint32_t no_vlan_prio :3; + + uint32_t en_uc_promisc :1; + uint32_t :7; + uint32_t promisc_qpn :24; + + uint32_t mc_promisc_mode :2; + uint32_t mc_by_vlan :1; + uint32_t :5; + uint32_t def_mcast_qpn :24; + + uint32_t :32; + + uint32_t rsvd0[4]; +}; +#endif + + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_mact_entry_s { + uint32_t mac_lo :32; + + uint32_t mac_hi :16; + uint32_t :7; + uint32_t mac_valid :1; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_mact_entry_s { + uint32_t mac_valid :1; + uint32_t :7; + uint32_t mac_hi :16; + + uint32_t mac_lo :32; + +}; +#endif + + +/* set_port for enet, MAC 
Table Configuration */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_en_mact_s { + struct hermon_hw_set_port_mact_entry_s mtable[128]; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_en_mact_s { + struct hermon_hw_set_port_mact_entry_s mtable[128]; +}; +#endif + + +/* set_port for enet, VLAN Table Configuration */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_vlant_entry_s { + uint32_t vlan_id :12; + uint32_t :18; + uint32_t intra :1; + uint32_t valid :1; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_vlant_entry_s { + uint32_t valid :1; + uint32_t intra :1; + uint32_t :18; + uint32_t vlan_id :12; +}; +#endif + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_en_vlant_s { + uint32_t rsvd[2]; + struct hermon_hw_set_port_vlant_entry_s table[126]; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_en_vlant_s { + uint32_t rsvd[2]; + struct hermon_hw_set_port_vlant_entry_s table[126]; +}; +#endif + +/* set_port for enet, Priority table Parameters */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_port_en_priot_s { + uint32_t :32; + + uint32_t prio0 :3; + uint32_t :1; + uint32_t prio1 :3; + uint32_t :1; + uint32_t prio2 :3; + uint32_t :1; + uint32_t prio3 :3; + uint32_t :1; + uint32_t prio4 :3; + uint32_t :1; + uint32_t prio5 :3; + uint32_t :1; + uint32_t prio6 :3; + uint32_t :1; + uint32_t prio7 :3; + uint32_t :1; + + uint32_t rsvd[2]; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_port_en_priot_s { + uint32_t :1; + uint32_t prio7 :3; + uint32_t :1; + uint32_t prio6 :3; + uint32_t :1; + uint32_t prio5 :3; + uint32_t :1; + uint32_t prio4 :3; + uint32_t :1; + uint32_t prio3 :3; + uint32_t :1; + uint32_t prio2 :3; + uint32_t :1; + uint32_t prio1 :3; + uint32_t :1; + uint32_t prio0 :3; + + uint32_t :32; + + uint32_t rsvd[2]; + +}; +#endif + + +/* note: GID table is same BIG or LITTLE ENDIAN */ + +struct hermon_hw_set_port_gidtable_s { + uint64_t gid[128]; +}; + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_conf_int_mod_s { + uint32_t :32; + + 
uint32_t int_vect :16; + uint32_t min_delay :16; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_conf_int_mod_s { + uint32_t min_delay :16; + uint32_t int_vect :16; + + uint32_t :32; +}; +#endif + @@ -1286,6 +1712,7 @@ uint32_t :1; /* dw 2, byte 0xc-f */ uint32_t mem_key; + uint64_t start_addr; /* dw 4-5, byte 0x10-17 */ uint64_t reg_win_len; /* dw 6-7, byte 0x18-1f */ @@ -1315,15 +1742,16 @@ uint32_t entity_sz :21; uint32_t :11; /* dw 14, byte 0x38-3b */ -#ifdef HERMON_NOTIMPL + uint32_t dif_m_atag :16; uint32_t :16; /* dw 17, 0x44-47 */ uint32_t dif_a_msk :16; uint32_t dif_v_msk :2; uint32_t dif_rep :2; - uint32_t :9; - uint32_t dif_err :3; /* dw 16, 0x40-43 */ + uint32_t :4; + uint32_t dif_err :3; + uint32_t :5; /* dw 16, 0x40-43 */ uint32_t dif_w_atag :16; uint32_t :16; /* dw 19, 0x4c-4f */ @@ -1333,7 +1761,9 @@ uint32_t :32; uint32_t dif_w_rtagb; /* dw 20, 0x50-53 */ -#endif /* HERMON_NOTIMPL */ + + uint32_t rsvd[10]; + }; #else /* BIG ENDIAN */ @@ -1399,10 +1829,10 @@ uint32_t :11; uint32_t mtt_fbo :21; /* dw 15, byte 0x3c-3f */ -#ifdef HERMON_NOTIMPL - + + uint32_t :5; uint32_t dif_err :3; - uint32_t :9; + uint32_t :4; uint32_t dif_rep :2; uint32_t dif_v_msk :2; uint32_t dif_a_msk :16; /* dw 16, 0x40-43 */ @@ -1418,7 +1848,9 @@ uint32_t dif_w_rtagb; /* dw 20, 0x50-53 */ uint32_t :32; -#endif /* HERMON_NOTIMPL */ + + uint32_t rsvd[10]; + }; #endif @@ -1469,7 +1901,7 @@ uint64_t reg_win_len; /* dw 6-7, byte 0x18-1f */ - uint32_t win_cnt :24; + uint32_t win_cnt :24; uint32_t :8; /* dw 9, byte 0x24-27 */ uint32_t lkey; /* dw 8, byte 0x20-23 */ @@ -1480,8 +1912,8 @@ uint32_t mtt_rep :4; uint32_t :17; uint32_t blk_mode :1; - uint32_t len_b64 :1; /* bit 64 of length */ - uint32_t fbo_en :1; + uint32_t len_b64 :1; /* bit 64 of length */ + uint32_t fbo_en :1; uint32_t :8; /* dw 10, byte 0x28-2b */ uint32_t mtt_size; /* dw 13, byte 0x34-37 */ @@ -1489,7 +1921,7 @@ uint32_t :3; uint32_t mtt_addr_l :29; /* dw 12, byte 0x30-33 */ - uint32_t mtt_fbo :21; + uint32_t mtt_fbo 
:21; uint32_t :11; /* dw 15, byte 0x3c-3f */ uint32_t entity_sz :21; @@ -1500,24 +1932,24 @@ #else /* BIG ENDIAN */ struct hermon_hw_cmpt_s { - uint32_t status :4; + uint32_t status :4; uint32_t :8; uint32_t no_snoop :1; uint32_t :1; uint32_t atc_xlat :1; - uint32_t atc_req :1; - uint32_t en_bind :1; - uint32_t atomic :1; + uint32_t atc_req :1; + uint32_t en_bind :1; + uint32_t atomic :1; uint32_t rw :1; uint32_t rr :1; uint32_t lw :1; uint32_t lr :1; uint32_t phys_addr :1; - uint32_t reg_win :1; + uint32_t reg_win :1; uint32_t :8; /* dw 0, byte 0x0-3 */ uint32_t qpn :24; - uint32_t bnd_qp :1; + uint32_t bnd_qp :1; uint32_t :7; /* dw 1, byte 0x4-7 */ uint32_t mem_key; /* dw 2, byte 0x8-b */ @@ -1539,14 +1971,14 @@ uint32_t lkey; /* dw 8, bytd 0x20-23 */ uint32_t :8; - uint32_t win_cnt :24; /* dw 9, byte 0x24-27 */ + uint32_t win_cnt :24; /* dw 9, byte 0x24-27 */ uint32_t :8; - uint32_t fbo_en :1; - uint32_t len_b64 :1; /* bit 64 of length */ + uint32_t fbo_en :1; + uint32_t len_b64 :1; /* bit 64 of length */ uint32_t blk_mode :1; uint32_t :17; - uint32_t mtt_rep :4; /* dw 10, byte 0x28-2b */ + uint32_t mtt_rep :4; /* dw 10, byte 0x28-2b */ uint32_t :24; uint32_t mtt_addr_h :8; /* dw 11, byte 0x2c-2f */ @@ -1558,6 +1990,9 @@ uint32_t :11; uint32_t entity_sz :21; /* dw 14, byte 0x38-3b */ + + uint32_t :11; /* dw 15, byte 0x3c-3f */ + uint32_t mtt_fbo :21; }; #endif @@ -1862,6 +2297,22 @@ #define HERMON_ERREVT_INTERNAL_PARITY 0x5 +typedef struct hermon_hw_eqe_fcerr_s { + uint32_t :14; + uint32_t port :2; + uint32_t fexch :16; /* fexch number */ + + uint32_t :32; + + uint32_t :24; + uint32_t fcsyndrome :8; + + uint32_t rsvd[3]; +} hermon_hw_eqe_fcerr_t; + +#define HERMON_ERR_FC_BADIU 0x0 +#define HERMON_ERR_FC_SEQUENCE 0x01 + typedef struct hermon_hw_eqe_pgflt_s { uint32_t rsrv0[2]; uint32_t :24; @@ -1901,6 +2352,7 @@ hermon_hw_eqe_cmdcmpl_t eqe_cmdcmpl; hermon_hw_eqe_operr_t eqe_operr; hermon_hw_eqe_pgflt_t eqe_pgflt; + hermon_hw_eqe_fcerr_t eqe_fcerr; } event_data; 
uint32_t :24; uint32_t owner :1; @@ -1914,6 +2366,7 @@ #define eqe_cmdcmpl event_data.eqe_cmdcmpl #define eqe_operr event_data.eqe_operr #define eqe_pgflt event_data.eqe_pgflt +#define eqe_fcerr event_data.eqe_fcerr /* * The following macros are used for extracting (and in some cases filling in) @@ -1953,6 +2406,13 @@ (((uint8_t *)(eqe))[0xf]) #define HERMON_EQE_OPERRDATA_GET(eq, eqe) \ htonl(((uint32_t *)(eqe))[4]) +#define HERMON_EQE_FEXCH_PORTNUM_GET(eq, eqe) \ + (((uint8_t *)(eqe))[5] & 0x3) +#define HERMON_EQE_FEXCH_FEXCH_GET(eq, eqe) \ + htons(((uint16_t *)(eqe))[3]) +#define HERMON_EQE_FEXCH_SYNDROME_GET(eq, eqe) \ + (((uint8_t *)(eqe))[15]) + /* * Hermon does ownership of CQ and EQ differently from Arbel & Tavor. * Now, you keep track of the TOTAL number of CQE's or EQE's that have been @@ -1964,10 +2424,9 @@ * does not consume it. */ -#define HERMON_EQE_OWNER_IS_SW(eq, eqe) \ +#define HERMON_EQE_OWNER_IS_SW(eq, eqe, consindx, shift) \ ((((uint8_t *)(eqe))[0x1f] & HERMON_EQE_OWNER_MASK) == \ - (((eq->eq_nexteqe) & eq->eq_bufsz) >> \ - (eq->eq_log_eqsz - HERMON_EQE_OWNER_SHIFT))) + (((consindx) & eq->eq_bufsz) >> (shift))) /* * Hermon Completion Queue Context Table (CQC) entries @@ -2049,7 +2508,7 @@ }; #else struct hermon_hw_cqc_s { - uint32_t status :4; + uint32_t status :4; uint32_t :9; uint32_t cqe_coalesc :1; uint32_t overrun_ignore :1; @@ -2125,9 +2584,9 @@ struct hermon_hw_cqe_s { uint32_t dife :1; - uint32_t :2; + uint32_t vlan :2; uint32_t fl :1; - uint32_t fccrc_sd :1; + uint32_t fcrc_sd :1; uint32_t d2s :1; uint32_t :2; uint32_t my_qpn :24; @@ -2139,10 +2598,10 @@ uint32_t srq_rqpn :24; uint32_t sl :4; - uint32_t :12; - uint32_t slid :16; - - uint32_t ipoib_status; + uint32_t vid :12; + uint32_t slid :16; /* SMAC 47:32 or SLID */ + + uint32_t ipoib_status; /* SMAC 31:0 or enet/ipoib/EoIB status */ uint32_t byte_cnt; @@ -2235,6 +2694,9 @@ #define HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe) \ (((uint8_t *)(cqe))[27]) +#define 
HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe) \ + (((uint8_t *)(cqe))[26]) + #define HERMON_CQE_OPCODE_GET(cq, cqe) \ ((((uint8_t *)(cqe))[31]) & HERMON_CQE_OPCODE_MASK) @@ -2242,12 +2704,30 @@ (((((uint8_t *)(cqe))[31]) & HERMON_CQE_SENDRECV_MASK) >> \ HERMON_CQE_SENDRECV_SHIFT) +#define HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe) \ + HERMON_CQE_CKSUM(cq, cqe) + +#define HERMON_CQE_FEXCH_TX_BYTES(cq, cqe) \ + htonl(((uint32_t *)(cqe))[3]) + +#define HERMON_CQE_FEXCH_RX_BYTES(cq, cqe) \ + htonl(((uint32_t *)(cqe))[4]) + +#define HERMON_CQE_FEXCH_SEQ_ID(cq, cqe) \ + (((uint8_t *)(cqe))[8]) + +#define HERMON_CQE_FEXCH_DETAIL(cq, cqe) \ + htonl(((uint32_t *)(cqe))[0]) + +#define HERMON_CQE_FEXCH_DIFE(cq, cqe) \ + ((((uint8_t *)(cqe))[0]) & 0x80) + /* See Comment above for EQE - ownership of CQE is handled the same */ -#define HERMON_CQE_OWNER_IS_SW(cq, cqe, considx) \ +#define HERMON_CQE_OWNER_IS_SW(cq, cqe, considx, shift, mask) \ (((((uint8_t *)(cqe))[31] & HERMON_CQE_OWNER_MASK) >> \ HERMON_CQE_OWNER_SHIFT) == \ - (((considx) & cq->cq_bufsz) >> cq->cq_log_cqsz)) + (((considx) & (mask)) >> (shift))) /* * Hermon Shared Receive Queue (SRQ) Context Entry Format @@ -2297,7 +2777,7 @@ uint32_t rsrc0[80]; /* to match DEV_CAP size of 0x80 */ }; -#else +#else /* BIG ENDIAN */ struct hermon_hw_srqc_s { uint32_t state :4; uint32_t log_srq_size :4; @@ -2349,9 +2829,17 @@ #ifdef _LITTLE_ENDIAN struct hermon_hw_mod_stat_cfg_s { - uint32_t rsvd0; - - uint32_t :14; + uint32_t :16; + uint32_t qdr_rx_op :4; + uint32_t :3; + uint32_t qdr_rx_opt_m :1; + uint32_t qdr_tx_op :4; + uint32_t :3; + uint32_t qdr_tx_opt_m :1; + + uint32_t log_pg_sz :8; + uint32_t log_pg_sz_m :1; + uint32_t :5; uint32_t dife :1; uint32_t dife_m :1; uint32_t rx_options :4; @@ -2368,7 +2856,7 @@ uint32_t port_en_m :1; uint32_t :10; - uint32_t rsvd1; + uint32_t :32; uint32_t guid_hi; @@ -2376,8 +2864,8 @@ uint32_t guid_hi_m :1; uint32_t guid_lo; + uint32_t :31; - uint32_t guid_lo_m :1; uint32_t rsvd[4]; @@ -2405,7 +2893,13 
@@ uint32_t :2; uint32_t serdes_m :1; - uint32_t reserved[50]; + uint32_t reserved[22]; + + uint32_t mac_lo :32; + + uint32_t mac_hi :16; + uint32_t :15; + uint32_t mac_m :1; }; #else /* BIG ENDIAN */ struct hermon_hw_mod_stat_cfg_s { @@ -2417,11 +2911,19 @@ uint32_t rx_options :4; uint32_t dife_m :1; uint32_t dife :1; - uint32_t :14; - - uint32_t rsvd0; - - uint32_t rsvd1; + uint32_t :5; + uint32_t log_pg_sz_m :1; + uint32_t log_pg_sz :8; + + uint32_t qdr_tx_opt_m :1; + uint32_t :3; + uint32_t qdr_tx_op :4; + uint32_t qdr_rx_opt_m :1; + uint32_t :3; + uint32_t qdr_rx_op :4; + uint32_t :16; + + uint32_t :32; uint32_t :10; uint32_t port_en_m :1; @@ -2465,13 +2967,20 @@ uint32_t :1; uint32_t inbuf_ind_en :3; - uint32_t reserved[50]; + uint32_t reserved[22]; /* get to new enet stuff */ + + uint32_t mac_m :1; + uint32_t :15; + uint32_t mac_hi :16; + + uint32_t mac_lo :32; }; #endif - /* * Hermon MOD_STAT_CFG input modifier structure + * NOTE: this might end up defined ONLY one way, + * if usage is access via macros */ struct hermon_hw_msg_in_mod_s { #ifdef _LITTLE_ENDIAN @@ -2506,14 +3015,14 @@ #ifdef _LITTLE_ENDIAN struct hermon_hw_udav_s { uint32_t rlid :16; - uint32_t ml_path :7; + uint32_t ml_path :7; /* mlid or SMAC idx */ uint32_t grh :1; uint32_t :8; uint32_t pd :24; uint32_t portnum :2; uint32_t :5; - uint32_t force_lp :1; + uint32_t force_lb :1; uint32_t flow_label :20; uint32_t tclass :8; @@ -2537,7 +3046,7 @@ uint32_t :8; uint32_t grh :1; - uint32_t ml_path :7; + uint32_t ml_path :7; /* mlid or SMAC idx */ uint32_t rlid :16; uint32_t :9; @@ -2557,6 +3066,75 @@ #define HERMON_UDAV_MODIFY_MASK0 0xFCFFFFFFFF000000ULL #define HERMON_UDAV_MODIFY_MASK1 0xFF80F00000000000ULL +/* UDAV for enthernet */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_udav_enet_s { + uint32_t :16; + uint32_t smac_idx :7; + uint32_t :9; + + uint32_t pd :24; + uint32_t portnum :2; + uint32_t :3; + uint32_t cv :1; + uint32_t :1; + uint32_t force_lb :1; + + uint32_t flow_label :20; + uint32_t 
tclass :8; + uint32_t sl :4; + + uint32_t hop_limit :8; + uint32_t max_stat_rate :4; + uint32_t :4; + uint32_t mgid_index :7; + uint32_t :9; + + uint64_t rgid_h; + uint64_t rgid_l; + + uint32_t rsrv[2]; + + uint32_t dmac_lo; + + uint32_t dmac_hi :16; + uint32_t vlan :16; +}; +#else +struct hermon_hw_udav_enet_s { + uint32_t force_lb :1; + uint32_t :1; + uint32_t cv :1; + uint32_t :3; + uint32_t portnum :2; + uint32_t pd :24; + + uint32_t :9; + uint32_t smac_idx :7; + uint32_t :16; + + uint32_t :9; + uint32_t mgid_index :7; + uint32_t :4; + uint32_t max_stat_rate :4; + uint32_t hop_limit :8; + + uint32_t sl :4; + uint32_t tclass :8; + uint32_t flow_label :20; + + uint64_t rgid_h; + uint64_t rgid_l; + + uint32_t rsrv[2]; + + uint32_t vlan :16; + uint32_t dmac_hi :16; + + uint32_t dmac_low; +}; +#endif /* * Hermon Queue Pair Context Table (QPC) entries @@ -2588,45 +3166,165 @@ #ifdef _LITTLE_ENDIAN struct hermon_hw_addr_path_s { uint32_t rlid :16; + uint32_t mlid :7; /* mlid or SMAC idx */ + uint32_t grh :1; + uint32_t cntr_idx :8; + + uint32_t pkey_indx :7; + uint32_t :22; + uint32_t :1; /* but may be used for enet */ + uint32_t cv :1; + uint32_t force_lb :1; + + uint32_t flow_label :20; + uint32_t tclass :8; + uint32_t sniff_s_in :1; + uint32_t sniff_s_out :1; + uint32_t sniff_r_in :1; + uint32_t sniff_r_out :1; /* sniff-rcv-egress */ + + uint32_t hop_limit :8; + uint32_t max_stat_rate :4; + uint32_t :4; + uint32_t mgid_index :7; + uint32_t :1; + uint32_t link_type :3; + uint32_t ack_timeout :5; + + uint64_t rgid_h; + uint64_t rgid_l; + + uint32_t dmac_hi :16; + uint32_t :16; + + uint32_t :8; /* but may be used for enet */ + uint32_t sp :1; + uint32_t :2; + uint32_t fvl :1; + uint32_t fsip :1; + uint32_t fsm :1; + uint32_t :2; + uint32_t vlan_idx :7; + uint32_t :1; + uint32_t sched_q :8; + + uint32_t dmac_lo :32; +}; +#else +struct hermon_hw_addr_path_s { + uint32_t force_lb :1; + uint32_t cv :1; + uint32_t :1; /* but may be used for enet */ + uint32_t :22; + 
uint32_t pkey_indx :7; + + uint32_t cntr_idx :8; + uint32_t grh :1; + uint32_t mlid :7; /* mlid or SMAC idx */ + uint32_t rlid :16; + + uint32_t ack_timeout :5; + uint32_t link_type :3; + uint32_t :1; + uint32_t mgid_index :7; + uint32_t :4; + uint32_t max_stat_rate :4; + uint32_t hop_limit :8; + + uint32_t sniff_r_out :1; /* sniff-rcv-egress */ + uint32_t sniff_r_in :1; + uint32_t sniff_s_out :1; + uint32_t sniff_s_in :1; + uint32_t tclass :8; + uint32_t flow_label :20; + + uint64_t rgid_h; + uint64_t rgid_l; + + uint32_t sched_q :8; + uint32_t :1; + uint32_t vlan_idx :7; + uint32_t :2; + uint32_t fsm :1; + uint32_t fsip :1; + uint32_t fvl :1; + uint32_t :2; + uint32_t sp :1; + uint32_t :8; /* but may be used for enet */ + + uint32_t :16; + uint32_t dmac_hi :16; + + uint32_t dmac_lo :32; +}; +#endif /* LITTLE ENDIAN */ + +/* The addr path includes RSS fields for RSS QPs */ +#ifdef _LITTLE_ENDIAN +struct hermon_hw_rss_s { + uint32_t rlid :16; uint32_t mlid :7; uint32_t grh :1; uint32_t cntr_idx :8; uint32_t pkey_indx :7; uint32_t :22; - uint32_t :2; /* but may be used for enet */ + uint32_t :1; /* but may be used for enet */ + uint32_t cv :1; uint32_t force_lb :1; uint32_t flow_label :20; uint32_t tclass :8; - uint32_t :4; + uint32_t sniff_s_in :1; + uint32_t sniff_s_out :1; + uint32_t sniff_r_in :1; + uint32_t sniff_r_out :1; /* sniff-rcv-egress */ uint32_t hop_limit :8; uint32_t max_stat_rate :4; uint32_t :4; uint32_t mgid_index :7; - uint32_t :4; + uint32_t :1; + uint32_t link_type :3; uint32_t ack_timeout :5; uint64_t rgid_h; uint64_t rgid_l; - uint32_t :32; /* but may be used for enet */ - - uint32_t :12; /* but may be used for enet */ + uint32_t base_qpn :24; + uint32_t log2_tbl_sz :4; + uint32_t :4; + + uint32_t :8; /* but may be used for enet */ + uint32_t sp :1; + uint32_t :2; + uint32_t fvl :1; uint32_t fsip :1; - uint32_t :3; - uint32_t :7; + uint32_t fsm :1; + uint32_t :2; + uint32_t vlan_idx :7; uint32_t :1; uint32_t sched_q :8; - - uint32_t :32; + 
uint32_t :2; + uint32_t tcp_ipv6 :1; + uint32_t ipv6 :1; + uint32_t tcp_ipv4 :1; + uint32_t ipv4 :1; + uint32_t :2; + uint32_t hash_fn :2; + uint32_t :22; + + uint32_t default_qpn :24; + uint32_t :8; + + uint8_t rss_key[40]; }; -#else -struct hermon_hw_addr_path_s { +#else /* BIG ENDIAN */ +struct hermon_hw_rss_s { uint32_t force_lb :1; - uint32_t :2; /* but may be used for enet */ + uint32_t cv :1; + uint32_t :1; /* but may be used for enet */ uint32_t :22; uint32_t pkey_indx :7; @@ -2636,13 +3334,17 @@ uint32_t rlid :16; uint32_t ack_timeout :5; - uint32_t :4; + uint32_t link_type :3; + uint32_t :1; uint32_t mgid_index :7; uint32_t :4; uint32_t max_stat_rate :4; uint32_t hop_limit :8; - uint32_t :4; + uint32_t sniff_r_out :1; /* sniff-rcv-egress */ + uint32_t sniff_r_in :1; + uint32_t sniff_s_out :1; + uint32_t sniff_s_in :1; uint32_t tclass :8; uint32_t flow_label :20; @@ -2651,14 +3353,32 @@ uint32_t sched_q :8; uint32_t :1; - uint32_t :7; - uint32_t :3; + uint32_t vlan_idx :7; + uint32_t :2; + uint32_t fsm :1; uint32_t fsip :1; - uint32_t :12; /* but may be used for enet */ - - uint32_t :32; /* but may be used for enet */ - - uint32_t :32; + uint32_t fvl :1; + uint32_t :2; + uint32_t sp :1; + uint32_t :8; /* but may be used for enet */ + + uint32_t :4; + uint32_t log2_tbl_sz :4; + uint32_t base_qpn :24; + + uint32_t :8; + uint32_t default_qpn :24; + + uint32_t :22; + uint32_t hash_fn :2; + uint32_t :2; + uint32_t ipv4 :1; + uint32_t tcp_ipv4 :1; + uint32_t ipv6 :1; + uint32_t tcp_ipv6 :1; + uint32_t :2; + + uint8_t rss_key[40]; }; #endif /* LITTLE ENDIAN */ @@ -2756,7 +3476,7 @@ uint32_t cqn_rcv :24; uint32_t :8; - uint32_t srcd :16; + uint32_t xrcd :16; uint32_t :16; uint32_t :2; @@ -2785,7 +3505,7 @@ uint32_t rmc_parent_qpn :24; uint32_t header_sep :1; - uint32_t inline_scatter :1; /* m/b 0 for srq */ + uint32_t inline_scatter :1; /* m/b 0 for srq */ uint32_t :1; uint32_t rmc_enable :2; uint32_t :2; /* may use one bit for enet */ @@ -2799,7 +3519,22 @@ 
uint32_t log2_pgsz :6; uint32_t :2; - uint32_t rsvd[12]; /* may/will be used for FCoIB */ + uint32_t exch_base :16; + uint32_t exch_size :4; + uint32_t :12; + + uint32_t vft_vf_id :12; + uint32_t vft_prior :3; + uint32_t :16; + uint32_t ve :1; + + uint32_t :32; + + uint32_t :16; + uint32_t my_fc_id_idx :8; + uint32_t vft_hop_cnt :8; + + uint32_t rsvd[8]; }; #else /* BIG ENDIAN */ struct hermon_hw_qpc_s { @@ -2886,7 +3621,7 @@ uint32_t next_rcv_psn :24; uint32_t :16; - uint32_t srcd :16; + uint32_t xrcd :16; uint32_t :8; uint32_t cqn_rcv :24; @@ -2916,7 +3651,7 @@ uint32_t :2; /* may use one bit for enet */ uint32_t rmc_enable :2; uint32_t :1; - uint32_t inline_scatter :1; /* m/b 0 for srq */ + uint32_t inline_scatter :1; /* m/b 0 for srq */ uint32_t header_sep :1; uint32_t rmc_parent_qpn :24; @@ -2931,7 +3666,22 @@ uint32_t mtt_base_addrl :29; uint32_t :3; - uint32_t rsvd[12]; /* may/will be used for FCoIB */ + uint32_t ve :1; + uint32_t :16; + uint32_t vft_prior :3; + uint32_t vft_vf_id :12; + + uint32_t :12; + uint32_t exch_size :4; + uint32_t exch_base :16; + + uint32_t vft_hop_cnt :8; + uint32_t my_fc_id_idx :8; + uint32_t :16; + + uint32_t :32; + + uint32_t rsvd[8]; }; #endif /* LITTLE ENDIAN */ @@ -2951,7 +3701,11 @@ #define HERMON_QP_RC 0x0 #define HERMON_QP_UC 0x1 #define HERMON_QP_UD 0x3 +#define HERMON_QP_FCMND 0x4 +#define HERMON_QP_FEXCH 0x5 +#define HERMON_QP_XRC 0x6 #define HERMON_QP_MLX 0x7 +#define HERMON_QP_RFCI 0x9 #define HERMON_QP_PMSTATE_MIGRATED 0x3 #define HERMON_QP_PMSTATE_ARMED 0x0 @@ -2999,7 +3753,8 @@ #ifdef _LITTLE_ENDIAN struct hermon_hw_mcg_s { uint32_t member_cnt :24; - uint32_t :8; + uint32_t :6; + uint32_t protocol :2; uint32_t :6; uint32_t next_gid_indx :26; @@ -3015,7 +3770,8 @@ uint32_t next_gid_indx :26; uint32_t :6; - uint32_t :8; + uint32_t protocol :2; + uint32_t :6; uint32_t member_cnt :24; uint32_t :32; @@ -3026,6 +3782,58 @@ }; #endif +#ifdef _LITTLE_ENDIAN +struct hermon_hw_mcg_en_s { + uint32_t member_cnt :24; + uint32_t 
:6; + uint32_t protocol :2; + + uint32_t :6; + uint32_t next_gid_indx :26; + + uint32_t :32; + uint32_t :32; + + uint32_t vlan_present :1; + uint32_t :31; + + uint32_t :32; + + uint32_t mac_lo :32; + + uint32_t mac_hi :16; + uint32_t vlan_id :12; + uint32_t vlan_cfi :1; + uint32_t vlan_prior :3; + +}; +#else +struct hermon_hw_mcg_en_s { + uint32_t next_gid_indx :26; + uint32_t :6; + + uint32_t protocol :2; + uint32_t :6; + uint32_t member_cnt :24; + + uint32_t :32; + uint32_t :32; + + uint32_t :32; + + uint32_t :31; + uint32_t vlan_present :1; + + uint32_t vlan_prior :3; + uint32_t vlan_cfi :1; + uint32_t vlan_id :12; + uint32_t mac_hi :16; + + uint32_t mac_lo :32; + +}; +#endif + /* Multicast Group Member - QP List entries */ #ifdef _LITTLE_ENDIAN @@ -3047,6 +3855,152 @@ #define HERMON_MCG_QPN_BLOCK_LB 0x40000000 /* + * ETHERNET ONLY Commands + * The follow are new commands, used only for an Ethernet Port + */ + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_set_mcast_fltr_s { + uint32_t mac_lo; + + uint32_t mac_hi :16; + uint32_t :15; + uint32_t sfs :1; +}; +#else /* BIG ENDIAN */ +struct hermon_hw_set_mcast_fltr_s { + uint32_t sfs :1; + uint32_t :15; + uint32_t mac_hi :16; + + uint32_t mac_lo; +}; +#endif + +/* opmod for set_mcast_fltr */ +#define HERMON_SET_MCAST_FLTR_CONF 0x0 +#define HERMON_SET_MCAST_FLTR_DIS 0x1 +#define HERMON_SET_MCAST_FLTR_EN 0x2 + + +/* + * FC Command structures + */ + + + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_config_fc_basic_s { + uint32_t n_p :2; + uint32_t :6; + uint32_t n_v :3; + uint32_t :5; + uint32_t n_m :4; + uint32_t :12; + + uint32_t :16; + uint32_t fexch_base_hi :8; + uint32_t :8; + + uint32_t rfci_base :24; + uint32_t log2_num_rfci :3; + uint32_t :5; + + uint32_t fx_base_mpt_lo :8; + uint32_t :17; + uint32_t fx_base_mpt_hi :7; + + uint32_t fcoe_prom_qpn :24; + uint32_t uint32_t :8; + + uint32_t :32; + + uint32_t rsrv[58]; +}; +#else +struct hermon_hw_config_fc_basic_s { + uint32_t :8; + uint32_t fexch_base_hi :8; + uint32_t :16; 
+ + uint32_t :12; + uint32_t n_m :4; + uint32_t :5; + uint32_t n_v :3; + uint32_t :6; + uint32_t n_p :2; + + uint32_t fx_base_mpt_hi :7; + uint32_t :17; + uint32_t fx_base_mpt_lo :8; + + uint32_t :5; + uint32_t log2_num_rfci :3; + uint32_t rfci_base :24; + + uint32_t :32; + + uint32_t uint32_t :8; + uint32_t fcoe_prom_qpn :24; + + uint32_t rsrv[58]; +}; +#endif + +#define HERMON_HW_FC_PORT_ENABLE 0x0 +#define HERMON_HW_FC_PORT_DISABLE 0x1 +#define HERMON_HW_FC_CONF_BASIC 0x0000 +#define HERMON_HW_FC_CONF_NPORT 0x0100 + +#ifdef _LITTLE_ENDIAN +struct hermon_hw_query_fc_s { + uint32_t :32; + + uint32_t log2_max_rfci :3; + uint32_t :5; + uint32_t log2_max_fexch :5; + uint32_t :3; + uint32_t log2_max_nports :3; + uint32_t :13; + + uint32_t rsrv[62]; +}; +#else +struct hermon_hw_query_fc_s { + uint32_t :13; + uint32_t log2_max_nports :3; + uint32_t :3; + uint32_t log2_max_fexch :5; + uint32_t :5; + uint32_t log2_max_rfci :3; + + uint32_t :32; + + uint32_t rsrv[62]; +}; +#endif + + + + +/* ARM_RQ - limit water mark for srq & rq */ +#ifdef _LITTLE_ENDIAN +struct hermon_hw_arm_req_s { + uint32_t lwm :16; + uint32_t :16; + + uint32_t :32; +}; +#else +struct hermon_hw_arm_req_s { + uint32_t :32; + + uint32_t :16; + uint32_t lwm :16; +}; +#endif + +/* * Structure for getting the peformance counters from the HCA */ @@ -3380,25 +4334,39 @@ struct hermon_hw_snd_wqe_ctrl_s { uint32_t owner :1; - /* NOTE: some/many may be used by enet */ - uint32_t :26; + uint32_t :1; + uint32_t nec :1; + uint32_t :5; + uint32_t fceof :8; + uint32_t :9; + uint32_t rr :1; + uint32_t :1; uint32_t opcode :5; - /* NOTE: some will be used by enet */ - uint32_t :25; + + uint32_t vlan :16; + uint32_t :1; + uint32_t cv :1; + uint32_t :7; uint32_t fence :1; - /* WQE size in octowords */ - uint32_t ds :6; - /* SRC remote buffer if impl */ - uint32_t src_rem_buf :24; + uint32_t ds :6; /* WQE size in octowords */ + + /* + * XRC remote buffer if impl + * XRC 23:0, or DMAC 47:32& 8 bits of pad + */ + uint32_t 
xrc_rem_buf :24; uint32_t so :1; - uint32_t :1; /* FCoIB only */ + uint32_t fcrc :1; /* fc crc calc */ uint32_t tcp_udp :1; /* Checksumming */ uint32_t ip :1; /* Checksumming */ uint32_t cq_gen :2; /* 00=no cqe, 11= gen cqe */ - /* set means solicit bit in last packet */ + /* s-bit set means solicit bit in last packet */ uint32_t s :1; uint32_t force_lb :1; + /* + * immediate OR invalidation key OR DMAC 31:0 depending + */ uint32_t immediate :32; }; @@ -3410,6 +4378,96 @@ }; +struct hermonw_hw_fcp3_ctrl_s { + uint32_t owner :1; + uint32_t :1; + uint32_t nec :1; + uint32_t :24; + uint32_t opcode :5; + + uint32_t :24; + uint32_t sit :1; + uint32_t :1; + uint32_t ds :6; + + uint32_t seq_id :8; + uint32_t info :4; + uint32_t :3; + uint32_t ls :1; + uint32_t :8; + uint32_t so :1; + uint32_t :3; + uint32_t cq_gen :2; + uint32_t :2; + + uint32_t param :32; +}; + +struct hermon_hw_fcp3_init_s { + uint32_t :8; + uint32_t pe :1; + uint32_t :23; + + uint32_t csctl_prior :8; + uint32_t seqid_tx :8; + uint32_t :6; + uint32_t mtu :10; + + uint32_t rem_id :24; + uint32_t abort :2; + uint32_t :1; + uint32_t op :2; + uint32_t :1; + uint32_t org :1; + uint32_t :1; + + uint32_t rem_exch :16; + uint32_t loc_exch_idx :16; +}; + +struct hermon_hw_fcmd_o_enet_s { + uint32_t :4; + uint32_t stat_rate :4; + uint32_t :24; + + uint32_t :32; + + uint32_t :16; + uint32_t dmac_hi :16; + + uint32_t dmac_lo :32; +}; + +struct hermon_hw_fcmd_o_ib_s { + uint32_t :32; + + uint32_t :8; + uint32_t grh :1; + uint32_t :7; + uint32_t rlid :16; + + uint32_t :20; + uint32_t stat_rate :4; + uint32_t hop_limit :8; + + uint32_t sl :4; + uint32_t tclass :8; + uint32_t flow_label :20; + + uint64_t rgid_hi; + + uint64_t rgid_lo; + + uint32_t :8; + uint32_t rqp :24; + + uint32_t rsrv[3]; +}; + + + + + #define HERMON_WQE_SEND_FENCE_MASK 0x40 #define HERMON_WQE_SEND_NOPCODE_NOP 0x00 @@ -3418,6 +4476,7 @@ #define HERMON_WQE_SEND_NOPCODE_RDMAWI 0x9 #define HERMON_WQE_SEND_NOPCODE_SEND 0xA #define 
HERMON_WQE_SEND_NOPCODE_SENDI 0xB +#define HERMON_WQE_SEND_NOPCODE_INIT_AND_SEND 0xD #define HERMON_WQE_SEND_NOPCODE_LSO 0xE #define HERMON_WQE_SEND_NOPCODE_RDMAR 0x10 #define HERMON_WQE_SEND_NOPCODE_ATMCS 0x11 @@ -3429,6 +4488,9 @@ #define HERMON_WQE_SEND_NOPCODE_LCL_INV 0x1B #define HERMON_WQE_SEND_NOPCODE_CONFIG 0x1F /* for ccq only */ +#define HERMON_WQE_FCP_OPCODE_INIT_AND_SEND 0xD +#define HERMON_WQE_FCP_OPCODE_INIT_FEXCH 0xC + #define HERMON_WQE_SEND_SIGNALED_MASK 0x0000000C00000000ull #define HERMON_WQE_SEND_SOLICIT_MASK 0x0000000200000000ull #define HERMON_WQE_SEND_IMMEDIATE_MASK 0x0000000100000000ull @@ -3438,9 +4500,13 @@ uint32_t :8; uint32_t dest_qp :24; + uint32_t qkey :32; - uint32_t :32; - uint32_t :32; + + uint32_t vlan :16; + uint32_t dmac_hi :16; + + uint32_t dmac_lo :32; }; #define HERMON_WQE_SENDHDR_UD_AV_MASK 0xFFFFFFFFFFFFFFE0ull #define HERMON_WQE_SENDHDR_UD_DQPN_MASK 0xFFFFFF @@ -3466,6 +4532,12 @@ #define HERMON_WQE_SENDHDR_BIND_WR 0x4000000000000000ull #define HERMON_WQE_SENDHDR_BIND_RD 0x2000000000000000ull +struct hermon_hw_snd_wqe_lso_s { + uint32_t mss :16; + uint32_t :6; + uint32_t hdr_size :10; +}; + struct hermon_hw_snd_wqe_remaddr_s { uint64_t vaddr; uint32_t rkey; @@ -3484,8 +4556,6 @@ uint64_t cmpmask; }; - - struct hermon_hw_snd_wqe_local_inv_s { uint32_t :6; uint32_t atc_shoot :1; @@ -3495,16 +4565,25 @@ uint32_t mkey; + uint32_t rsrv0; + + uint32_t rsrv1; uint32_t :25; uint32_t guest_id :7; /* for atc shootdown */ - uint32_t rsrv0[6]; - uint32_t p_addrh; uint32_t p_addrl :23; uint32_t :9; }; +struct hermon_hw_snd_rem_addr_s { + uint64_t rem_vaddr; + + uint32_t rkey; + uint32_t rsrv; +}; + + struct hermon_hw_snd_wqe_frwr_s { uint32_t rem_atomic :1; uint32_t rem_write :1; @@ -3513,7 +4592,8 @@ uint32_t loc_read :1; uint32_t fbo_en :1; uint32_t len_64 :1; - uint32_t :3; /* but some for FCoIB */ + uint32_t :2; + uint32_t dif :1; /* FCoIB */ uint32_t bind_en :1; uint32_t blk_pg_mode :1; uint32_t mtt_rep :4; @@ -3521,10 +4601,7 @@ 
uint32_t mkey; /* swapped w/ addrh relative to arbel */ - uint32_t pbl_addrh; - - uint32_t pbl_addrl :26; - uint32_t :6; + uint64_t pbl_addr; uint64_t start_addr; @@ -3539,15 +4616,25 @@ uint32_t rsrv0[2]; }; -/* - * NOTE: Some hermon-PRM defined Send WQE segments are not defined here - * because they will not be used initially: they should be added and - * used later on: - * FCP-3 init - * FCP-3 Control - * Large Send Offload - * - */ +struct hermon_hw_snd_wqe_frwr_ext_s { + uint32_t dif_in_mem :1; + uint32_t dif_on_wire :1; + uint32_t valid_ref :1; + uint32_t valid_crc :1; + uint32_t repl_ref_tag :1; + uint32_t repl_app_tag :1; + uint32_t :10; + uint32_t app_mask :16; + + uint32_t wire_app_tag :16; + uint32_t mem_app_tag :16; + + uint32_t wire_ref_tag_base; + + uint32_t mem_ref_tag_base; +}; + + /* * Hermon "MLX transport" Work Queue Element (WQE) @@ -3704,7 +4791,7 @@ tmp[1] = htonll((uint64_t)(wr_rdma)->rdma_rkey << 32); \ } -#define HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr) \ +#define HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr) \ { \ uint64_t *tmp; \ \ @@ -3713,7 +4800,7 @@ tmp[1] = htonll((uint64_t)(wr)->wr.rc.rcwr.atomic->atom_rkey << 32); \ } -#define HERMON_WQE_BUILD_ATOMIC(qp, at, wr_atom) \ +#define HERMON_WQE_BUILD_ATOMIC(qp, at, wr_atom) \ { \ uint64_t *tmp; \ \ @@ -3722,7 +4809,7 @@ tmp[1] = htonll((wr_atom)->atom_arg1); \ } -#define HERMON_WQE_BUILD_BIND(qp, bn, wr_bind) \ +#define HERMON_WQE_BUILD_BIND(qp, bn, wr_bind) \ { \ uint64_t *tmp; \ uint64_t bn0_tmp; \ @@ -3743,16 +4830,67 @@ tmp[3] = htonll((wr_bind)->bind_len); \ } -#define HERMON_WQE_BUILD_DATA_SEG_RECV(ds, sgl) \ +#define HERMON_WQE_BUILD_FRWR(qp, frwr_arg, pmr_arg) \ +{ \ + ibt_mr_flags_t flags; \ + ibt_lkey_t lkey; \ + ibt_wr_reg_pmr_t *pmr = (pmr_arg); \ + uint64_t *frwr64 = (uint64_t *)(frwr_arg); \ + \ + flags = pmr->pmr_flags; \ + ((uint32_t *)frwr64)[0] = htonl(0x08000000 | \ + ((flags & IBT_MR_ENABLE_REMOTE_ATOMIC) ? 
0x80000000 : 0) | \ + ((flags & IBT_MR_ENABLE_REMOTE_WRITE) ? 0x40000000 : 0) | \ + ((flags & IBT_MR_ENABLE_REMOTE_READ) ? 0x20000000 : 0) | \ + ((flags & IBT_MR_ENABLE_LOCAL_WRITE) ? 0x10000000 : 0) | \ + ((flags & IBT_MR_ENABLE_WINDOW_BIND) ? 0x00200000 : 0)); \ + lkey = (pmr->pmr_lkey & ~0xff) | pmr->pmr_key; \ + pmr->pmr_rkey = pmr->pmr_lkey = lkey; \ + ((uint32_t *)frwr64)[1] = htonl(lkey); \ + frwr64[1] = htonll(pmr->pmr_addr_list->p_laddr); \ + frwr64[2] = htonll(pmr->pmr_iova); \ + frwr64[3] = htonll(pmr->pmr_len); \ + ((uint32_t *)frwr64)[8] = htonl(pmr->pmr_offset); \ + ((uint32_t *)frwr64)[9] = htonl(pmr->pmr_buf_sz); \ + frwr64[5] = 0; \ +} + +#define HERMON_WQE_BUILD_LI(qp, li_arg, wr_li) \ +{ \ + uint64_t *li64 = (uint64_t *)(void *)(li_arg); \ + \ + li64[0] = 0; \ + ((uint32_t *)li64)[2] = htonl((wr_li)->li_rkey); \ + ((uint32_t *)li64)[3] = 0; \ + li64[2] = 0; \ + li64[3] = 0; \ +} + +#define HERMON_WQE_BUILD_FCP3_INIT(ds, fctl, cs_pri, seq_id, mtu, \ + dest_id, op, rem_exch, local_exch_idx) \ +{ \ + uint32_t *fc_init; \ + \ + fc_init = (uint32_t *)ds; \ + fc_init[1] = htonl((cs_pri) << 24 | (seq_id) << 16 | (mtu)); \ + fc_init[2] = htonl((dest_id) << 8 | \ + IBT_FCTL_GET_ABORT_FIELD(fctl) << 6 | (op) << 3 | 0x2); \ + fc_init[3] = htonl((rem_exch) << 16 | (local_exch_idx)); \ + membar_producer(); /* fc_init[0] is where the stamping is */ \ + fc_init[0] = htonl(((fctl) & IBT_FCTL_PRIO) << 6); \ +} + +#define HERMON_WQE_BUILD_DATA_SEG_RECV(ds, sgl) \ { \ uint64_t *tmp; \ \ tmp = (uint64_t *)(ds); \ - tmp[0] = htonll((((uint64_t)((sgl)->ds_len & \ + tmp[0] = htonll((((uint64_t)((sgl)->ds_len & \ HERMON_WQE_SGL_BYTE_CNT_MASK) << 32) | (sgl)->ds_key)); \ - tmp[1] = htonll((sgl)->ds_va); \ + tmp[1] = htonll((sgl)->ds_va); \ } -#define HERMON_WQE_BUILD_DATA_SEG_SEND(ds, sgl) \ + +#define HERMON_WQE_BUILD_DATA_SEG_SEND(ds, sgl) \ { \ ((uint64_t *)(ds))[1] = htonll((sgl)->ds_va); \ ((uint32_t *)(ds))[1] = htonl((sgl)->ds_key); \ @@ -3764,17 +4902,18 @@ 
#define HERMON_WQE_BUILD_INLINE(qp, ds, sz) \ *(uint32_t *)(ds) = htonl(HERMON_WQE_SGL_INLINE_MASK | (sz)) -#define HERMON_WQE_BUILD_INLINE_ICRC(qp, ds, sz, icrc) \ +#define HERMON_WQE_BUILD_INLINE_ICRC(qp, ds, sz, icrc) \ { \ uint32_t *tmp; \ \ tmp = (uint32_t *)(ds); \ + tmp[1] = htonl(icrc); \ + membar_producer(); \ tmp[0] = htonl(HERMON_WQE_SGL_INLINE_MASK | (sz)); \ - tmp[1] = htonl(icrc); \ } #define HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, \ - imm, sol, sig, ip_cksum, qp) \ + imm, sol, sig, cksum, qp, strong, fccrc) \ { \ uint32_t *tmp; \ uint32_t cntr_tmp; \ @@ -3783,11 +4922,7 @@ tmp = (uint32_t *)desc; \ cntr_tmp = (fence << 6) | desc_sz; \ tmp[1] = ntohl(cntr_tmp); \ - cntr_tmp = 0; \ - if ((sol) != 0) cntr_tmp |= 0x02; \ - if ((sig) != 0) cntr_tmp |= 0x0C; \ - /*LINTED*/ \ - if (ip_cksum) cntr_tmp |= 0x30; \ + cntr_tmp = strong | fccrc | sol | sig | cksum; \ tmp[2] = ntohl(cntr_tmp); \ tmp[3] = ntohl(imm); \ } @@ -3804,13 +4939,11 @@ cntr_tmp |= HERMON_WQE_SEND_NOPCODE_SEND; \ tmp[0] = ntohl(cntr_tmp); \ tmp[1] = ntohl(desc_sz); \ - cntr_tmp = ((maxstat << 4) | (sl & 0xff)) << 8; \ + cntr_tmp = (((maxstat << 4) | (sl & 0xff)) << 8) | sig; \ if (qp->qp_is_special == HERMON_QP_SMI) \ cntr_tmp |= (0x02 << 16); \ if (lid == IB_LID_PERMISSIVE) \ cntr_tmp |= (0x01 << 16); \ - if ((sig) != 0) \ - cntr_tmp |= 0xC; \ tmp[2] = ntohl(cntr_tmp); \ tmp[3] = ntohl((lid) << 16); \ } @@ -3851,7 +4984,7 @@ * Also note: Filling in the GIDs in the way we do below is helpful because * it avoids potential alignment restrictions and/or conflicts. 
*/ -#define HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen) \ +#define HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen) \ { \ uint32_t *tmp; \ uint32_t grh_tmp; \ @@ -3877,7 +5010,7 @@ bcopy(&(udav)->rgid_h, &tmp[6], sizeof (ib_gid_t)); \ } -#define HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr) \ +#define HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr) \ { \ uint32_t *tmp; \ uint32_t bth_tmp; \ @@ -3900,7 +5033,7 @@ tmp[2] = 0x0; \ } -#define HERMON_WQE_BUILD_MLX_DETH(deth, qp) \ +#define HERMON_WQE_BUILD_MLX_DETH(deth, qp) \ { \ uint32_t *tmp; \ \ @@ -3916,10 +5049,6 @@ } - - - - /* * Flash interface: * Below we have PCI config space space offsets for flash interface
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_misc.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_misc.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_MISC_H @@ -566,56 +565,61 @@ * Mellanox FMR */ typedef struct hermon_fmr_list_s { - avl_node_t fmr_avlnode; struct hermon_fmr_list_s *fmr_next; hermon_mrhdl_t fmr; - ibt_pmr_desc_t fmr_desc; hermon_fmrhdl_t fmr_pool; - uint_t fmr_refcnt; uint_t fmr_remaps; - uint_t fmr_in_cache; + uint_t fmr_remap_gen; /* generation */ } hermon_fmr_list_t; struct hermon_sw_fmr_s { hermon_state_t *fmr_state; kmutex_t fmr_lock; - ddi_taskq_t *fmr_taskq; + hermon_fmr_list_t *fmr_free_list; + hermon_fmr_list_t **fmr_free_list_tail; + int fmr_free_len; + int fmr_pool_size; + int fmr_max_pages; + int fmr_flags; + int fmr_stat_register; ibt_fmr_flush_handler_t fmr_flush_function; void *fmr_flush_arg; - int fmr_pool_size; - int fmr_max_pages; + int fmr_max_remaps; + uint_t fmr_remap_gen; /* generation */ int fmr_page_sz; + + kmutex_t remap_lock; + hermon_fmr_list_t *fmr_remap_list; + hermon_fmr_list_t **fmr_remap_list_tail; + int fmr_remap_watermark; + int fmr_remap_len; + + kmutex_t dirty_lock; + hermon_fmr_list_t *fmr_dirty_list; + hermon_fmr_list_t **fmr_dirty_list_tail; int fmr_dirty_watermark; int fmr_dirty_len; - int fmr_flags; - - hermon_fmr_list_t *fmr_free_list; - hermon_fmr_list_t *fmr_dirty_list; - - int fmr_cache; - avl_tree_t fmr_cache_avl; - kmutex_t fmr_cachelock; }; _NOTE(MUTEX_PROTECTS_DATA(hermon_sw_fmr_s::fmr_lock, - hermon_sw_fmr_s::fmr_state hermon_sw_fmr_s::fmr_pool_size - hermon_sw_fmr_s::fmr_max_pages hermon_sw_fmr_s::fmr_page_sz + hermon_sw_fmr_s::fmr_flags + hermon_sw_fmr_s::fmr_free_list)) +_NOTE(MUTEX_PROTECTS_DATA(hermon_sw_fmr_s::dirty_lock, 
hermon_sw_fmr_s::fmr_dirty_watermark hermon_sw_fmr_s::fmr_dirty_len - hermon_sw_fmr_s::fmr_flags - hermon_sw_fmr_s::fmr_free_list - hermon_sw_fmr_s::fmr_dirty_list - hermon_sw_fmr_s::fmr_cache)) + hermon_sw_fmr_s::fmr_dirty_list)) +_NOTE(DATA_READABLE_WITHOUT_LOCK(hermon_sw_fmr_s::fmr_remap_gen + hermon_sw_fmr_s::fmr_state + hermon_sw_fmr_s::fmr_max_pages + hermon_sw_fmr_s::fmr_max_remaps)) -_NOTE(MUTEX_PROTECTS_DATA(hermon_sw_fmr_s::fmr_cachelock, - hermon_sw_fmr_s::fmr_cache_avl)) - -#define HERMON_FMR_MAX_REMAPS 128 +/* FRWR guarantees 8 bits of key; avoid corner cases by using "-2" */ +#define HERMON_FMR_MAX_REMAPS (256 - 2) /* Hermon doorbell record routines */
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_mr.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_mr.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_MR_H @@ -61,7 +60,7 @@ * MTTs per MPT. We also define a log MTT size, since it's not likely * to change. */ -#define HERMON_NUM_MTT_SHIFT 0x1a +#define HERMON_NUM_MTT_SHIFT 0x1d #define HERMON_MTT_SIZE_SHIFT 0x3 /* @@ -172,6 +171,7 @@ #define HERMON_BINDHDL_VADDR 1 #define HERMON_BINDHDL_BUF 2 #define HERMON_BINDHDL_UBUF 3 +#define HERMON_BINDHDL_LKEY 4 /* * The hermon_sw_mr_s structure is also referred to using the "hermon_mrhdl_t" @@ -288,6 +288,8 @@ ibc_mem_alloc_s::ibc_dma_hdl ibc_mem_alloc_s::ibc_acc_hdl)) +int hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pdhdl, + ibt_dmr_attr_t *attr_p, hermon_mrhdl_t *mrhdl); int hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type); @@ -324,8 +326,12 @@ int hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl); int hermon_mr_register_physical_fmr(hermon_state_t *state, ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p); -int hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr); -int hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr); +int hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd, + ibt_lkey_flags_t flags, uint_t sz, hermon_mrhdl_t *mr); +int hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep); +int hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd, + uint32_t mpt_indx, uint_t sleep); #ifdef 
__cplusplus
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_qp.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_qp.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_QP_H @@ -55,9 +54,11 @@ * controlled via the "hermon_log_num_qp" configuration variables. * We also have a define for the minimum size of a QP. QPs allocated * with size 0, 1, 2, or 3 will always get back a QP of size 4. + * + * Note: Increasing #QPs from 64K to 256K for reserved ranges for FCoIB. */ -#define HERMON_NUM_QP_SHIFT 0x10 -#define HERMON_NUM_QPS (1 << HERMON_NUM_QP_SHIFT) /* 65,536 */ +#define HERMON_NUM_QP_SHIFT 0x12 +#define HERMON_NUM_QPS (1 << HERMON_NUM_QP_SHIFT) /* 256K */ #define HERMON_QP_MIN_SIZE 0xf /* @@ -80,7 +81,7 @@ * as recommended by the PRM. All XRC QPs will have this bit set. */ #define HERMON_QP_MAXNUMBER_MSK 0x7FFFFF -#define HERMON_QP_XRC 0x800000 +#define HERMON_QP_XRC_MSK 0x800000 /* * This define and the following macro are used to find a schedule queue for @@ -134,8 +135,10 @@ * to ensure the types match. */ #define HERMON_QP_TYPE_VALID(qp_trans, qp_serv) \ - ((qp_trans == IBT_UD_SRV && qp_serv == HERMON_QP_UD) || \ - (qp_trans == IBT_RC_SRV && qp_serv == HERMON_QP_RC) || \ + ((qp_trans == IBT_RC_SRV && qp_serv == HERMON_QP_RC) || \ + (qp_trans == IBT_UD_SRV && (qp_serv == HERMON_QP_UD || \ + qp_serv == HERMON_QP_RFCI || qp_serv == HERMON_QP_FCMND || \ + qp_serv == HERMON_QP_FEXCH)) || \ (qp_trans == IBT_UC_SRV && qp_serv == HERMON_QP_UC)) /* @@ -165,6 +168,17 @@ /* + * The hermon_qp_range_t is used to manage a qp_range for RSS and FEXCH. + * It has a reference count. When the reference count goes to 0, + * the qpc resource can be freed. 
+ */ +typedef struct hermon_qp_range_s { + kmutex_t hqpr_lock; + hermon_rsrc_t *hqpr_qpcrsrc; + uint_t hqpr_refcnt; +} hermon_qp_range_t; + +/* * The hermon_qp_info_t structure is used internally by the Hermon driver to * pass information to and from the hermon_qp_alloc() and * hermon_special_qp_alloc() routines. It contains placeholders for all of the @@ -231,11 +245,12 @@ uint32_t qp_qpnum; hermon_pdhdl_t qp_pdhdl; uint_t qp_serv_type; + ibt_qp_type_t qp_type; uint_t qp_sl; /* service level */ hermon_mrhdl_t qp_mrhdl; uint_t qp_sq_sigtype; uint_t qp_is_special; - uint_t qp_is_umap; + ibt_qp_alloc_flags_t qp_alloc_flags; uint32_t qp_uarpg; devmap_cookie_t qp_umap_dhp; uint_t qp_portnum; /* port 0/1 for HCA */ @@ -260,9 +275,9 @@ uint32_t qp_sq_sgl; uint_t qp_uses_lso; uint32_t qp_ring; + uint_t qp_state_for_post_send; /* copy of qp_state */ /* Receive Work Queue - not used when SRQ is used */ - kmutex_t qp_rq_lock; hermon_cqhdl_t qp_rq_cqhdl; hermon_workq_avl_t qp_rq_wqavl; /* needed for srq */ hermon_workq_hdr_t *qp_rq_wqhdr; @@ -290,7 +305,6 @@ /* Shared Receive Queue */ hermon_srqhdl_t qp_srqhdl; - uint_t qp_srq_en; /* Refcnt of QP belongs to an MCG */ uint_t qp_mcg_refcnt; @@ -301,6 +315,12 @@ struct hermon_qalloc_info_s qp_wqinfo; + ibt_fc_attr_t qp_fc_attr; + + struct hermon_qp_range_s *qp_rangep; + + /* Beware: 8-byte alignment needed here */ + struct hermon_hw_qpc_s qpc; }; _NOTE(READ_ONLY_DATA(hermon_sw_qp_s::qp_qpnum @@ -324,7 +344,7 @@ hermon_sw_qp_s::qp_sq_sigtype hermon_sw_qp_s::qp_serv_type hermon_sw_qp_s::qp_is_special - hermon_sw_qp_s::qp_is_umap + hermon_sw_qp_s::qp_alloc_flags hermon_sw_qp_s::qp_uarpg hermon_sw_qp_s::qp_sq_wqhdr hermon_sw_qp_s::qp_rq_wqhdr @@ -341,12 +361,19 @@ hermon_sw_qp_s::qp_pkeyindx hermon_sw_qp_s::qp_portnum)) +#define HERMON_SET_QP_POST_SEND_STATE(qp, state) \ + mutex_enter(&qp->qp_sq_lock); \ + qp->qp_state_for_post_send = state; \ + mutex_exit(&qp->qp_sq_lock) /* Defined in hermon_qp.c */ int 
hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo, uint_t sleepflag); int hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo, uint_t sleepflag); +int hermon_qp_alloc_range(hermon_state_t *state, uint_t log2, + hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibtl_qp_p, ibc_cq_hdl_t *send_cq_p, + ibc_cq_hdl_t *recv_cq_p, hermon_qphdl_t *qp_p, uint_t sleepflag); int hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl, ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh, uint_t sleepflag); int hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qphdl,
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_rsrc.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_rsrc.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_RSRC_H @@ -166,6 +165,10 @@ HERMON_UARPG, HERMON_INTR_IN_MBOX, HERMON_INTR_OUT_MBOX, /* type 0x1B */ + HERMON_QPC_FEXCH_PORT1, + HERMON_QPC_FEXCH_PORT2, + HERMON_QPC_RFCI_PORT1, + HERMON_QPC_RFCI_PORT2, HERMON_NUM_RESOURCES } hermon_rsrc_type_t; @@ -374,6 +377,9 @@ void hermon_rsrc_fini(hermon_state_t *state, hermon_rsrc_cleanup_level_t clean); +/* Exporting resource reservation capability to FCoIB */ +int hermon_rsrc_reserve(hermon_state_t *state, hermon_rsrc_type_t rsrc, + uint_t num, uint_t sleepflag, hermon_rsrc_t **hdl); #ifdef __cplusplus }
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_typedef.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_typedef.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_HERMON_TYPEDEF_H @@ -57,6 +56,17 @@ typedef struct hermon_hw_querydevlim_s hermon_hw_querydevlim_t; typedef struct hermon_hw_query_port_s hermon_hw_query_port_t; typedef struct hermon_hw_set_port_s hermon_hw_set_port_t; +typedef struct hermon_hw_set_port_en_s hermon_hw_set_port_en_t; +typedef struct hermon_hw_set_port_en_rqpn_s hermon_hw_set_port_en_rqpn_t; +typedef struct hermon_hw_set_port_en_mact_s hermon_hw_set_port_en_mact_t; +typedef struct hermon_hw_set_port_en_vlant_s hermon_hw_set_port_en_vlant_t; +typedef struct hermon_hw_set_port_en_priot_s hermon_hw_set_port_en_priot_t; +typedef struct hermon_fw_set_port_gidtable_s hermon_fw_set_port_gidtable_t; +typedef struct hermon_hw_set_mcast_fltr_s hermon_hw_set_mcast_fltr_t; +typedef struct hermon_hw_arm_req_s hermon_hw_arm_req_t; +typedef struct hermon_hw_config_fc_basic_s hermon_hw_config_fc_basic_t; +typedef struct hermon_hw_query_fc_s hermon_hw_query_fc_t; + typedef struct hermon_hw_queryfw_s hermon_hw_queryfw_t; typedef struct hermon_hw_queryadapter_s hermon_hw_queryadapter_t; typedef struct hermon_hw_initqueryhca_s hermon_hw_initqueryhca_t; @@ -73,20 +83,32 @@ typedef struct hermon_hw_mod_stat_cfg_s hermon_hw_mod_stat_cfg_t; typedef struct hermon_hw_msg_in_mod_s hermon_hw_msg_in_mod_t; typedef struct hermon_hw_udav_s hermon_hw_udav_t; +typedef struct hermon_hw_udav_enet_s hermon_hw_udav_enet_t; typedef struct hermon_hw_qpc_s hermon_hw_qpc_t; typedef struct hermon_hw_mcg_s hermon_hw_mcg_t; +typedef struct hermon_hw_mcg_en_s hermon_hw_mcg_en_t; typedef struct hermon_hw_mcg_qp_list_s 
hermon_hw_mcg_qp_list_t; typedef struct hermon_hw_sm_perfcntr_s hermon_hw_sm_perfcntr_t; typedef struct hermon_hw_sm_extperfcntr_s hermon_hw_sm_extperfcntr_t; -typedef struct hermon_hw_snd_wqe_ctrl_s hermon_hw_snd_wqe_ctrl_t; -typedef struct hermon_hw_srq_wqe_next_s hermon_hw_srq_wqe_next_t; -typedef struct hermon_hw_snd_wqe_ud_s hermon_hw_snd_wqe_ud_t; -typedef struct hermon_hw_snd_wqe_bind_s hermon_hw_snd_wqe_bind_t; + +typedef struct hermon_hw_snd_wqe_ud_s hermon_hw_snd_wqe_ud_t; +typedef struct hermon_hw_snd_wqe_bind_s hermon_hw_snd_wqe_bind_t; typedef struct hermon_hw_snd_wqe_remaddr_s hermon_hw_snd_wqe_remaddr_t; typedef struct hermon_hw_snd_wqe_atomic_s hermon_hw_snd_wqe_atomic_t; +typedef struct hermon_hw_snd_wqe_frwr_s hermon_hw_snd_wqe_frwr_t; +typedef struct hermon_hw_snd_wqe_frwr_ext_s hermon_hw_snd_wqe_frwr_ext_t; +typedef struct hermon_hw_snd_wqe_local_inv_s hermon_hw_snd_wqe_local_inv_t; +typedef struct hermon_hw_snd_rem_addr_s hermon_hw_snd_rem_addr_t; +typedef struct hermon_sw_send_wqe_lso_s hermon_sw_send_wqe_lso_t; typedef struct hermon_hw_mlx_wqe_nextctrl_s hermon_hw_mlx_wqe_nextctrl_t; typedef struct hermon_hw_rcv_wqe_nextctrl_s hermon_hw_rcv_wqe_nextctrl_t; typedef struct hermon_hw_wqe_sgl_s hermon_hw_wqe_sgl_t; +typedef struct hermon_hw_snd_wqe_ctrl_s hermon_hw_snd_wqe_ctrl_t; +typedef struct hermon_hw_srq_wqe_next_s hermon_hw_srq_wqe_next_t; +typedef struct hermonw_hw_fcp3_ctrl_s hermonw_hw_fcp3_ctrl_t; +typedef struct hermon_hw_fcp3_init_s hermon_hw_fcp3_init_t; +typedef struct hermon_hw_fcmd_o_enet_s hermon_hw_fcmd_o_enet_t; +typedef struct hermon_hw_fcmd_o_ib_s hermon_hw_fcmd_o_ib_t; typedef struct hermon_sw_mr_s *hermon_mrhdl_t; typedef struct hermon_sw_mr_s *hermon_mwhdl_t;
--- a/usr/src/uts/common/sys/ib/adapters/tavor/tavor_mr.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/adapters/tavor/tavor_mr.h Thu Jul 29 22:10:26 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_ADAPTERS_TAVOR_MR_H @@ -307,6 +306,8 @@ #define TAVOR_BINDMEM_NORMAL 1 #define TAVOR_BINDMEM_BYPASS 0 +int tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl, + ibt_dmr_attr_t *attr_p, tavor_mrhdl_t *mrhdl); int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op); int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl,
--- a/usr/src/uts/common/sys/ib/clients/of/rdma/ib_verbs.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/clients/of/rdma/ib_verbs.h Thu Jul 29 22:10:26 2010 -0700 @@ -721,15 +721,14 @@ * @cq_context: Context associated with the CQ returned to the user via * the associated completion and event handlers. * @cqe: The minimum size of the CQ. - * @comp_vector - Completion vector used to signal completion events. - * Must be >= 0 and < context->num_comp_vectors. + * @comp_vector - Completion queue sched handle. * * Users can examine the cq structure to determine the actual CQ size. */ struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe, int comp_vector); + void *cq_context, int cqe, void *comp_vector); /* * ib_destroy_cq - Destroys the specified CQ.
--- a/usr/src/uts/common/sys/ib/ibtl/ibci.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibci.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_IBTL_IBCI_H @@ -64,6 +63,7 @@ typedef struct ibc_eec_s *ibc_eec_hdl_t; /* End-to-End Context Handle */ typedef struct ibc_mem_alloc_s *ibc_mem_alloc_hdl_t; /* Memory Handle */ +#define ibc_sched_hdl_t ibt_sched_hdl_t /* CQ Sched Handle */ #define ibc_fmr_pool_hdl_t ibt_fmr_pool_hdl_t /* FMR Pool Handle */ #define ibc_mr_hdl_t ibt_mr_hdl_t /* Memory Region Handle */ #define ibc_mw_hdl_t ibt_mw_hdl_t /* Memory Window Handle */ @@ -142,11 +142,11 @@ /* Channel Interface version */ -typedef enum ibc_version_e { - IBCI_V1 = 1, - IBCI_V2 = 2, /* FMR Support */ - IBCI_V3 = 3 -} ibc_version_t; +typedef int ibc_version_t; +#define IBCI_V1 1 +#define IBCI_V2 2 +#define IBCI_V3 3 +#define IBCI_V4 4 typedef enum ibc_free_qp_flags_e { @@ -176,24 +176,6 @@ IBT_EEC_DEFER_ALLOC = (1 << 1) } ibc_eec_flags_t; - -/* - * Completion Queues - * - */ - -/* - * CQ handler attribute structure. - */ -typedef struct ibc_cq_handler_attr_s { - ibt_cq_handler_id_t h_id; /* Valid ID != NULL */ - int h_flags; /* Flags of ddi_intr_get_cap */ - int h_pri; /* priority from */ - /* ddi_intr_get_pri */ - void *h_bind; /* unknown intrd stuff */ -} ibc_cq_handler_attr_t; - - /* * Event data for asynchronous events and errors. 
The QP/EEC/CQ/SRQ handle, * or port number associated with the Event/Error is passed as an argument @@ -207,6 +189,7 @@ ibt_srq_hdl_t ev_srq_hdl; /* SRQ handle */ ibt_port_change_t ev_port_flags; /* Port Change flags */ uint8_t ev_port; /* For PORT UP/DOWN/CHANGE events */ + ibt_fc_syndrome_t ev_fc; /* FEXCH syndrome */ } ibc_async_event_t; @@ -272,9 +255,11 @@ ibt_status_t (*ibc_modify_cq)(ibc_hca_hdl_t hca, ibc_cq_hdl_t cq, uint_t count, uint_t usec, ibt_cq_handler_id_t hid); ibt_status_t (*ibc_alloc_cq_sched)(ibc_hca_hdl_t hca, - ibt_cq_sched_flags_t flags, ibc_cq_handler_attr_t *handler_attrs_p); + ibt_cq_sched_attr_t *attr, ibc_sched_hdl_t *sched_hdl_p); ibt_status_t (*ibc_free_cq_sched)(ibc_hca_hdl_t hca, - ibt_cq_handler_id_t id); + ibc_sched_hdl_t sched_hdl); + ibt_status_t (*ibc_query_cq_handler_id)(ibc_hca_hdl_t hca, + ibt_cq_handler_id_t hid, ibt_cq_handler_attr_t *attrs); /* EE Context */ ibt_status_t (*ibc_alloc_eec)(ibc_hca_hdl_t hca, ibc_eec_flags_t flags, @@ -409,6 +394,17 @@ ibt_status_t (*ibc_free_xrc_tgt_qp)(); ibt_status_t (*ibc_query_xrc_tgt_qp)(); ibt_status_t (*ibc_modify_xrc_tgt_qp)(); + + /* DMA memory region */ + ibt_status_t (*ibc_register_dma_mr)(ibc_hca_hdl_t hca, ibc_pd_hdl_t pd, + ibt_dmr_attr_t *attr_p, void *ibtl_reserved, ibc_mr_hdl_t *mr_p, + ibt_mr_desc_t *mem_desc); + + /* OPS extensions for next round of enhancements */ + ibt_status_t (*ibc_enhancement1)(); + ibt_status_t (*ibc_enhancement2)(); + ibt_status_t (*ibc_enhancement3)(); + ibt_status_t (*ibc_enhancement4)(); } ibc_operations_t; @@ -421,12 +417,10 @@ */ typedef struct ibc_hca_info_s { ibc_version_t hca_ci_vers; /* CI Version */ - dev_info_t *hca_dip; /* HCA dev_info */ ibc_hca_hdl_t hca_handle; /* used for call through */ /* "hca_ops" */ ibc_operations_t *hca_ops; ibt_hca_attr_t *hca_attr; - ibc_cq_handler_attr_t hca_def_cq_handler_attr; } ibc_hca_info_t;
--- a/usr/src/uts/common/sys/ib/ibtl/ibti.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibti.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_IBTL_IBTI_H @@ -47,7 +46,12 @@ IBT_ACHAN_USER_MAP = (1 << 1), IBT_ACHAN_DEFER_ALLOC = (1 << 2), IBT_ACHAN_USES_SRQ = (1 << 3), - IBT_ACHAN_USES_RSS = (1 << 4) + IBT_ACHAN_USES_RSS = (1 << 4), + + /* UD variants for FC support */ + IBT_ACHAN_USES_RFCI = (1 << 5), /* from RFCI pool */ + IBT_ACHAN_USES_FCMD = (1 << 6), + IBT_ACHAN_USES_FEXCH = (1 << 7) /* from FEXCH pool */ } ibt_chan_alloc_flags_t; @@ -151,6 +155,7 @@ ibt_channel_hdl_t ud_clone_chan; /* Optional clone handle */ ibt_srq_hdl_t ud_srq; /* Optional Shared Rcv Queue */ ibt_rss_attr_t ud_rss; + ibt_fc_attr_t ud_fc; } ibt_ud_chan_alloc_args_t; /* @@ -170,6 +175,8 @@ ibt_attr_flags_t ud_flags; /* Signaling Type etc */ ibt_srq_hdl_t ud_srq; /* Optional Shared Rcv Queue */ ibt_rss_attr_t ud_rss; + ibt_fc_attr_t ud_fc; + ibt_fexch_query_attr_t ud_query_fc; /* query only set */ } ibt_ud_chan_query_attr_t; /* @@ -187,6 +194,7 @@ uint_t ud_rq_sz; /* Set RQ Max outstanding WRs */ ib_qkey_t ud_qkey; /* Set Q_Key */ ibt_rss_attr_t ud_rss; /* Set RSS stuff */ + ibt_fc_attr_t ud_fc; } ibt_ud_chan_modify_attr_t;
--- a/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibti_common.h Thu Jul 29 22:10:26 2010 -0700 @@ -57,12 +57,12 @@ /* Transport Interface version */ -typedef enum ibt_version_e { - IBTI_V1 = 1, - IBTI_V2 = 2, /* FMR Support */ - IBTI_V3 = 3, - IBTI_V_CURR = IBTI_V3 -} ibt_version_t; +typedef int ibt_version_t; +#define IBTI_V1 1 +#define IBTI_V2 2 +#define IBTI_V3 3 +#define IBTI_V4 4 +#define IBTI_V_CURR IBTI_V4 /* * Driver class type. Identifies a type of client driver so that @@ -128,6 +128,7 @@ ibt_srq_hdl_t ev_srq_hdl; /* SRQ handle */ ibt_port_change_t ev_port_flags; /* Port Change flags */ uint8_t ev_port; /* HCA port */ + ibt_fc_syndrome_t ev_fc; /* FEXCH syndrome */ } ibt_async_event_t; /* @@ -205,48 +206,6 @@ IBT_RECV_Q = 1 << 1 /* Op applies to the Recv Q */ } ibt_qflags_t; -/* - * CQ priorities - * The IBTF will attempt to implement a coarse 3 level priority scheme - * (IBT_CQ_LOW, IBT_CQ_MEDIUM, IBT_CQ_HIGH) based on the class of client - * driver. The requested priority is not guaranteed. If a CI implementation - * has the ability to implement priority CQs, then the IBTF will take advantage - * of that when calling the CI to create a CQ by passing a priority indicator - * to the CI. - */ -typedef enum ibt_cq_priority_e { - IBT_CQ_DEFAULT = 0x0, - IBT_CQ_LOW = 0x1, - IBT_CQ_MEDIUM = 0x2, - IBT_CQ_HIGH = 0x3, - IBT_CQ_OPAQUE_1 = 0x4, - IBT_CQ_OPAQUE_2 = 0x5, - IBT_CQ_OPAQUE_3 = 0x6, - IBT_CQ_OPAQUE_4 = 0x7, - IBT_CQ_OPAQUE_5 = 0x8, - IBT_CQ_OPAQUE_6 = 0x9, - IBT_CQ_OPAQUE_7 = 0xA, - IBT_CQ_OPAQUE_8 = 0xB, - IBT_CQ_OPAQUE_9 = 0xC, - IBT_CQ_OPAQUE_10 = 0xD, - IBT_CQ_OPAQUE_11 = 0xE, - IBT_CQ_OPAQUE_12 = 0xF, - IBT_CQ_OPAQUE_13 = 0x10, - IBT_CQ_OPAQUE_14 = 0x11, - IBT_CQ_OPAQUE_15 = 0x12, - IBT_CQ_OPAQUE_16 = 0x13 -} ibt_cq_priority_t; - -/* - * Attributes when creating a Completion Queue Scheduling Handle. 
- */ -typedef struct ibt_cq_sched_attr_s { - ibt_cq_sched_flags_t cqs_flags; - ibt_cq_priority_t cqs_priority; - uint_t cqs_load; - ibt_sched_hdl_t cqs_affinity_hdl; -} ibt_cq_sched_attr_t; - /* * ibt_cq_handler_t @@ -1113,7 +1072,7 @@ ibt_cq_sched_attr_t *attr, ibt_sched_hdl_t *sched_hdl_p); ibt_status_t ibt_free_cq_sched(ibt_hca_hdl_t hca_hdl, - ibt_sched_hdl_t sched_hdl, uint_t load); + ibt_sched_hdl_t sched_hdl); /* * ibt_alloc_cq() @@ -1172,6 +1131,13 @@ uint_t *count_p, uint_t *usec_p, ibt_cq_handler_id_t *hid_p); /* + * ibt_query_cq_handler_id() + * Return interrupt characteristics of the CQ handler + */ +ibt_status_t ibt_query_cq_handler_id(ibt_hca_hdl_t hca_hdl, + ibt_cq_handler_id_t hid, ibt_cq_handler_attr_t *attrs); + +/* * ibt_resize_cq() * Change the size of a CQ. */ @@ -1303,6 +1269,13 @@ /* + * Register DMA Memory Region + */ +ibt_status_t ibt_register_dma_mr(ibt_hca_hdl_t hca_hdl, ibt_pd_hdl_t pd, + ibt_dmr_attr_t *mem_attr, ibt_mr_hdl_t *mr_hdl_p, ibt_mr_desc_t *mem_desc); + + +/* * Address Translation. */ @@ -1741,6 +1714,7 @@ uint_t ipa_flow:20; /* Optional */ uint8_t ipa_hop; /* Optional */ uint8_t ipa_tclass; /* Optional */ + zoneid_t ipa_zoneid; /* Default 0 = Global Zone */ } ibt_ip_path_attr_t; /* @@ -1756,8 +1730,51 @@ ibt_ip_path_attr_t *attr, ibt_path_info_t *paths_p, uint8_t *num_paths_p, ibt_path_ip_src_t *src_ip_p); -ibt_status_t ibt_get_src_ip(ib_gid_t gid, ib_pkey_t pkey, - ibt_ip_addr_t *src_ip); +/* + * ibt_get_src_ip() + * Get List of IP-Address that matches the parameters specified in + * srcip_attr. As a given MAC address can have both IPv4 and IPv6 + * addressed configured, caller can optional request to return only + * the desired family by specifying the "sip_family" field. If + * "sip_family" is AF_UNSPEC, then all assigned IP address (IPv4 + * and/or IPv6) will be returned. In case of IPv6 address, scope_id + * for that specific address will also be returned. 
+ * "sip_zoneid" will specify the zones the user is interested in. + * + * Information on each ip-address is returned to the caller in the + * form of an array of ibt_srcip_info_t. ibt_get_src_ip() allocates the + * memory for this array and returns a pointer to the array (src_info_p) + * and the number of entries in the array (entries_p). This memory + * should be freed by the client using ibt_free_srcip_info(). + * + * ibt_free_srcip_info() + * Free the memory allocated by successful ibt_get_src_ip() + */ +typedef struct ibt_srcip_attr_s { + ib_gid_t sip_gid; /* REQUIRED: Local Port GID */ + zoneid_t sip_zoneid; /* Zero means Global Zone */ + ib_pkey_t sip_pkey; /* Optional */ + sa_family_t sip_family; /* Optional : IPv4 or IPv6 */ +} ibt_srcip_attr_t; + +/* + * ip_flag : Flag to indicate whether the returned list of ip-address + * has any duplicate records. + */ +#define IBT_IPADDR_NO_FLAGS 0 +#define IBT_IPADDR_DUPLICATE 1 + +typedef struct ibt_srcip_info_s { + ibt_ip_addr_t ip_addr; + zoneid_t ip_zoneid; /* ZoneId of this ip-addr */ + uint_t ip_flag; /* Flag to indicate any gotchas */ +} ibt_srcip_info_t; + +ibt_status_t ibt_get_src_ip(ibt_srcip_attr_t *srcip_attr, + ibt_srcip_info_t **src_info_p, uint_t *entries_p); + +void ibt_free_srcip_info(ibt_srcip_info_t *src_info, uint_t entries); + /* * Callback function that can be used in ibt_aget_ip_paths(), a Non-Blocking @@ -1832,6 +1849,7 @@ uint8_t apa_sl:4; uint8_t apa_hop; uint8_t apa_tclass; + zoneid_t apa_zoneid; /* Default 0 = Global Zone */ } ibt_alt_ip_path_attr_t; ibt_status_t ibt_get_ip_alt_path(ibt_channel_hdl_t rc_chan, @@ -1934,6 +1952,14 @@ ibt_status_t ibt_get_all_part_attr(ibt_part_attr_t **, int *); ibt_status_t ibt_free_part_attr(ibt_part_attr_t *, int); + +/* + * ibt_lid_to_node_info() + * Retrieve node record information for the specified LID. + */ +ibt_status_t ibt_lid_to_node_info(ib_lid_t lid, ibt_node_info_t *node_info_p); + + #ifdef __cplusplus } #endif
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_ci_types.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_ci_types.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_IBTL_IBTL_CI_TYPES_H @@ -228,7 +227,12 @@ IBT_QP_USER_MAP = (1 << 0), IBT_QP_DEFER_ALLOC = (1 << 1), IBT_QP_USES_SRQ = (1 << 2), - IBT_QP_USES_RSS = (1 << 3) + IBT_QP_USES_RSS = (1 << 3), + + /* FC variants of UD */ + IBT_QP_USES_RFCI = (1 << 4), + IBT_QP_USES_FCMD = (1 << 5), + IBT_QP_USES_FEXCH = (1 << 6) } ibt_qp_alloc_flags_t; /* @@ -248,6 +252,7 @@ ibt_opaque1_t qp_opaque2; ibt_srq_hdl_t qp_srq_hdl; /* SRQ ibt hdl */ ibt_opaque2_t qp_opaque3; + ibt_fc_attr_t qp_fc; } ibt_qp_alloc_attr_t; @@ -305,6 +310,7 @@ uint16_t ud_pkey_ix; /* P_Key Index */ uint8_t ud_port; /* port */ ibt_rss_attr_t ud_rss; /* RSS stuff */ + ibt_fc_attr_t ud_fc; } ibt_qp_ud_attr_t; /* @@ -326,7 +332,6 @@ } qp_transport; } ibt_qp_info_t; - /* * QP Query Attributes definition. */ @@ -340,6 +345,7 @@ ibt_qp_info_t qp_info; /* Modifiable attributes */ ibt_srq_hdl_t qp_srq; /* SRQ hdl or NULL */ ibt_attr_flags_t qp_flags; + ibt_fexch_query_attr_t qp_query_fexch; /* FEXCH query only set */ } ibt_qp_query_attr_t;
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_status.h Thu Jul 29 22:10:26 2010 -0700 @@ -224,6 +224,9 @@ IBT_CQ_EMPTY = 503, /* Completion Queue Empty */ IBT_CQ_NOTIFY_TYPE_INVALID = 504, /* Invalid notification type */ IBT_CQ_INVALID_PRIORITY = 505, /* Invalid CQ Priority */ + IBT_CQ_SCHED_INVALID = 550, /* Invalid CQ Sched Handle */ + IBT_CQ_NO_SCHED_GROUP = 551, /* Schedule group not found */ + IBT_CQ_HID_INVALID = 552, /* CQ Handler ID invalid */ /* * Reserved for future use. @@ -315,8 +318,8 @@ /* for posted WR */ #define IBT_WC_WR_FLUSHED_ERR 14 /* WR was in process when the */ /* chan went to error state */ -#define IBT_WC_MEM_WIN_BIND_ERR 15 /* Consumer had insufficient */ - /* access rights */ +#define IBT_WC_MEM_MGT_OP_ERR 15 /* bind plus 1.2 mem ext */ +#define IBT_WC_MEM_WIN_BIND_ERR IBT_WC_MEM_MGT_OP_ERR /* * Errors that are only reported for Reliable Queue Pairs.
--- a/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibtl_types.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_IBTL_IBTL_TYPES_H @@ -386,7 +385,9 @@ IBT_HCA2_RSS_XOR_ALG = 1 << 7, /* RSS: XOR algorithm */ IBT_HCA2_XRC = 1 << 8, /* Extended RC (XRC) */ IBT_HCA2_XRC_SRQ_RESIZE = 1 << 9, /* resize XRC SRQ */ - IBT_HCA2_MEM_MGT_EXT = 1 << 10 /* FMR-WR, send-inv, local-inv */ + IBT_HCA2_MEM_MGT_EXT = 1 << 10, /* FMR-WR, send-inv, local-inv */ + IBT_HCA2_DMA_MR = 1 << 11, /* DMA MR */ + IBT_HCA2_FC = 1 << 12 /* FCoIB or FCoE offload */ } ibt_hca_flags2_t; /* @@ -535,7 +536,16 @@ uint_t hca_recv_sgl_sz; /* detailed SGL sizes */ uint_t hca_ud_send_sgl_sz; uint_t hca_conn_send_sgl_sz; + uint_t hca_conn_rdma_read_sgl_sz; + uint_t hca_conn_rdma_write_sgl_sz; uint_t hca_conn_rdma_sgl_overhead; + + /* FC Support */ + uint8_t hca_rfci_max_log2_qp; /* max log2 RFCI QPs */ + uint8_t hca_fexch_max_log2_qp; /* max log2 FEXCH QPs */ + uint8_t hca_fexch_max_log2_mem; /* max log2 mem per FEXCH */ + + dev_info_t *hca_dip; /* HCA dev_info */ } ibt_hca_attr_t; /* @@ -745,6 +755,9 @@ #define IBT_UD_SRV 3 #define IBT_RAWIP_SRV 4 #define IBT_RAWETHER_SRV 5 +#define IBT_RFCI_SRV 6 +#define IBT_FCMD_SRV 7 +#define IBT_FEXCH_SRV 8 /* * Channel (QP/EEC) state definitions. 
@@ -818,7 +831,8 @@ IBT_CEP_SET_OPAQUE6 = (1 << 21), IBT_CEP_SET_OPAQUE7 = (1 << 22), IBT_CEP_SET_OPAQUE8 = (1 << 23), - IBT_CEP_SET_RSS = (1 << 24) + IBT_CEP_SET_RSS = (1 << 24), + IBT_CEP_SET_FEXCH_RANGE = (1 << 25) } ibt_cep_modify_flags_t; /* @@ -837,22 +851,35 @@ IBT_CQ_HANDLER_IN_THREAD = 1 << 0, /* A thread calls the */ /* CQ handler */ IBT_CQ_USER_MAP = 1 << 1, - IBT_CQ_DEFER_ALLOC = 1 << 2 + IBT_CQ_DEFER_ALLOC = 1 << 2, + IBT_CQ_HID = 1 << 3 } ibt_cq_flags_t; -/* - * CQ types shared across TI and CI. - */ typedef enum ibt_cq_sched_flags_e { IBT_CQS_NO_FLAGS = 0, IBT_CQS_WARM_CACHE = 1 << 0, /* run on same CPU */ - IBT_CQS_AFFINITY = 1 << 1, + IBT_CQS_EXACT_SCHED_GROUP = 1 << 1, IBT_CQS_SCHED_GROUP = 1 << 2, IBT_CQS_USER_MAP = 1 << 3, IBT_CQS_DEFER_ALLOC = 1 << 4 } ibt_cq_sched_flags_t; /* + * Attributes when creating a Completion Queue Scheduling Handle. + */ +typedef struct ibt_cq_sched_attr_s { + ibt_cq_sched_flags_t cqs_flags; + char *cqs_pool_name; +} ibt_cq_sched_attr_t; + +typedef void *ibt_intr_handle_t; + +typedef struct ibt_cq_handler_attr_s { + dev_info_t *cha_dip; + ibt_intr_handle_t cha_ih; +} ibt_cq_handler_attr_t; + +/* * Attributes when creating a Completion Queue. 
* * Note: @@ -863,6 +890,7 @@ ibt_sched_hdl_t cq_sched; /* 0 = no hint, */ /* other = cq_sched value */ ibt_cq_flags_t cq_flags; + ibt_cq_handler_id_t cq_hid; } ibt_cq_attr_t; /* @@ -894,7 +922,8 @@ /* Additional physical registration flags */ IBT_MR_CONSUMER_KEY = (1 << 13), /* Consumer owns key */ /* portion of keys */ - IBT_MR_DISABLE_RO = (1 << 14) + IBT_MR_DISABLE_RO = (1 << 14), + IBT_MR_USER_BUF = (1 << 15) /* ibt_(re)register_buf */ } ibt_mr_flags_t; @@ -995,6 +1024,13 @@ uint8_t pmr_key; /* Key to use on new Lkey & Rkey */ } ibt_pmr_attr_t; +/* DMA Memory Region */ +typedef struct ibt_dmr_attr_s { + uint64_t dmr_paddr; /* starting physical addr */ + ib_memlen_t dmr_len; /* length in bytes */ + ibt_mr_flags_t dmr_flags; /* no sleep, memory permissions */ +} ibt_dmr_attr_t; + /* addr/length pair */ typedef struct ibt_iov_s { caddr_t iov_addr; /* Beginning address */ @@ -1006,7 +1042,9 @@ IBT_IOV_SLEEP = 0, IBT_IOV_NOSLEEP = (1 << 0), IBT_IOV_BUF = (1 << 1), - IBT_IOV_RECV = (1 << 2) + IBT_IOV_RECV = (1 << 2), + IBT_IOV_USER_BUF = (1 << 3), + IBT_IOV_ALT_LKEY = (1 << 4) } ibt_iov_flags_t; typedef struct ibt_iov_attr_s { @@ -1016,6 +1054,7 @@ uint32_t iov_list_len; uint32_t iov_wr_nds; ib_msglen_t iov_lso_hdr_sz; + ibt_lkey_t iov_alt_lkey; ibt_iov_flags_t iov_flags; } ibt_iov_attr_t; @@ -1090,7 +1129,8 @@ IBT_VA_FMR = (1 << 2), IBT_VA_BLOCK_MODE = (1 << 3), IBT_VA_BUF = (1 << 4), - IBT_VA_REG_FN = (1 << 5) + IBT_VA_REG_FN = (1 << 5), + IBT_VA_USER_BUF = (1 << 6) } ibt_va_flags_t; @@ -1127,6 +1167,29 @@ void *fmr_func_arg; } ibt_fmr_pool_attr_t; +/* + * Define types for Fibre Channel over IB (fcoib) + */ +typedef enum ibt_fexch_query_flags_e { + IBT_FEXCH_NO_FLAGS = 0, + IBT_FEXCH_HEART_BEAT_OK = (1 << 0) /* FEXCH only */ +} ibt_fexch_query_flags_t; + +typedef struct ibt_fexch_query_attr_s { + ibt_pmr_desc_t fq_uni_mem_desc; /* FEXCH: uni-directional MR attrs */ + ibt_pmr_desc_t fq_bi_mem_desc; /* FEXCH: bi-directional MR attrs */ + ibt_fexch_query_flags_t 
fq_flags; +} ibt_fexch_query_attr_t; + +typedef struct ibt_fc_attr_s { + uint32_t fc_src_id; /* S_ID assigned to the RFCI QP */ + /* FCMD, FEXCH: matching RFCI QP = RFCI base + idx */ + ib_qpn_t fc_rfci_qpn; + uint16_t fc_exch_base_off; /* FCMD: FEXCH usable base */ + uint8_t fc_exch_log2_sz; /* FCMD: FEXCH log2 size */ + uint8_t fc_hca_port; /* RFCI, FEXCH: HCA port number */ +} ibt_fc_attr_t; + /* * WORK REQUEST AND WORK REQUEST COMPLETION DEFINITIONS. @@ -1156,6 +1219,8 @@ #define IBT_WRC_FAST_REG_PMR 9 /* Fast Register Physical mem region */ #define IBT_WRC_LOCAL_INVALIDATE 10 #define IBT_WRC_SEND_LSO 11 +#define IBT_WRC_INIT_SEND_FCMD 12 /* Init & Send for FCMD initiator */ +#define IBT_WRC_INIT_FEXCH 13 /* Init for FEXCH target */ /* @@ -1169,6 +1234,8 @@ #define IBT_WC_IMMED_DATA_PRESENT (1 << 1) #define IBT_WC_RKEY_INVALIDATED (1 << 2) #define IBT_WC_CKSUM_OK (1 << 3) +#define IBT_WC_FEXCH_FMT (1 << 4) +#define IBT_WC_DIF_ERROR (1 << 5) /* IPoIB flags for wc_detail field */ #define IBT_WC_DETAIL_ALL_FLAGS_MASK (0x0FC00000) @@ -1185,6 +1252,12 @@ #define IBT_WC_DETAIL_RSS_TCP_IPV4 (1 << 20) #define IBT_WC_DETAIL_RSS_IPV4 (1 << 21) +/* FEXCH flags for wc_detail field */ +#define IBT_WC_DETAIL_FC_MATCH_MASK (0xE000000) +#define IBT_WC_DETAIL_FEXCH_INIT_XFER (1 << 25) +#define IBT_WC_DETAIL_FEXCH_LAST (1 << 26) +#define IBT_WC_DETAIL_RFCI_CRC_OK (1 << 27) + /* * Work Request Completion - This structure encapsulates the information * necessary to define a work request completion. @@ -1211,6 +1284,13 @@ ib_path_bits_t wc_opaque4; } ibt_wc_t; +/* FC format alternative field names */ +#define wc_fexch_seq_cnt wc_cksum +#define wc_fexch_tx_bytes_xfer wc_immed_data +#define wc_fexch_rx_bytes_xfer wc_res_hash +#define wc_fexch_seq_id wc_opaque2 + + /* * WR Flags. 
Common for both RC and UD * @@ -1225,6 +1305,7 @@ #define IBT_WR_SEND_SOLICIT (1 << 3) /* Solicited Event Indicator */ #define IBT_WR_SEND_REMOTE_INVAL (1 << 4) /* Remote Invalidate */ #define IBT_WR_SEND_CKSUM (1 << 5) /* Checksum offload Indicator */ +#define IBT_WR_SEND_FC_CRC IBT_WR_SEND_CKSUM /* RFCI: FC CRC */ #define IBT_WR_SEND_INLINE (1 << 6) /* INLINE required (no lkey) */ /* @@ -1433,11 +1514,75 @@ ib_msglen_t lso_mss; } ibt_wr_lso_t; +/* FC WR definitions */ +typedef enum ibt_fctl_flags_e { /* F_CTL flags */ + IBT_FCTL_NO_FLAGS = 0, + IBT_FCTL_SIT = (1 << 16), /* seq initiative transfer */ + IBT_FCTL_PRIO = (1 << 17), /* InitAndSend WR: priority */ + IBT_FCTL_LAST_SEQ = (1 << 20), + /* InitAndSend WR: Exchange Originator, set = initiator, off = tgt */ + IBT_FCTL_ORIG_INIT = (1 << 23) +} ibt_fctl_flags_t; +#define IBT_FCTL_SET_ABORT_FIELD(VAL) (((VAL) & 0x3) << 4) /* InitAndSend WR */ +#define IBT_FCTL_GET_ABORT_FIELD(FCTL) (((FCTL) & 0x30) >> 4) + +/* FC information category value, low 4 bits of routing control */ +#define IBT_FC_INFO_SOL_DATA 1 /* solicited data */ +#define IBT_FC_INFO_DATA_DESC 5 /* data descriptor */ +#define IBT_FC_INFO_UNSOL_CMD 6 /* unsolicited command */ +#define IBT_FC_INFO_CMD_STAT 7 /* command status */ + +typedef struct ibt_fc_ctl_s { + ibt_ud_dest_hdl_t fc_dest; + ibt_fctl_flags_t fc_frame_ctrl; + uint32_t fc_parameter; + uint8_t fc_seq_id; + /* FC R_CTL containing information category */ + uint8_t fc_routing_ctrl; +} ibt_fc_ctl_t; + +/* RFCI version of send */ +typedef struct ibt_wr_rfci_send_s { + ibt_ud_dest_hdl_t rfci_dest; + uint8_t rfci_eof; /* RFCI: when FC CRC set */ +} ibt_wr_rfci_send_t; + +typedef uint8_t ibt_init_send_op_t; +#define IBT_IS_OP_TARGET 0x0 /* target mode or no IO initiator op */ +#define IBT_IS_OP_NO_IO IBT_IS_OP_TARGET +#define IBT_IS_OP_IO_READ 0x1 /* IO read */ +#define IBT_IS_OP_IO_WRITE 0x2 /* IO write */ +#define IBT_IS_OP_BIDIR 0x3 /* bidirectional command */ + +/* Init and Send for FCMD 
initiator and also Init for FEXCH target */ +typedef struct ibt_wr_init_send_s { + ibt_fc_ctl_t is_ctl; + uint32_t is_dest_id; /* FC hdr: D_ID, low 24 bits */ + uint16_t is_fc_mtu; /* packet MTU (4B), low 10 bits */ + uint16_t is_rem_exch; /* target: remote exchange */ + uint16_t is_exch_qp_idx; /* FEXCH index for ULP */ + uint8_t is_cs_priority; /* FC hdr: CS_CTL/Priority */ + uint8_t is_tx_seq_id; /* initiator: FCP_DATA seq_id */ + ibt_init_send_op_t is_op; +} ibt_wr_init_send_t; + +typedef union ibt_wr_fc_u { + ibt_wr_rfci_send_t rfci_send; /* RFCI send */ + ibt_wr_init_send_t *fc_is; /* FCMD, FEXCH */ + ibt_wr_reg_pmr_t *reg_pmr; /* FCMD */ +} ibt_wr_fc_t; + + /* * Send Work Request (WR) attributes structure. * * Operation type in ibt_wrc_opcode_t. * Immediate Data indicator in ibt_wr_flags_t. + * + * RFCI initiator QP: send (FCP_CONF) + * FCMD initiator QP: init & send (FCP_CMND), FRWR + * FEXCH target QP: init, FRWR, RDMA-R (FCP_XFER_RDY), RDMA-W (FCP_DATA), + * Send (FCP_RSP) */ typedef struct ibt_send_wr_s { ibt_wrid_t wr_id; /* WR ID */ @@ -1456,11 +1601,17 @@ ibt_wr_reth_t reth; /* Reserved For Future Use */ ibt_wr_ripv6_t ripv6; /* Reserved For Future Use */ ibt_wr_lso_t ud_lso; + ibt_wr_fc_t fc; /* RFCI, FCMD, FEXCH */ } wr; /* operation specific */ } ibt_send_wr_t; /* * Receive Work Request (WR) attributes structure. 
+ * + * also used by these FC QP types: + * RFCI initiator QP + * FEXCH initiator QP (FCP_RSP) + * RFCI target QP (FCP_CMND) */ typedef struct ibt_recv_wr_s { ibt_wrid_t wr_id; /* WR ID */ @@ -1514,7 +1665,8 @@ IBT_ERROR_CATASTROPHIC_SRQ = 0x080000, IBT_PORT_CHANGE_EVENT = 0x100000, - IBT_CLNT_REREG_EVENT = 0x200000 + IBT_CLNT_REREG_EVENT = 0x200000, + IBT_FEXCH_ERROR = 0x400000 } ibt_async_code_t; #define IBT_PORT_EVENTS (IBT_EVENT_PORT_UP|IBT_PORT_CHANGE_EVENT|\ @@ -1530,6 +1682,10 @@ IBT_PORT_CHANGE_REREG = 0x000040 /* IsClientReregSupport */ } ibt_port_change_t; +typedef uint8_t ibt_fc_syndrome_t; +#define IBT_FC_BAD_IU 0x0 +#define IBT_FC_BROKEN_SEQ 0x1 + /* * ibt_ci_data_in() and ibt_ci_data_out() flags. */
--- a/usr/src/uts/common/sys/ib/ibtl/ibvti.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/ibtl/ibvti.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_IBTL_IBVTI_H @@ -89,26 +88,6 @@ */ #define ibt_qp_hdl_t ibt_channel_hdl_t -/* - * ibt_cq_priority_t - * VTI clients have full control over CQ priorities. - */ -#define IBT_CQ_PRI_1 IBT_CQ_OPAQUE_1 /* Lowest priority */ -#define IBT_CQ_PRI_2 IBT_CQ_OPAQUE_2 -#define IBT_CQ_PRI_3 IBT_CQ_OPAQUE_3 -#define IBT_CQ_PRI_4 IBT_CQ_OPAQUE_4 -#define IBT_CQ_PRI_5 IBT_CQ_OPAQUE_5 -#define IBT_CQ_PRI_6 IBT_CQ_OPAQUE_6 -#define IBT_CQ_PRI_7 IBT_CQ_OPAQUE_7 -#define IBT_CQ_PRI_8 IBT_CQ_OPAQUE_8 -#define IBT_CQ_PRI_9 IBT_CQ_OPAQUE_9 -#define IBT_CQ_PRI_10 IBT_CQ_OPAQUE_10 -#define IBT_CQ_PRI_11 IBT_CQ_OPAQUE_11 -#define IBT_CQ_PRI_12 IBT_CQ_OPAQUE_12 -#define IBT_CQ_PRI_13 IBT_CQ_OPAQUE_13 -#define IBT_CQ_PRI_14 IBT_CQ_OPAQUE_14 -#define IBT_CQ_PRI_15 IBT_CQ_OPAQUE_15 -#define IBT_CQ_PRI_16 IBT_CQ_OPAQUE_16 /* Highest priority */ /* * FUNCTION PROTOTYPES.
--- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_arp.h Thu Jul 29 22:10:26 2010 -0700 @@ -90,6 +90,7 @@ } ip_sin; #define ip_cm_sin ip_sin.ip_sockaddr #define ip_cm_sin6 ip_sin.ip_sockaddr6 + zoneid_t ip_zoneid; } ibcm_arp_ip_t; typedef struct ibcm_arp_ibd_insts_s {
--- a/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h Fri Jul 30 10:59:02 2010 +0800 +++ b/usr/src/uts/common/sys/ib/mgt/ibcm/ibcm_impl.h Thu Jul 29 22:10:26 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_IB_MGT_IBCM_IBCM_IMPL_H @@ -493,7 +492,7 @@ close_ret_priv_data_len close_ret_status})) _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_state_data_s::{timedout_state - cm_handler mra_msg abort_flag})) + cm_handler mra_msg abort_flag local_qp_rnr_cnt})) /* * Definitions for send mad flags. Respective bits in send_mad_flags or