Mercurial > illumos > illumos-gate
changeset 9879:2547a76dc28b
6832231 new info ioctls for ibnex
6832234 new performance kstats
6839158 assertion failed: status != IBT_HCA_IN_USE, file: ../../common/io/ib/mgt/ibmf/ibmf_impl.c
line wrap: on
line diff
--- a/usr/src/pkgdefs/SUNWhea/prototype_com Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/pkgdefs/SUNWhea/prototype_com Wed Jun 17 02:10:30 2009 -0700 @@ -922,6 +922,8 @@ f none usr/include/sys/ib/adapters/hermon/hermon_ioctl.h 644 root bin f none usr/include/sys/ib/ib_pkt_hdrs.h 644 root bin f none usr/include/sys/ib/ib_types.h 644 root bin +d none usr/include/sys/ib/ibnex 755 root bin +f none usr/include/sys/ib/ibnex/ibnex_devctl.h 644 root bin d none usr/include/sys/ib/ibtl 755 root bin f none usr/include/sys/ib/ibtl/ibci.h 644 root bin f none usr/include/sys/ib/ibtl/ibti.h 644 root bin
--- a/usr/src/pkgdefs/etc/exception_list_i386 Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/pkgdefs/etc/exception_list_i386 Wed Jun 17 02:10:30 2009 -0700 @@ -789,11 +789,6 @@ # usr/include/aclutils.h i386 # -# User<->kernel interface used by cfgadm/IB only -# -usr/include/sys/ib/ibnex i386 -usr/include/sys/ib/ibnex/ibnex_devctl.h i386 -# # USB skeleton driver stays in sync with the rest of USB but doesn't ship. # kernel/drv/usbskel i386
--- a/usr/src/pkgdefs/etc/exception_list_sparc Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/pkgdefs/etc/exception_list_sparc Wed Jun 17 02:10:30 2009 -0700 @@ -809,11 +809,6 @@ # usr/include/aclutils.h sparc # -# User<->kernel interface used by cfgadm/IB only -# -usr/include/sys/ib/ibnex sparc -usr/include/sys/ib/ibnex/ibnex_devctl.h sparc -# # Entries for the SCM IFD Handler which are used only to build # the SCM IFD Handler and are not delivered to customers. #
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_cmd.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_cmd.c Wed Jun 17 02:10:30 2009 -0700 @@ -2046,14 +2046,17 @@ return (status); } - /* * hermon_getpefcntr_cmd_post() * Context: Can be called from interrupt or base context. + * + * If reset is zero, read the performance counters of the specified port and + * copy them into perfinfo. + * If reset is non-zero reset the performance counters of the specified port. */ int hermon_getperfcntr_cmd_post(hermon_state_t *state, uint_t port, - uint_t sleepflag, hermon_hw_sm_perfcntr_t *perfinfo) + uint_t sleepflag, hermon_hw_sm_perfcntr_t *perfinfo, int reset) { hermon_mbox_info_t mbox_info; hermon_cmd_post_t cmd; @@ -2074,13 +2077,31 @@ /* Build the GetPortInfo request MAD in the "In" mailbox */ size = HERMON_CMD_MAD_IFC_SIZE; mbox = (uint32_t *)mbox_info.mbi_in->mb_addr; - ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[0], HERMON_CMD_PERFHDR0); + + if (reset) { + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[0], + HERMON_CMD_PERF_SET); + } else { + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[0], + HERMON_CMD_PERF_GET); + } ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[1], HERMON_CMD_MADHDR1); ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[2], HERMON_CMD_MADHDR2); ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[3], HERMON_CMD_MADHDR3); ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[4], HERMON_CMD_PERFCNTRS); ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[5], HERMON_CMD_PERFATTR); - ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[16], (port << 16)); + + if (reset) { + /* reset counters for XmitData, RcvData, XmitPkts, RcvPkts */ + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[16], + ((port << 16) | 0xf000)); + + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[22], 0); + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[23], 0); + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[24], 0); + ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[25], 0); + } else + 
ddi_put32(mbox_info.mbi_in->mb_acchdl, &mbox[16], (port << 16)); /* Sync the mailbox for the device to read */ hermon_mbox_sync(mbox_info.mbi_in, 0, size, DDI_DMA_SYNC_FORDEV); @@ -2102,16 +2123,19 @@ size = HERMON_CMD_MAD_IFC_SIZE; hermon_mbox_sync(mbox_info.mbi_out, 0, size, DDI_DMA_SYNC_FORCPU); - size = sizeof (hermon_hw_sm_perfcntr_t); /* for the copy */ - /* - * Copy Perfcounters into "perfinfo". We can discard the MAD header and - * the 8 Quadword reserved area of the PERM mgmt class MAD - */ - - for (i = 0; i < size >> 3; i++) { - data = ddi_get64(mbox_info.mbi_out->mb_acchdl, - ((uint64_t *)mbox_info.mbi_out->mb_addr + i + 8)); - ((uint64_t *)(void *)perfinfo)[i] = data; + if (reset == 0) { + size = sizeof (hermon_hw_sm_perfcntr_t); /* for the copy */ + /* + * Copy Perfcounters into "perfinfo". We can discard the MAD + * header and the 8 Quadword reserved area of the PERM mgmt + * class MAD + */ + + for (i = 0; i < size >> 3; i++) { + data = ddi_get64(mbox_info.mbi_out->mb_acchdl, + ((uint64_t *)mbox_info.mbi_out->mb_addr + i + 8)); + ((uint64_t *)(void *)perfinfo)[i] = data; + } } getperfinfo_fail:
--- a/usr/src/uts/common/io/ib/adapters/hermon/hermon_stats.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/adapters/hermon/hermon_stats.c Wed Jun 17 02:10:30 2009 -0700 @@ -47,6 +47,12 @@ int (*update)(kstat_t *, int)); static int hermon_kstat_cntr_update(kstat_t *ksp, int rw); +void hermon_kstat_perfcntr64_create(hermon_state_t *state, uint_t port_num); +static int hermon_kstat_perfcntr64_read(hermon_state_t *state, uint_t port, + int reset); +static void hermon_kstat_perfcntr64_thread_exit(hermon_ks_info_t *ksi); +static int hermon_kstat_perfcntr64_update(kstat_t *ksp, int rw); + /* * Hermon IB Performance Events structure * This structure is read-only and is used to setup the individual kstats @@ -72,6 +78,19 @@ {"clear_pic", 0, 0} }; +/* + * Return the maximum of (x) and (y) + */ +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) + +/* + * Set (x) to the maximum of (x) and (y) + */ +#define SET_TO_MAX(x, y) \ +{ \ + if ((x) < (y)) \ + (x) = (y); \ +} /* * hermon_kstat_init() @@ -93,8 +112,9 @@ state->hs_ks_info = ksi; /* - * Create as many "pic" kstats as we have IB ports. Enable all - * of the events specified in the "hermon_ib_perfcnt_list" structure. + * Create as many "pic" and perfcntr64 kstats as we have IB ports. + * Enable all of the events specified in the "hermon_ib_perfcnt_list" + * structure. 
*/ numports = state->hs_cfg_profile->cp_num_ports; for (i = 0; i < numports; i++) { @@ -103,6 +123,11 @@ if (ksi->hki_picN_ksp[i] == NULL) { goto kstat_init_fail; } + + hermon_kstat_perfcntr64_create(state, i + 1); + if (ksi->hki_perfcntr64[i].hki64_ksp == NULL) { + goto kstat_init_fail; + } } /* Create the "counters" kstat too */ @@ -125,6 +150,9 @@ ksi->hki_ib_perfcnt[i] = hermon_ib_perfcnt_list[i]; } + mutex_init(&ksi->hki_perfcntr64_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&ksi->hki_perfcntr64_cv, NULL, CV_DRIVER, NULL); + return (DDI_SUCCESS); @@ -138,6 +166,9 @@ if (ksi->hki_picN_ksp[i] != NULL) { kstat_delete(ksi->hki_picN_ksp[i]); } + if (ksi->hki_perfcntr64[i].hki64_ksp != NULL) { + kstat_delete(ksi->hki_perfcntr64[i].hki64_ksp); + } } /* Free the kstat info structure */ @@ -154,24 +185,39 @@ void hermon_kstat_fini(hermon_state_t *state) { - hermon_ks_info_t *ksi; + hermon_ks_info_t *ksi; uint_t numports; int i; /* Get pointer to kstat info */ ksi = state->hs_ks_info; - /* Delete all the "pic" kstats (one per port) */ + /* + * Signal the perfcntr64_update_thread to exit and wait until the + * thread exits. 
+ */ + mutex_enter(&ksi->hki_perfcntr64_lock); + hermon_kstat_perfcntr64_thread_exit(ksi); + mutex_exit(&ksi->hki_perfcntr64_lock); + + /* Delete all the "pic" and perfcntr64 kstats (one per port) */ numports = state->hs_cfg_profile->cp_num_ports; for (i = 0; i < numports; i++) { if (ksi->hki_picN_ksp[i] != NULL) { kstat_delete(ksi->hki_picN_ksp[i]); } + + if (ksi->hki_perfcntr64[i].hki64_ksp != NULL) { + kstat_delete(ksi->hki_perfcntr64[i].hki64_ksp); + } } /* Delete the "counter" kstats (one per port) */ kstat_delete(ksi->hki_cntr_ksp); + cv_destroy(&ksi->hki_perfcntr64_cv); + mutex_destroy(&ksi->hki_perfcntr64_lock); + /* Free the kstat info structure */ kmem_free(ksi, sizeof (hermon_ks_info_t)); } @@ -355,7 +401,7 @@ oldval = ib_perf[indx].ks_old_pic0; status = hermon_getperfcntr_cmd_post(state, 1, - HERMON_CMD_NOSLEEP_SPIN, &sm_perfcntr); + HERMON_CMD_NOSLEEP_SPIN, &sm_perfcntr, 0); if (status != HERMON_CMD_SUCCESS) { return (-1); } @@ -431,7 +477,7 @@ oldval = ib_perf[indx].ks_old_pic1; status = hermon_getperfcntr_cmd_post(state, 2, - HERMON_CMD_NOSLEEP_SPIN, &sm_perfcntr); + HERMON_CMD_NOSLEEP_SPIN, &sm_perfcntr, 0); if (status != HERMON_CMD_SUCCESS) { return (-1); } @@ -501,3 +547,350 @@ return (0); } } + +/* + * 64 bit kstats for performance counters: + * + * Since the hardware as of now does not support 64 bit performance counters, + * we maintain 64 bit performance counters in software using the 32 bit + * hardware counters. + * + * We create a thread that, every one second, reads the values of 32 bit + * hardware counters and adds them to the 64 bit software counters. Immediately + * after reading, it resets the 32 bit hardware counters to zero (so that they + * start counting from zero again). At any time the current value of a counter + * is going to be the sum of the 64 bit software counter and the 32 bit + * hardware counter. + * + * Since this work need not be done if there is no consumer, by default + * we do not maintain 64 bit software counters. 
To enable this the consumer + * needs to write a non-zero value to the "enable" component of the + * perf_counters kstat. Writing zero to this component will disable this work. + * + * If performance monitor is enabled in subnet manager, the SM could + * periodically reset the hardware counters by sending perf-MADs. So only + * one of either our software 64 bit counters or the SM performance monitor + * could be enabled at the same time. However, if both of them are enabled at + * the same time we still do our best by keeping track of the values of the + * last read 32 bit hardware counters. If the current read of a 32 bit hardware + * counter is less than the last read of the counter, we ignore the current + * value and go with the last read value. + */ + +/* + * hermon_kstat_perfcntr64_create() + * Context: Only called from attach() path context + * + * Create "port#/perf_counters" kstat for the specified port number. + */ +void +hermon_kstat_perfcntr64_create(hermon_state_t *state, uint_t port_num) +{ + hermon_ks_info_t *ksi = state->hs_ks_info; + struct kstat *cntr_ksp; + struct kstat_named *cntr_named_data; + int drv_instance; + char *drv_name; + char kname[32]; + + ASSERT(port_num != 0); + + drv_name = (char *)ddi_driver_name(state->hs_dip); + drv_instance = ddi_get_instance(state->hs_dip); + (void) snprintf(kname, sizeof (kname), "port%u/perf_counters", + port_num); + cntr_ksp = kstat_create(drv_name, drv_instance, kname, "ib", + KSTAT_TYPE_NAMED, HERMON_PERFCNTR64_NUM_COUNTERS, + KSTAT_FLAG_WRITABLE); + if (cntr_ksp == NULL) { + return; + } + cntr_named_data = (struct kstat_named *)(cntr_ksp->ks_data); + + kstat_named_init(&cntr_named_data[HERMON_PERFCNTR64_ENABLE_IDX], + "enable", KSTAT_DATA_UINT32); + kstat_named_init(&cntr_named_data[HERMON_PERFCNTR64_XMIT_DATA_IDX], + "xmit_data", KSTAT_DATA_UINT64); + kstat_named_init(&cntr_named_data[HERMON_PERFCNTR64_RECV_DATA_IDX], + "recv_data", KSTAT_DATA_UINT64); + 
kstat_named_init(&cntr_named_data[HERMON_PERFCNTR64_XMIT_PKTS_IDX], + "xmit_pkts", KSTAT_DATA_UINT64); + kstat_named_init(&cntr_named_data[HERMON_PERFCNTR64_RECV_PKTS_IDX], + "recv_pkts", KSTAT_DATA_UINT64); + + ksi->hki_perfcntr64[port_num - 1].hki64_ksp = cntr_ksp; + ksi->hki_perfcntr64[port_num - 1].hki64_port_num = port_num; + ksi->hki_perfcntr64[port_num - 1].hki64_state = state; + + cntr_ksp->ks_private = &ksi->hki_perfcntr64[port_num - 1]; + cntr_ksp->ks_update = hermon_kstat_perfcntr64_update; + + /* Install the kstat */ + kstat_install(cntr_ksp); +} + +/* + * hermon_kstat_perfcntr64_read() + * + * Read the values of 32 bit hardware counters. + * + * If reset is true, reset the 32 bit hardware counters. Add the values of the + * 32 bit hardware counters to the 64 bit software counters. + * + * If reset is false, just save the values read from the 32 bit hardware + * counters in hki64_last_read[]. + * + * See the general comment on the 64 bit performance counters + * regarding the use of last read 32 bit hardware counter values. 
+ */ +static int +hermon_kstat_perfcntr64_read(hermon_state_t *state, uint_t port, int reset) +{ + hermon_ks_info_t *ksi = state->hs_ks_info; + hermon_perfcntr64_ks_info_t *ksi64 = &ksi->hki_perfcntr64[port - 1]; + int status, i; + uint32_t tmp; + hermon_hw_sm_perfcntr_t sm_perfcntr; + + ASSERT(MUTEX_HELD(&ksi->hki_perfcntr64_lock)); + ASSERT(port != 0); + + /* read the 32 bit hardware counters */ + status = hermon_getperfcntr_cmd_post(state, port, + HERMON_CMD_NOSLEEP_SPIN, &sm_perfcntr, 0); + if (status != HERMON_CMD_SUCCESS) { + return (status); + } + + if (reset) { + /* reset the hardware counters */ + status = hermon_getperfcntr_cmd_post(state, port, + HERMON_CMD_NOSLEEP_SPIN, NULL, 1); + if (status != HERMON_CMD_SUCCESS) { + return (status); + } + + /* + * Update 64 bit software counters + */ + tmp = MAX(sm_perfcntr.portxmdata, + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_DATA_IDX]); + ksi64->hki64_counters[HERMON_PERFCNTR64_XMIT_DATA_IDX] += tmp; + + tmp = MAX(sm_perfcntr.portrcdata, + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_DATA_IDX]); + ksi64->hki64_counters[HERMON_PERFCNTR64_RECV_DATA_IDX] += tmp; + + tmp = MAX(sm_perfcntr.portxmpkts, + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_PKTS_IDX]); + ksi64->hki64_counters[HERMON_PERFCNTR64_XMIT_PKTS_IDX] += tmp; + + tmp = MAX(sm_perfcntr.portrcpkts, + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_PKTS_IDX]); + ksi64->hki64_counters[HERMON_PERFCNTR64_RECV_PKTS_IDX] += tmp; + + for (i = 0; i < HERMON_PERFCNTR64_NUM_COUNTERS; i++) + ksi64->hki64_last_read[i] = 0; + + } else { + /* + * Update ksi64->hki64_last_read[] + */ + SET_TO_MAX( + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_DATA_IDX], + sm_perfcntr.portxmdata); + + SET_TO_MAX( + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_DATA_IDX], + sm_perfcntr.portrcdata); + + SET_TO_MAX( + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_PKTS_IDX], + sm_perfcntr.portxmpkts); + + SET_TO_MAX( + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_PKTS_IDX], + 
sm_perfcntr.portrcpkts); + } + + return (HERMON_CMD_SUCCESS); +} + +/* + * hermon_kstat_perfcntr64_update_thread() + * Context: Entry point for a kernel thread + * + * Maintain 64 bit performance counters in software using the 32 bit + * hardware counters. + */ +static void +hermon_kstat_perfcntr64_update_thread(void *arg) +{ + hermon_state_t *state = (hermon_state_t *)arg; + hermon_ks_info_t *ksi = state->hs_ks_info; + uint_t i; + + mutex_enter(&ksi->hki_perfcntr64_lock); + /* + * Every one second update the values 64 bit software counters + * for all ports. Exit if HERMON_PERFCNTR64_THREAD_EXIT flag is set. + */ + while (!(ksi->hki_perfcntr64_flags & HERMON_PERFCNTR64_THREAD_EXIT)) { + for (i = 0; i < state->hs_cfg_profile->cp_num_ports; i++) { + if (ksi->hki_perfcntr64[i].hki64_enabled) { + (void) hermon_kstat_perfcntr64_read(state, + i + 1, 1); + } + } + /* sleep for a second */ + (void) cv_timedwait(&ksi->hki_perfcntr64_cv, + &ksi->hki_perfcntr64_lock, + ddi_get_lbolt() + drv_usectohz(1000000)); + } + ksi->hki_perfcntr64_flags = 0; + mutex_exit(&ksi->hki_perfcntr64_lock); +} + +/* + * hermon_kstat_perfcntr64_thread_create() + * Context: Called from the kstat context + * + * Create a thread that maintains 64 bit performance counters in software. + */ +static void +hermon_kstat_perfcntr64_thread_create(hermon_state_t *state) +{ + hermon_ks_info_t *ksi = state->hs_ks_info; + kthread_t *thr; + + ASSERT(MUTEX_HELD(&ksi->hki_perfcntr64_lock)); + + /* + * One thread per hermon instance. Don't create a thread if already + * created. 
+ */ + if (!(ksi->hki_perfcntr64_flags & HERMON_PERFCNTR64_THREAD_CREATED)) { + thr = thread_create(NULL, 0, + hermon_kstat_perfcntr64_update_thread, + state, 0, &p0, TS_RUN, minclsyspri); + ksi->hki_perfcntr64_thread_id = thr->t_did; + ksi->hki_perfcntr64_flags |= HERMON_PERFCNTR64_THREAD_CREATED; + } +} + +/* + * hermon_kstat_perfcntr64_thread_exit() + * Context: Called from attach, detach or kstat context + */ +static void +hermon_kstat_perfcntr64_thread_exit(hermon_ks_info_t *ksi) +{ + kt_did_t tid; + + ASSERT(MUTEX_HELD(&ksi->hki_perfcntr64_lock)); + + if (ksi->hki_perfcntr64_flags & HERMON_PERFCNTR64_THREAD_CREATED) { + /* + * Signal the thread to exit and wait until the thread exits. + */ + ksi->hki_perfcntr64_flags |= HERMON_PERFCNTR64_THREAD_EXIT; + tid = ksi->hki_perfcntr64_thread_id; + cv_signal(&ksi->hki_perfcntr64_cv); + + mutex_exit(&ksi->hki_perfcntr64_lock); + thread_join(tid); + mutex_enter(&ksi->hki_perfcntr64_lock); + } +} + +/* + * hermon_kstat_perfcntr64_update() + * Context: Called from the kstat context + * + * See the general comment on 64 bit kstats for performance counters: + */ +static int +hermon_kstat_perfcntr64_update(kstat_t *ksp, int rw) +{ + hermon_state_t *state; + struct kstat_named *data; + hermon_ks_info_t *ksi; + hermon_perfcntr64_ks_info_t *ksi64; + int i, thr_exit; + + ksi64 = ksp->ks_private; + state = ksi64->hki64_state; + ksi = state->hs_ks_info; + data = (struct kstat_named *)(ksp->ks_data); + + mutex_enter(&ksi->hki_perfcntr64_lock); + + /* + * 64 bit performance counters maintained by the software are not + * enabled by default. Enable them upon writing a non-zero value + * to the "enable" kstat. Disable them upon writing zero to the + * "enable" kstat. 
+ */ + if (rw == KSTAT_WRITE) { + if (data[HERMON_PERFCNTR64_ENABLE_IDX].value.ui32) { + if (ksi64->hki64_enabled == 0) { + /* Enable 64 bit software counters */ + ksi64->hki64_enabled = 1; + for (i = 0; + i < HERMON_PERFCNTR64_NUM_COUNTERS; i++) { + ksi64->hki64_counters[i] = 0; + ksi64->hki64_last_read[i] = 0; + } + hermon_kstat_perfcntr64_thread_create(state); + } + + } else if (ksi64->hki64_enabled) { + /* Disable 64 bit software counters */ + ksi64->hki64_enabled = 0; + thr_exit = 1; + for (i = 0; i < state->hs_cfg_profile->cp_num_ports; + i++) { + if (ksi->hki_perfcntr64[i].hki64_enabled) { + thr_exit = 0; + break; + } + } + if (thr_exit) + hermon_kstat_perfcntr64_thread_exit(ksi); + } + } else if (ksi64->hki64_enabled) { + /* + * Read the counters and update kstats. + */ + if (hermon_kstat_perfcntr64_read(state, ksi64->hki64_port_num, + 0) != HERMON_CMD_SUCCESS) { + mutex_exit(&ksi->hki_perfcntr64_lock); + return (EIO); + } + + data[HERMON_PERFCNTR64_XMIT_DATA_IDX].value.ui64 = + ksi64->hki64_counters[HERMON_PERFCNTR64_XMIT_DATA_IDX] + + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_DATA_IDX]; + + data[HERMON_PERFCNTR64_RECV_DATA_IDX].value.ui64 = + ksi64->hki64_counters[HERMON_PERFCNTR64_RECV_DATA_IDX] + + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_DATA_IDX]; + + data[HERMON_PERFCNTR64_XMIT_PKTS_IDX].value.ui64 = + ksi64->hki64_counters[HERMON_PERFCNTR64_XMIT_PKTS_IDX] + + ksi64->hki64_last_read[HERMON_PERFCNTR64_XMIT_PKTS_IDX]; + + data[HERMON_PERFCNTR64_RECV_PKTS_IDX].value.ui64 = + ksi64->hki64_counters[HERMON_PERFCNTR64_RECV_PKTS_IDX] + + ksi64->hki64_last_read[HERMON_PERFCNTR64_RECV_PKTS_IDX]; + + } else { + /* return 0 in kstats if not enabled */ + data[HERMON_PERFCNTR64_ENABLE_IDX].value.ui32 = 0; + for (i = 1; i < HERMON_PERFCNTR64_NUM_COUNTERS; i++) + data[i].value.ui64 = 0; + } + + mutex_exit(&ksi->hki_perfcntr64_lock); + return (0); +}
--- a/usr/src/uts/common/io/ib/ibnex/ibnex.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c Wed Jun 17 02:10:30 2009 -0700 @@ -513,6 +513,18 @@ return (DDI_FAILURE); } + /* + * Create "devctl" minor node for general ioctl interface to the + * ib nexus. + */ + if (ddi_create_minor_node(dip, "devctl", S_IFCHR, instance, + DDI_NT_IB_NEXUS, 0) != DDI_SUCCESS) { + IBTF_DPRINTF_L2("ibnex", + "\tattach: failed to create devctl minornode"); + (void) ddi_remove_minor_node(dip, NULL); + (void) mdi_vhci_unregister(dip, 0); + return (DDI_FAILURE); + } /* * Set pm-want-child-notification property for
--- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c Wed Jun 17 02:10:30 2009 -0700 @@ -34,12 +34,16 @@ #include <sys/ib/ibnex/ibnex.h> #include <sys/ib/ibnex/ibnex_devctl.h> #include <sys/ib/ibtl/impl/ibtl_ibnex.h> +#include <sys/ib/ibtl/impl/ibtl.h> #include <sys/file.h> #include <sys/sunndi.h> #include <sys/fs/dv_node.h> #include <sys/mdi_impldefs.h> #include <sys/sunmdi.h> +/* return the minimum value of (x) and (y) */ +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + /* * function prototypes */ @@ -75,6 +79,17 @@ static ibnex_rval_t ibnex_commsvc_fininode(dev_info_t *); static ibnex_rval_t ibnex_pseudo_fininode(dev_info_t *); +static int ibnex_devctl(dev_t, int, intptr_t, int, + cred_t *, int *); +static int ibnex_ctl_get_api_ver(dev_t, int, intptr_t, int, + cred_t *, int *); +static int ibnex_ctl_get_hca_list(dev_t, int, intptr_t, int, + cred_t *, int *); +static int ibnex_ctl_query_hca(dev_t, int, intptr_t, int, + cred_t *, int *); +static int ibnex_ctl_query_hca_port(dev_t, int, intptr_t, int, + cred_t *, int *); + extern uint64_t ibnex_str2hex(char *, int, int *); extern int ibnex_ioc_initnode_all_pi(ibdm_ioc_info_t *); extern dev_info_t *ibnex_commsvc_initnode(dev_info_t *, @@ -112,6 +127,42 @@ return (0); } +int +ibnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + /* + * For all generic devctl ioctls (such as DEVCTL_AP_CONFIGURE), + * call ibnex_devctl(). + */ + if (IS_DEVCTL(cmd)) + return (ibnex_devctl(dev, cmd, arg, mode, credp, rvalp)); + + /* + * The rest are ibnex specific ioctls. 
+ */ + + switch (cmd) { + case IBNEX_CTL_GET_API_VER: + return (ibnex_ctl_get_api_ver(dev, cmd, arg, mode, + credp, rvalp)); + + case IBNEX_CTL_GET_HCA_LIST: + return (ibnex_ctl_get_hca_list(dev, cmd, arg, mode, + credp, rvalp)); + + case IBNEX_CTL_QUERY_HCA: + return (ibnex_ctl_query_hca(dev, cmd, arg, mode, + credp, rvalp)); + + case IBNEX_CTL_QUERY_HCA_PORT: + return (ibnex_ctl_query_hca_port(dev, cmd, arg, mode, + credp, rvalp)); + + default: + return (EINVAL); + } +} /* * ibnex_ioctl() @@ -138,8 +189,8 @@ * DEVCTL_AP_UNCONFIGURE: "unconfigure" the attachment point */ /* ARGSUSED */ -int -ibnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, +static int +ibnex_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) { int ret, rv = 0, ioc_reprobe_pending = 0; @@ -164,21 +215,21 @@ ibnex_node_data_t *nodep, *scanp; struct devctl_iocdata *dcp = NULL; - IBTF_DPRINTF_L4("ibnex", "\tioctl: cmd=%x, arg=%p, mode=%x, cred=%p, " + IBTF_DPRINTF_L4("ibnex", "\tdevctl: cmd=%x, arg=%p, mode=%x, cred=%p, " "\t\trval=%p dev=0x%x", cmd, arg, mode, credp, rvalp, dev); /* read devctl ioctl data */ if ((cmd != DEVCTL_AP_CONTROL) && (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)) { IBTF_DPRINTF_L4("ibnex", - "\tioctl: ndi_dc_allochdl failed\n"); + "\tdevctl: ndi_dc_allochdl failed\n"); return (EFAULT); } mutex_enter(&ibnex.ibnex_mutex); switch (cmd) { case DEVCTL_AP_GETSTATE: - msg = "\tioctl: DEVCTL_AP_GETSTATE"; + msg = "\tdevctl: DEVCTL_AP_GETSTATE"; IBTF_DPRINTF_L4("ibnex", "%s:", msg); apid_n = ibnex_get_apid(dcp); @@ -222,7 +273,7 @@ int num_nodes = 0; ibnex_ioctl_data_t ioc; /* for 64-bit copies only */ - msg = "\tioctl: DEVCTL_AP_CONTROL"; + msg = "\tdevctl: DEVCTL_AP_CONTROL"; #ifdef _MULTI_DATAMODEL if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { ibnex_ioctl_data_32_t ioc32; @@ -281,7 +332,7 @@ /* process sub-commands */ switch (ioc.cmd) { case IBNEX_NUM_DEVICE_NODES: - msg = "\tioctl: DEVCTL_AP_CONTROL: 
NUM_DEVICE_NODES"; + msg = "\tdevctl: DEVCTL_AP_CONTROL: NUM_DEVICE_NODES"; /* * figure out how many IOC, VPPA, @@ -300,7 +351,7 @@ return (rv); case IBNEX_NUM_HCA_NODES: - msg = "\tioctl: DEVCTL_AP_CONTROL: NUM_HCA_NODES"; + msg = "\tdevctl: DEVCTL_AP_CONTROL: NUM_HCA_NODES"; /* figure out how many HCAs are present in the host */ mutex_exit(&ibnex.ibnex_mutex); @@ -315,7 +366,7 @@ return (rv); case IBNEX_UPDATE_PKEY_TBLS: - msg = "\tioctl: DEVCTL_AP_CONTROL: UPDATE_PKEY_TBLS"; + msg = "\tdevctl: DEVCTL_AP_CONTROL: UPDATE_PKEY_TBLS"; IBTF_DPRINTF_L4("ibnex", "%s", msg); /* @@ -342,8 +393,8 @@ case IBNEX_GET_SNAPSHOT: case IBNEX_SNAPSHOT_SIZE: msg = (ioc.cmd == IBNEX_SNAPSHOT_SIZE) ? - "\tioctl: DEVCTL_AP_CONTROL: IBNEX_SNAPSHOT_SIZE" : - "\tioctl: DEVCTL_AP_CONTROL: IBNEX_GET_SNAPSHOT"; + "\tdevctl: DEVCTL_AP_CONTROL: IBNEX_SNAPSHOT_SIZE" : + "\tdevctl: DEVCTL_AP_CONTROL: IBNEX_GET_SNAPSHOT"; IBTF_DPRINTF_L4("ibnex", "%s:", msg); @@ -392,8 +443,8 @@ char path[MAXPATHLEN]; msg = (ioc.cmd == IBNEX_DEVICE_PATH_SZ) ? - "\tioctl:DEVCTL_AP_CONTROL: IBNEX_DEVICE_PATH_SZ" : - "\tioctl:DEVCTL_AP_CONTROL: IBNEX_GET_DEVICE_PATH"; + "\tdevctl:DEVCTL_AP_CONTROL: IBNEX_DEVICE_PATH_SZ" : + "\tdevctl:DEVCTL_AP_CONTROL: IBNEX_GET_DEVICE_PATH"; IBTF_DPRINTF_L4("ibnex", "%s: apid = %s", msg, apid_n); @@ -508,8 +559,9 @@ case IBNEX_UNCFG_CLNTS_SZ: case IBNEX_UNCFG_CLNTS_INFO: msg = (ioc.cmd == IBNEX_UNCFG_CLNTS_SZ) ? 
- "\tioctl:DEVCTL_AP_CONTROL: IBNEX_UNCFG_CLNTS_SZ" : - "\tioctl:DEVCTL_AP_CONTROL: IBNEX_UNCFG_CLNTS_INFO"; + "\tdevctl:DEVCTL_AP_CONTROL: IBNEX_UNCFG_CLNTS_SZ" : + "\tdevctl:DEVCTL_AP_CONTROL: " + "IBNEX_UNCFG_CLNTS_INFO"; guid_str = strrchr(apid_n, ':') + 1; IBTF_DPRINTF_L4("ibnex", "%s, apid = %s, guid = %s", @@ -571,7 +623,7 @@ break; case IBNEX_CONF_ENTRY_ADD: - msg = "\tioctl: IBNEX_CONF_ENTRY_ADD: "; + msg = "\tdevctl: IBNEX_CONF_ENTRY_ADD: "; service = kmem_zalloc(ioc.bufsiz + 1, KM_SLEEP); /* read in the "service" name */ if (ddi_copyin(ioc.buf, service, @@ -598,7 +650,7 @@ break; case IBNEX_CONF_ENTRY_DEL: - msg = "\tioctl:IBNEX_CONF_ENTRY_DEL: "; + msg = "\tdevctl:IBNEX_CONF_ENTRY_DEL: "; service = kmem_zalloc(ioc.bufsiz + 1, KM_SLEEP); /* read in the "service" name */ if (ddi_copyin(ioc.buf, service, @@ -690,7 +742,7 @@ break; case IBNEX_UPDATE_IOC_CONF : - msg = "\tioctl:IBNEX_UPDATE_IOC_CONF: "; + msg = "\tdevctl:IBNEX_UPDATE_IOC_CONF: "; /* * If IB fabric APID, call ibnex_update_all @@ -2728,3 +2780,391 @@ mutex_enter(&ibnex.ibnex_mutex); return (rval == MDI_SUCCESS ? IBNEX_SUCCESS : IBNEX_OFFLINE_FAILED); } + +/* + * IOCTL implementation to get api version number. 
+ */ +static int +ibnex_ctl_get_api_ver(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *credp, int *rvalp) +{ + ibnex_ctl_api_ver_t api_ver; + + IBTF_DPRINTF_L4("ibnex", "\tctl_get_api_ver: cmd=%x, arg=%p, " + "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp, + rvalp, dev); + + api_ver.api_ver_num = IBNEX_CTL_API_VERSION; + + if (ddi_copyout(&api_ver, (void *)arg, sizeof (ibnex_ctl_api_ver_t), + mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_get_api_ver: ddi_copyout err"); + return (EFAULT); + } + + return (0); +} + +/* + * IOCTL implementation to get the list of HCAs + */ +static int +ibnex_ctl_get_hca_list(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *credp, int *rvalp) +{ + ibnex_ctl_get_hca_list_t hca_list; + int rv = 0; + uint_t *in_nhcasp; + uint_t nhcas, n; + ib_guid_t *hca_guids; + + IBTF_DPRINTF_L4("ibnex", "\tctl_get_hca_list: cmd=%x, arg=%p, " + "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp, + rvalp, dev); + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + ibnex_ctl_get_hca_list_32_t hca_list_32; + + if (ddi_copyin((void *)arg, &hca_list_32, + sizeof (ibnex_ctl_get_hca_list_32_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_get_hca_list: ddi_copyin err 1"); + return (EFAULT); + } + + hca_list.hca_guids_alloc_sz = hca_list_32.hca_guids_alloc_sz; + hca_list.hca_guids = + (ib_guid_t *)(uintptr_t)hca_list_32.hca_guids; + in_nhcasp = &((ibnex_ctl_get_hca_list_32_t *)arg)->nhcas; + } else +#endif + { + if (ddi_copyin((void *)arg, &hca_list, + sizeof (ibnex_ctl_get_hca_list_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_get_hca_list: ddi_copyin err 2"); + return (EFAULT); + } + + in_nhcasp = &((ibnex_ctl_get_hca_list_t *)arg)->nhcas; + } + + nhcas = ibt_get_hca_list(&hca_guids); + + /* copy number of hcas to user space */ + if (ddi_copyout(&nhcas, in_nhcasp, sizeof (uint_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_get_hca_list: ddi_copyout err 1"); + 
rv = EFAULT; + goto out; + } + + n = MIN(nhcas, hca_list.hca_guids_alloc_sz); + if (n == 0) + goto out; + + /* copy HCA guids to user space */ + if (ddi_copyout(hca_guids, hca_list.hca_guids, + n * sizeof (ib_guid_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_get_hca_list: ddi_copyout err 2"); + rv = EFAULT; + } + +out: + if (nhcas > 0) + ibt_free_hca_list(hca_guids, nhcas); + + return (rv); +} + +/* + * IOCTL implementation to query HCA attributes + */ +static int +ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *credp, int *rvalp) +{ + int rv = 0; + ibnex_ctl_hca_info_t *hca_info; + ibnex_ctl_query_hca_t *query_hca; + ibt_hca_attr_t *hca_attr; + char driver_name[MAX_HCA_DRVNAME_LEN]; + int instance; + + IBTF_DPRINTF_L4("ibnex", "\tctl_query_hca: cmd=%x, arg=%p, " + "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp, + rvalp, dev); + + /* + * NOTE: 32-bit versions of the structures for ibnex_ctl_query_hca_t + * and ibnex_ctl_hca_info_t are not defined because the alignment + * of fields for these structures happen to be the same for both + * 64-bit and 32-bit cases. 
+ */ + + query_hca = kmem_zalloc(sizeof (ibnex_ctl_query_hca_t), KM_SLEEP); + hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP); + + if (ddi_copyin((void *)arg, query_hca, + sizeof (ibnex_ctl_query_hca_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", "\tctl_query_hca: ddi_copyin err"); + rv = EFAULT; + goto out; + } + + if (ibtl_ibnex_query_hca_byguid(query_hca->hca_guid, hca_attr, + driver_name, sizeof (driver_name), &instance) != IBT_SUCCESS) { + rv = ENXIO; + goto out; + } + + hca_info = &query_hca->hca_info; + + hca_info->hca_node_guid = hca_attr->hca_node_guid; + hca_info->hca_si_guid = hca_attr->hca_si_guid; + hca_info->hca_nports = hca_attr->hca_nports; + hca_info->hca_flags = hca_attr->hca_flags; + hca_info->hca_flags2 = hca_attr->hca_flags2; + hca_info->hca_vendor_id = hca_attr->hca_vendor_id; + hca_info->hca_device_id = hca_attr->hca_device_id; + hca_info->hca_version_id = hca_attr->hca_version_id; + hca_info->hca_max_chans = hca_attr->hca_max_chans; + hca_info->hca_max_chan_sz = hca_attr->hca_max_chan_sz; + hca_info->hca_max_sgl = hca_attr->hca_max_sgl; + hca_info->hca_max_cq = hca_attr->hca_max_cq; + hca_info->hca_max_cq_sz = hca_attr->hca_max_cq_sz; + hca_info->hca_page_sz = hca_attr->hca_page_sz; + hca_info->hca_max_memr = hca_attr->hca_max_memr; + hca_info->hca_max_memr_len = hca_attr->hca_max_memr_len; + hca_info->hca_max_mem_win = hca_attr->hca_max_mem_win; + hca_info->hca_max_rsc = hca_attr->hca_max_rsc; + hca_info->hca_max_rdma_in_chan = hca_attr->hca_max_rdma_in_chan; + hca_info->hca_max_rdma_out_chan = hca_attr->hca_max_rdma_out_chan; + hca_info->hca_max_ipv6_chan = hca_attr->hca_max_ipv6_chan; + hca_info->hca_max_ether_chan = hca_attr->hca_max_ether_chan; + hca_info->hca_max_mcg_chans = hca_attr->hca_max_mcg_chans; + hca_info->hca_max_mcg = hca_attr->hca_max_mcg; + hca_info->hca_max_chan_per_mcg = hca_attr->hca_max_chan_per_mcg; + hca_info->hca_max_partitions = hca_attr->hca_max_partitions; + hca_info->hca_local_ack_delay = 
hca_attr->hca_local_ack_delay; + hca_info->hca_max_port_sgid_tbl_sz = hca_attr->hca_max_port_sgid_tbl_sz; + hca_info->hca_max_port_pkey_tbl_sz = hca_attr->hca_max_port_pkey_tbl_sz; + hca_info->hca_max_pd = hca_attr->hca_max_pd; + hca_info->hca_max_ud_dest = hca_attr->hca_max_ud_dest; + hca_info->hca_max_srqs = hca_attr->hca_max_srqs; + hca_info->hca_max_srqs_sz = hca_attr->hca_max_srqs_sz; + hca_info->hca_max_srq_sgl = hca_attr->hca_max_srq_sgl; + hca_info->hca_max_cq_handlers = hca_attr->hca_max_cq_handlers; + hca_info->hca_reserved_lkey = hca_attr->hca_reserved_lkey; + hca_info->hca_max_fmrs = hca_attr->hca_max_fmrs; + hca_info->hca_max_lso_size = hca_attr->hca_max_lso_size; + hca_info->hca_max_lso_hdr_size = hca_attr->hca_max_lso_hdr_size; + hca_info->hca_max_inline_size = hca_attr->hca_max_inline_size; + hca_info->hca_max_cq_mod_count = hca_attr->hca_max_cq_mod_count; + hca_info->hca_max_cq_mod_usec = hca_attr->hca_max_cq_mod_usec; + hca_info->hca_fw_major_version = hca_attr->hca_fw_major_version; + hca_info->hca_fw_minor_version = hca_attr->hca_fw_minor_version; + hca_info->hca_fw_micro_version = hca_attr->hca_fw_micro_version; + hca_info->hca_ud_send_inline_sz = hca_attr->hca_ud_send_inline_sz; + hca_info->hca_conn_send_inline_sz = hca_attr->hca_conn_send_inline_sz; + hca_info->hca_conn_rdmaw_inline_overhead = + hca_attr->hca_conn_rdmaw_inline_overhead; + hca_info->hca_recv_sgl_sz = hca_attr->hca_recv_sgl_sz; + hca_info->hca_ud_send_sgl_sz = hca_attr->hca_ud_send_sgl_sz; + hca_info->hca_conn_send_sgl_sz = hca_attr->hca_conn_send_sgl_sz; + hca_info->hca_conn_rdma_sgl_overhead = + hca_attr->hca_conn_rdma_sgl_overhead; + + (void) strlcpy(hca_info->hca_driver_name, driver_name, + MAX_HCA_DRVNAME_LEN); + hca_info->hca_driver_instance = instance; + + /* copy hca information to the user space */ + if (ddi_copyout(hca_info, &((ibnex_ctl_query_hca_t *)arg)->hca_info, + sizeof (ibnex_ctl_hca_info_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", "\tctl_query_hca: 
ddi_copyout err"); + rv = EFAULT; + } + +out: + kmem_free(query_hca, sizeof (ibnex_ctl_query_hca_t)); + kmem_free(hca_attr, sizeof (ibt_hca_attr_t)); + return (rv); +} + +#define IBNEX_CTL_CP_PORT_INFO(x, y, sgid_tbl, pkey_tbl) \ +{ \ + (x)->p_lid = (y)->p_opaque1; \ + (x)->p_qkey_violations = (y)->p_qkey_violations; \ + (x)->p_pkey_violations = (y)->p_pkey_violations; \ + (x)->p_sm_sl = (y)->p_sm_sl; \ + (x)->p_phys_state = (y)->p_phys_state; \ + (x)->p_sm_lid = (y)->p_sm_lid; \ + (x)->p_linkstate = (y)->p_linkstate; \ + (x)->p_port_num = (y)->p_port_num; \ + (x)->p_width_supported = (y)->p_width_supported; \ + (x)->p_width_enabled = (y)->p_width_enabled; \ + (x)->p_width_active = (y)->p_width_active; \ + (x)->p_mtu = (y)->p_mtu; \ + (x)->p_lmc = (y)->p_lmc; \ + (x)->p_speed_supported = (y)->p_speed_supported; \ + (x)->p_speed_enabled = (y)->p_speed_enabled; \ + (x)->p_speed_active = (y)->p_speed_active; \ + (x)->p_sgid_tbl = (sgid_tbl); \ + (x)->p_sgid_tbl_sz = (y)->p_sgid_tbl_sz; \ + (x)->p_pkey_tbl = (pkey_tbl); \ + (x)->p_pkey_tbl_sz = (y)->p_pkey_tbl_sz; \ + (x)->p_def_pkey_ix = (y)->p_def_pkey_ix; \ + (x)->p_max_vl = (y)->p_max_vl; \ + (x)->p_init_type_reply = (y)->p_init_type_reply; \ + (x)->p_subnet_timeout = (y)->p_subnet_timeout; \ + (x)->p_capabilities = (y)->p_capabilities; \ + (x)->p_msg_sz = (y)->p_msg_sz; \ +} + +/* + * IOCTL implementation to query HCA port attributes + */ +static int +ibnex_ctl_query_hca_port(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *credp, int *rvalp) +{ + ibt_hca_portinfo_t *ibt_pi; + uint_t nports; + uint_t size = 0; + int rv = 0; + ibnex_ctl_query_hca_port_t *query_hca_port = NULL; + ibnex_ctl_query_hca_port_32_t *query_hca_port_32 = NULL; + uint_t sgid_tbl_sz; + uint16_t pkey_tbl_sz; + + IBTF_DPRINTF_L4("ibnex", "\tctl_query_hca_port: cmd=%x, arg=%p, " + "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp, + rvalp, dev); + + query_hca_port = kmem_zalloc(sizeof (ibnex_ctl_query_hca_port_t), + KM_SLEEP); + 
+#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + query_hca_port_32 = kmem_zalloc( + sizeof (ibnex_ctl_query_hca_port_32_t), KM_SLEEP); + + if (ddi_copyin((void *)arg, query_hca_port_32, + sizeof (ibnex_ctl_query_hca_port_32_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_query_hca_port: ddi_copyin err 2"); + rv = EFAULT; + goto out; + } + + query_hca_port->hca_guid = query_hca_port_32->hca_guid; + query_hca_port->port_num = query_hca_port_32->port_num; + + query_hca_port->sgid_tbl = + (ib_gid_t *)(uintptr_t)query_hca_port_32->sgid_tbl; + query_hca_port->sgid_tbl_alloc_sz = + query_hca_port_32->sgid_tbl_alloc_sz; + + query_hca_port->pkey_tbl = + (ib_pkey_t *)(uintptr_t)query_hca_port_32->pkey_tbl; + query_hca_port->pkey_tbl_alloc_sz = + query_hca_port_32->pkey_tbl_alloc_sz; + + } else +#endif + { + if (ddi_copyin((void *)arg, query_hca_port, + sizeof (ibnex_ctl_query_hca_port_t), mode) != 0) { + IBTF_DPRINTF_L2("ibnex", + "\tctl_query_hca_port: ddi_copyin err 2"); + rv = EFAULT; + goto out; + } + } + + if (query_hca_port->port_num == 0) { + rv = EINVAL; + goto out; + } + + /* + * Query hca port attributes and copy them to the user space. 
+ */ + + if (ibt_query_hca_ports_byguid(query_hca_port->hca_guid, + query_hca_port->port_num, &ibt_pi, &nports, &size) != IBT_SUCCESS) { + rv = EINVAL; + goto out; + } + + sgid_tbl_sz = MIN(query_hca_port->sgid_tbl_alloc_sz, + ibt_pi->p_sgid_tbl_sz); + + pkey_tbl_sz = MIN(query_hca_port->pkey_tbl_alloc_sz, + ibt_pi->p_pkey_tbl_sz); + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + IBNEX_CTL_CP_PORT_INFO( + &query_hca_port_32->port_info, ibt_pi, + query_hca_port_32->sgid_tbl, query_hca_port_32->pkey_tbl); + + if (ddi_copyout(&query_hca_port_32->port_info, + &((ibnex_ctl_query_hca_port_32_t *)arg)->port_info, + sizeof (ibnex_ctl_hca_port_info_32_t), mode) != 0 || + + ddi_copyout(ibt_pi->p_sgid_tbl, + query_hca_port->sgid_tbl, + sgid_tbl_sz * sizeof (ib_gid_t), mode) != 0 || + + ddi_copyout(ibt_pi->p_pkey_tbl, + query_hca_port->pkey_tbl, + pkey_tbl_sz * sizeof (ib_pkey_t), mode) != 0) { + + IBTF_DPRINTF_L2("ibnex", + "\tctl_query_hca_port: ddi_copyout err 2"); + rv = EFAULT; + goto out; + } + } else +#endif + { + IBNEX_CTL_CP_PORT_INFO( + &query_hca_port->port_info, ibt_pi, + query_hca_port->sgid_tbl, query_hca_port->pkey_tbl); + + if (ddi_copyout(&query_hca_port->port_info, + &((ibnex_ctl_query_hca_port_t *)arg)->port_info, + sizeof (ibnex_ctl_hca_port_info_t), mode) != 0 || + + ddi_copyout(ibt_pi->p_sgid_tbl, + query_hca_port->sgid_tbl, + sgid_tbl_sz * sizeof (ib_gid_t), mode) != 0 || + + ddi_copyout(ibt_pi->p_pkey_tbl, + query_hca_port->pkey_tbl, + pkey_tbl_sz * sizeof (ib_pkey_t), mode) != 0) { + + IBTF_DPRINTF_L2("ibnex", + "\tctl_query_hca_port: ddi_copyout err 2"); + rv = EFAULT; + goto out; + } + } + +out: + if (size > 0) + ibt_free_portinfo(ibt_pi, size); + + if (query_hca_port) + kmem_free(query_hca_port, sizeof (ibnex_ctl_query_hca_port_t)); + + if (query_hca_port_32) + kmem_free(query_hca_port_32, + sizeof (ibnex_ctl_query_hca_port_32_t)); + return (rv); +}
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_ibnex.c Wed Jun 17 02:10:30 2009 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -622,3 +622,54 @@ } return (IBT_SUCCESS); } + +/* + * Function: + * ibtl_ibnex_query_hca_byguid + * Input: + * hca_guid - The HCA's node GUID. + * driver_name_size- size of the caller allocated driver_name buffer + * Output: + * hca_attrs - caller allocated buffer which will contain + * HCA attributes upon success + * driver_name - caller allocated buffer which will contain + * HCA driver name upon success + * driver_instance - HCA driver instance + * Returns: + * IBT_SUCCESS, IBT_HCA_INVALID or IBT_INSUFF_KERNEL_RESOURCE + * Description: + * Get the HCA attributes, driver name and instance number of the + * specified HCA. + */ +ibt_status_t +ibtl_ibnex_query_hca_byguid(ib_guid_t hca_guid, ibt_hca_attr_t *hca_attrs, + char *driver_name, size_t driver_name_size, int *driver_instance) +{ + ibtl_hca_devinfo_t *hca_devp; + + IBTF_DPRINTF_L4(ibtl_ibnex, "ibtl_ibnex_query_hca_byguid(" + "hca_guid = 0x%llx, hca_attrs = 0x%p, driver_name = 0x%p, " + "driver_name_size = %d, driver_instancep = 0x%p)", hca_guid, + hca_attrs, driver_name, (int)driver_name_size, driver_instance); + + mutex_enter(&ibtl_clnt_list_mutex); + + hca_devp = ibtl_get_hcadevinfo(hca_guid); + if (hca_devp == NULL) { + mutex_exit(&ibtl_clnt_list_mutex); + return (IBT_HCA_INVALID); + } + + if (strlcpy(driver_name, + ddi_driver_name(hca_devp->hd_hca_dip), driver_name_size) >= + driver_name_size) { + mutex_exit(&ibtl_clnt_list_mutex); + return (IBT_INSUFF_KERNEL_RESOURCE); + } + + *driver_instance = ddi_get_instance(hca_devp->hd_hca_dip); + bcopy(hca_devp->hd_hca_attr, hca_attrs, sizeof (ibt_hca_attr_t)); + + mutex_exit(&ibtl_clnt_list_mutex); + return (IBT_SUCCESS); +}
--- a/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/ibtl/ibtl_impl.c Wed Jun 17 02:10:30 2009 -0700 @@ -105,6 +105,10 @@ MODREV_1, (void *)&modlmisc, NULL }; +static void ibtl_kstat_init(ibtl_hca_devinfo_t *); +static void ibtl_kstat_fini(ibtl_hca_devinfo_t *); +static void ibtl_kstat_stats_create(ibtl_hca_devinfo_t *, uint_t); +static void ibtl_kstat_pkeys_create(ibtl_hca_devinfo_t *, uint_t); /* * IBTF Loadable Module Routines. @@ -615,6 +619,8 @@ hca_devp->hd_portinfo_locked_port = 0; cv_init(&hca_devp->hd_portinfo_cv, NULL, CV_DEFAULT, NULL); + ibtl_kstat_init(hca_devp); + mutex_exit(&ibtl_clnt_list_mutex); /* @@ -802,6 +808,8 @@ kmem_free(hca_devp->hd_portinfop, hca_devp->hd_portinfo_len); mutex_exit(&ibtl_clnt_list_mutex); + ibtl_kstat_fini(hca_devp); + /* Free up the memory of per-client info struct */ kmem_free(hca_devp, sizeof (ibtl_hca_devinfo_t) + (hca_devp->hd_hca_attr->hca_nports - 1) * @@ -1102,3 +1110,236 @@ IBTF_DPRINTF_L3(ibtf, "ibt_check_failure: type = 0x%X", type); return (type); } + +/* + * Initialize and create kstats. + * + * We create the following kstats on all ports of the HCA: + * <hca_driver_name><instance_number>/port<port_num>/stats + * <hca_driver_name><instance_number>/port<port_num>/pkeys + */ +static void +ibtl_kstat_init(ibtl_hca_devinfo_t *hca_devp) +{ + uint_t nports = hca_devp->hd_hca_attr->hca_nports; + ibtl_hca_port_kstat_t *pks; + int i; + + IBTF_DPRINTF_L3(ibtf, "ibtl_kstat_init(hca_devp = 0x%p)", hca_devp); + + hca_devp->hd_hca_port_ks_info_len = + sizeof (ibtl_hca_port_kstat_t) * nports; + pks = kmem_zalloc(hca_devp->hd_hca_port_ks_info_len, KM_SLEEP); + hca_devp->hd_hca_port_ks_info = pks; + + for (i = 0; i < nports; i++, pks++) { + pks->pks_hca_devp = hca_devp; + pks->pks_port_num = i + 1; + ibtl_kstat_stats_create(hca_devp, i + 1); + ibtl_kstat_pkeys_create(hca_devp, i + 1); + } +} + +/* + * Delete kstats on all ports of the HCA. 
+ */ +static void +ibtl_kstat_fini(ibtl_hca_devinfo_t *hca_devp) +{ + ibtl_hca_port_kstat_t *pks; + int i; + + IBTF_DPRINTF_L3(ibtf, "ibtl_kstat_fini(hca_devp = 0x%p)", hca_devp); + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hca_devp)) + + pks = hca_devp->hd_hca_port_ks_info; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pks)) + + if (pks == NULL) + return; + + for (i = 0; i < hca_devp->hd_hca_attr->hca_nports; i++, pks++) { + if (pks->pks_stats_ksp) + kstat_delete(pks->pks_stats_ksp); + + if (pks->pks_pkeys_ksp) { + ASSERT(!MUTEX_HELD(&ibtl_clnt_list_mutex)); + kstat_delete(pks->pks_pkeys_ksp); + } + } + + kmem_free(hca_devp->hd_hca_port_ks_info, + hca_devp->hd_hca_port_ks_info_len); +} + +/* + * Update "stats" kstat. + * Called by kstat framework. + */ +static int +ibtl_kstat_stats_update(kstat_t *ksp, int rw) +{ + ibtl_hca_port_kstat_t *pks; + ibtl_hca_devinfo_t *hca_devp; + ibt_hca_portinfo_t *p; + struct kstat_named *data; + + IBTF_DPRINTF_L4(ibtf, "ibtl_kstat_stats_update(ksp = 0x%p, rw = %d)", + ksp, rw); + + if (rw == KSTAT_WRITE) + return (EACCES); + + mutex_enter(&ibtl_clnt_list_mutex); + + /* + * Update the link_state kstat using the value from portinfo cache. 
+ */ + pks = ksp->ks_private; + hca_devp = pks->pks_hca_devp; + data = (struct kstat_named *)(ksp->ks_data); + p = hca_devp->hd_portinfop + pks->pks_port_num - 1; + data[0].value.ui32 = (uint32_t)p->p_linkstate; + + mutex_exit(&ibtl_clnt_list_mutex); + + return (0); +} + +/* + * Create "stats" kstat for the specified HCA port in the form: + * <hca_driver_name><instance_number>/port<port_num>/stats + * At present it contains only one named data component, "link_state". + */ +static void +ibtl_kstat_stats_create(ibtl_hca_devinfo_t *hca_devp, uint_t port_num) +{ + struct kstat *ksp; + struct kstat_named *named_data; + char *drv_name; + int drv_instance; + ibtl_hca_port_kstat_t *pks; + char kname[40]; + + IBTF_DPRINTF_L3(ibtf, "ibtl_kstat_stats_create(hca_devp = 0x%p, " + "port_num = %u)", hca_devp, port_num); + + drv_name = (char *)ddi_driver_name(hca_devp->hd_hca_dip); + drv_instance = ddi_get_instance(hca_devp->hd_hca_dip); + (void) snprintf(kname, sizeof (kname), "%s%d/port%d/stats", + drv_name, drv_instance, port_num); + + ksp = kstat_create("ibtf", 0, kname, "ib", KSTAT_TYPE_NAMED, 1, 0); + if (ksp == NULL) { + IBTF_DPRINTF_L2(ibtf, + "ibtl_kstat_stats_create: kstat_create() failed"); + return; + } + + named_data = (struct kstat_named *)(ksp->ks_data); + kstat_named_init(&named_data[0], "link_state", KSTAT_DATA_UINT32); + + pks = hca_devp->hd_hca_port_ks_info + port_num - 1; + pks->pks_stats_ksp = ksp; + + ksp->ks_private = pks; + ksp->ks_update = ibtl_kstat_stats_update; + + /* Install the kstat */ + kstat_install(ksp); +} + +/* + * Update "pkeys" kstat. + * + * Called by kstat framework. Since ks_lock was set to ibtl_clnt_list_mutex + * at the time of the kstat creation, kstat framework will hold this lock + * while calling this function.
+ */ +static int +ibtl_kstat_pkeys_update(kstat_t *ksp, int rw) +{ + ibtl_hca_port_kstat_t *pks; + ibtl_hca_devinfo_t *hca_devp; + ibt_hca_portinfo_t *p; + + IBTF_DPRINTF_L4(ibtf, "ibtl_kstat_pkeys_update(ksp = 0x%p, rw = %d)", + ksp, rw); + +#ifndef __lock_lint + ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex)); +#endif + + if (rw == KSTAT_WRITE) + return (EACCES); + + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ksp)) + + pks = ksp->ks_private; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pks)) + + hca_devp = pks->pks_hca_devp; + _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hca_devp)) + + /* + * Point kstat data to the pkey table in the portinfo cache. + */ + + p = hca_devp->hd_portinfop + pks->pks_port_num - 1; + + ksp->ks_data = p->p_pkey_tbl; + ksp->ks_ndata = p->p_pkey_tbl_sz; + ksp->ks_data_size = p->p_pkey_tbl_sz * sizeof (ib_pkey_t); + + return (0); +} + +/* + * Create "pkeys" kstat for the specified HCA port in the form: + * <hca_driver_name><instance_number>/port<port_num>/pkeys + * + * Currently kstat framework allows only some fixed data types as named + * data components under a named kstat. Due to this limitation it is not + * possible to add "pkeys" as a named data under the "stats" kstat. 
+ */ +static void +ibtl_kstat_pkeys_create(ibtl_hca_devinfo_t *hca_devp, uint_t port_num) +{ + struct kstat *ksp; + char *drv_name; + int drv_instance; + char kname[40]; + ibtl_hca_port_kstat_t *pks; + + IBTF_DPRINTF_L3(ibtf, "ibtl_kstat_pkeys_create(hca_devp = 0x%p, " + "port_num = %u)", hca_devp, port_num); + + drv_name = (char *)ddi_driver_name(hca_devp->hd_hca_dip); + drv_instance = ddi_get_instance(hca_devp->hd_hca_dip); + (void) snprintf(kname, sizeof (kname), "%s%d/port%d/pkeys", + drv_name, drv_instance, port_num); + + ksp = kstat_create("ibtf", 0, kname, "ib", KSTAT_TYPE_RAW, 0, + KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL); + if (ksp == NULL) { + IBTF_DPRINTF_L2(ibtf, + "ibtl_kstat_pkeys_create: kstat_create() failed"); + return; + } + + pks = hca_devp->hd_hca_port_ks_info + port_num - 1; + pks->pks_pkeys_ksp = ksp; + + ksp->ks_private = pks; + ksp->ks_update = ibtl_kstat_pkeys_update; + ksp->ks_lock = &ibtl_clnt_list_mutex; + + /* + * We just go with the default_kstat_snapshot(). + * So there is no need to set ks_snapshot field. + */ + + /* Install the kstat */ + kstat_install(ksp); +}
--- a/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_impl.c Wed Jun 17 02:10:30 2009 -0700 @@ -987,14 +987,8 @@ IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d", hcap->hca_guid, hcap->hca_res_cnt); - /* wait on response CV */ - absolute_time = ddi_get_lbolt() + - drv_usectohz(ibcm_wait_for_res_cnt_timeout); - while (hcap->hca_res_cnt > 0) - if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock, - absolute_time) == -1) - break; + cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock); if (hcap->hca_res_cnt != 0) { /* We got a timeout waiting for hca_res_cnt to become 0 */
--- a/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_recv.c Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_recv.c Wed Jun 17 02:10:30 2009 -0700 @@ -349,6 +349,24 @@ ibmf_mod_load_args_t *modlargsp; + /* + * HCA driver handles the Performance management + * class MAD's. It registers with the IBMF during early + * boot and unregisters during detach and during + * HCA unconfigure operation. We come here + * 1. Before HCA registers with IBMF + * Drop the MAD. Since this is a UD MAD, + * sender will resend the request + * 2. After HCA unregistered with IBMF during DR operation. + * Since HCA is going away, we can safely drop the PMA + * MAD's here. + * Solaris does not support BM_AGENT and so drop the BM MAD's + */ + if ((class == PERF_AGENT) || (class == BM_AGENT)) { + (void) ibmf_i_repost_recv_buffer(cip, recv_wqep); + return; + } + recv_wqep->recv_wc = *wcp; /* struct copy */ IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L4,
--- a/usr/src/uts/common/io/warlock/hermon.wlcmd Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/warlock/hermon.wlcmd Wed Jun 17 02:10:30 2009 -0700 @@ -137,6 +137,7 @@ ### Hermon kstat callback root hermon_kstat_cntr_update +root hermon_kstat_perfcntr64_update ### Hermon userland mapping callbacks and functions root hermon_umap_umemlock_cb
--- a/usr/src/uts/common/io/warlock/ibtl.wlcmd Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/io/warlock/ibtl.wlcmd Wed Jun 17 02:10:30 2009 -0700 @@ -194,6 +194,7 @@ root ibtl_cm_get_1st_full_pkey_ix root ibtl_cm_get_local_comp_gids root ibtl_cm_is_multi_sm +root ibtl_cm_get_clnt_name # IBTL-IBNEX private interface root ibtl_ibnex_get_hca_info @@ -203,6 +204,7 @@ root ibtl_ibnex_hcaguid2dip root ibtl_ibnex_get_hca_verbose_data root ibtl_ibnex_valid_hca_parent +root ibtl_ibnex_query_hca_byguid # IBTL internal taskq related callbacks root ibtl_do_mgr_async_task @@ -219,6 +221,11 @@ root ibt_get_module_failure root ibt_check_failure +# IBTL kstat callback +root ibtl_kstat_stats_update +root ibtl_kstat_pkeys_update + + # Internal, but not referenced (n the future, maybe) root ibtl_free_eec_async_check
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cmd.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_cmd.h Wed Jun 17 02:10:30 2009 -0700 @@ -320,7 +320,8 @@ #define HERMON_CMD_GUIDINFO 0x00140000 #define HERMON_CMD_PKEYTBLE 0x00160000 -#define HERMON_CMD_PERFHDR0 0x01040101 +#define HERMON_CMD_PERF_GET 0x01040101 +#define HERMON_CMD_PERF_SET 0x01040102 #define HERMON_CMD_PERFCNTRS 0x00120000 #define HERMON_CMD_PERFATTR 0x00000000 @@ -754,7 +755,7 @@ int hermon_getpkeytable_cmd_post(hermon_state_t *state, uint_t port, uint_t pkeyblock, uint_t sleepflag, sm_pkey_table_t *pkeytable); int hermon_getperfcntr_cmd_post(hermon_state_t *state, uint_t port, - uint_t sleepflag, hermon_hw_sm_perfcntr_t *perfinfo); + uint_t sleepflag, hermon_hw_sm_perfcntr_t *perfinfo, int reset); /* * WRITE_MTT - used for write MTT entries to the Hermon MTT table */
--- a/usr/src/uts/common/sys/ib/adapters/hermon/hermon_misc.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/ib/adapters/hermon/hermon_misc.h Wed Jun 17 02:10:30 2009 -0700 @@ -471,6 +471,30 @@ } hermon_ks_mask_t; /* + * Index into the named data components of 64 bit "perf_counters" kstat. + */ +enum { + HERMON_PERFCNTR64_ENABLE_IDX = 0, + HERMON_PERFCNTR64_XMIT_DATA_IDX, + HERMON_PERFCNTR64_RECV_DATA_IDX, + HERMON_PERFCNTR64_XMIT_PKTS_IDX, + HERMON_PERFCNTR64_RECV_PKTS_IDX, + HERMON_PERFCNTR64_NUM_COUNTERS +}; + +/* + * Data associated with the 64 bit "perf_counters" kstat. One for each port. + */ +typedef struct hermon_perfcntr64_ks_info_s { + struct kstat *hki64_ksp; + int hki64_enabled; + uint64_t hki64_counters[HERMON_PERFCNTR64_NUM_COUNTERS]; + uint32_t hki64_last_read[HERMON_PERFCNTR64_NUM_COUNTERS]; + uint_t hki64_port_num; + hermon_state_t *hki64_state; +} hermon_perfcntr64_ks_info_t; + +/* * The hermon_ks_info_t structure stores all the information necessary for * tracking the resources associated with each of the various kstats. In * addition to containing pointers to each of the counter and pic kstats, @@ -485,8 +509,17 @@ uint64_t hki_pic0; uint64_t hki_pic1; hermon_ks_mask_t hki_ib_perfcnt[HERMON_CNTR_NUMENTRIES]; + kt_did_t hki_perfcntr64_thread_id; + kmutex_t hki_perfcntr64_lock; + kcondvar_t hki_perfcntr64_cv; + uint_t hki_perfcntr64_flags; /* see below */ + hermon_perfcntr64_ks_info_t hki_perfcntr64[HERMON_MAX_PORTS]; } hermon_ks_info_t; +/* hki_perfcntr64_flags */ +#define HERMON_PERFCNTR64_THREAD_CREATED 0x0001 +#define HERMON_PERFCNTR64_THREAD_EXIT 0x0002 + /* * The hermon_ports_ioctl32_t, hermon_loopback_ioctl32_t, and * hermon_flash_ioctl32_s structures are used internally by the Hermon
--- a/usr/src/uts/common/sys/ib/ibnex/ibnex_devctl.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/ib/ibnex/ibnex_devctl.h Wed Jun 17 02:10:30 2009 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,14 +19,15 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_IB_IBNEX_IBNEX_DEVCTL_H #define _SYS_IB_IBNEX_IBNEX_DEVCTL_H -#pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/ib/ib_types.h> +#include <sys/ib/ibtl/ibtl_types.h> #ifdef __cplusplus extern "C" { @@ -138,6 +138,347 @@ uint32_t misc_arg; /* reserved */ } ibnex_ioctl_data_32_t; +/* + * General ibnex IOCTLs + * + * IBNEX_CTL_GET_API_VER + * ====================== + * + * Gets the version number of the API that IB nexus currently supports. + * + * arg - pointer to a structure of type ibnex_ctl_api_ver_t + * + * Caller does not set any field of this structure. When this IOCTL is issued, + * ib nexus will set api_ver_num field to the currently supported API + * version number. + * + * The caller could issue this IOCTL prior to issuing any other general + * ibnex IOCTLs to detect incompatible changes to the API. The caller may + * call other IOCTLs only if the api_ver_num matches the API version number + * used by the caller. 
+ * + * + * IBNEX_CTL_GET_HCA_LIST + * ====================== + * + * Gets GUIDs for all HCAs in the system + * + * arg - pointer to a structure of type ibnex_ctl_get_hca_list_t + * + * Caller allocates memory for HCA GUIDs. Sets hca_guids field to point to the + * allocated memory. Sets hca_guids_alloc_sz to the number of GUIDs for which + * memory has been allocated. + * + * Upon successful return from the IOCTL, nhcas will contain the number of + * HCAs in the system. HCA GUIDs will be copied into hca_guids array. + * The number of GUIDs copied is nhcas or hca_guids_alloc_sz, whichever is + * smaller. + * + * + * IBNEX_CTL_QUERY_HCA + * =================== + * + * Query HCA attributes + * + * arg - pointer to a structure of type ibnex_ctl_query_hca_t + * + * Caller sets hca_guid field of this structure. + * + * Upon successful return from the IOCTL, hca_info will contain HCA attributes + * for the specified GUID. + * + * + * IBNEX_CTL_QUERY_HCA_PORT + * ======================== + * + * Query HCA port attributes + * + * arg - pointer to a structure of type ibnex_ctl_query_hca_port_t + * + * Caller sets hca_guid and port_num fields. + * + * Caller allocates memory for sgid entries. Sets sgid_tbl to point to + * the allocated memory and sgid_tbl_alloc_sz to the number of sgid entries + * for which memory has been allocated. + * + * Caller allocates memory for pkey entries. Sets pkey_tbl to point to + * the allocated memory and pkey_tbl_alloc_sz to the number of pkey entries + * for which memory has been allocated. + * + * Upon successful return from the IOCTL, port_info will contain HCA port + * attributes for the specified HCA port. port_info.p_sgid_tbl_sz will contain + * the actual number of sgids associated with this port. port_info.p_pkey_tbl_sz + * will contain the actual number of pkeys associated with this port. + * + * port_info.p_sgid_tbl will point to an array containing sgids.
The number of + * sgids in the array is sgid_tbl_alloc_sz or port_info.p_sgid_tbl_sz + * whichever is smaller. + * + * port_info.p_pkey_tbl will point to an array containing pkeys. The number of + * pkeys in the array is pkey_tbl_alloc_sz or port_info.p_pkey_tbl_sz + * whichever is smaller. + */ + + +/* + * ibnex specific ioctls + * + * NOTE: The ioctl codes should not collide with generic devctl ioctls + * such as DEVCTL_AP_CONFIGURE. + */ +#define IBNEX_IOC (1 << 16) +#define IBNEX_CTL_GET_API_VER (IBNEX_IOC + 1) /* Get API version # */ +#define IBNEX_CTL_GET_HCA_LIST (IBNEX_IOC + 2) /* Get HCA GUID list */ +#define IBNEX_CTL_QUERY_HCA (IBNEX_IOC + 3) /* Query HCA attributes */ +#define IBNEX_CTL_QUERY_HCA_PORT (IBNEX_IOC + 4) /* Query HCA port attributes */ + +/* + * The device to open for issuing ibnex IOCTLs + */ +#define IBNEX_DEVCTL_DEV "/devices/ib:devctl" + +/* + * ibnex IOCTL API version number - to be incremented when making an + * incompatible change to the API. + */ +#define IBNEX_CTL_API_VERSION 1 + +#define MAX_HCA_DRVNAME_LEN 16 + +/* + * Data structure for IBNEX_CTL_GET_API_VER + */ +typedef struct ibnex_ctl_api_ver_s { + uint_t api_ver_num; /* out: supported API version */ +} ibnex_ctl_api_ver_t; + +/* + * Data structure for IBNEX_CTL_GET_HCA_LIST + */ +typedef struct ibnex_ctl_get_hca_list_s { + ib_guid_t *hca_guids; /* in/out: HCA GUID array */ + uint_t hca_guids_alloc_sz; /* in: # of HCA GUIDs for */ + /* which storage is allocated */ + uint_t nhcas; /* out: actual number of HCAs */ +} ibnex_ctl_get_hca_list_t; + +typedef struct ibnex_ctl_get_hca_list_32_s { + caddr32_t hca_guids; /* in/out: HCA GUID array */ + uint_t hca_guids_alloc_sz; /* in: # of HCA GUIDs for */ + /* which storage is allocated */ + uint_t nhcas; /* out: actual number of HCAs */ +} ibnex_ctl_get_hca_list_32_t; + +/* + * HCA information structure + */ +typedef struct ibnex_ctl_hca_info_s { + ib_guid_t hca_node_guid; /* Node GUID */ + ib_guid_t hca_si_guid; /* Optional System 
Image GUID */ + uint_t hca_nports; /* Number of physical ports */ + + /* HCA driver name and instance number */ + char hca_driver_name[MAX_HCA_DRVNAME_LEN]; + int hca_driver_instance; + + ibt_hca_flags_t hca_flags; /* HCA capabilities etc */ + ibt_hca_flags2_t hca_flags2; /* HCA capabilities etc */ + + uint32_t hca_vendor_id; /* Vendor ID */ + uint16_t hca_device_id; /* Device ID */ + uint32_t hca_version_id; /* Version ID */ + + uint_t hca_max_chans; /* Max channels supported */ + uint_t hca_max_chan_sz; /* Max outstanding WRs on any */ + /* channel */ + + uint_t hca_max_sgl; /* Max SGL entries per WR */ + + uint_t hca_max_cq; /* Max num of CQs supported */ + uint_t hca_max_cq_sz; /* Max capacity of each CQ */ + + ibt_page_sizes_t hca_page_sz; /* Bit mask of page sizes */ + + uint_t hca_max_memr; /* Max num of HCA mem regions */ + ib_memlen_t hca_max_memr_len; /* Largest block, in bytes of */ + /* mem that can be registered */ + uint_t hca_max_mem_win; /* Max Memory windows in HCA */ + + uint_t hca_max_rsc; /* Max Responder Resources of */ + /* this HCA for RDMAR/Atomics */ + /* with this HCA as target. */ + uint8_t hca_max_rdma_in_chan; /* Max RDMAR/Atomics in per */ + /* chan this HCA as target. 
*/ + uint8_t hca_max_rdma_out_chan; /* Max RDMA Reads/Atomics out */ + /* per channel by this HCA */ + uint_t hca_max_ipv6_chan; /* Max IPV6 channels in HCA */ + uint_t hca_max_ether_chan; /* Max Ether channels in HCA */ + + uint_t hca_max_mcg_chans; /* Max number of channels */ + /* that can join multicast */ + /* groups */ + uint_t hca_max_mcg; /* Max multicast groups */ + uint_t hca_max_chan_per_mcg; /* Max number of channels per */ + /* Multicast group in HCA */ + uint16_t hca_max_partitions; /* Max partitions in HCA */ + + ib_time_t hca_local_ack_delay; + + uint_t hca_max_port_sgid_tbl_sz; + uint16_t hca_max_port_pkey_tbl_sz; + uint_t hca_max_pd; /* Max# of Protection Domains */ + + uint_t hca_max_ud_dest; + uint_t hca_max_srqs; /* Max SRQs supported */ + uint_t hca_max_srqs_sz; /* Max outstanding WRs on any */ + /* SRQ */ + uint_t hca_max_srq_sgl; /* Max SGL entries per SRQ WR */ + uint_t hca_max_cq_handlers; + ibt_lkey_t hca_reserved_lkey; /* Reserved L_Key value */ + uint_t hca_max_fmrs; /* Max FMR Supported */ + + uint_t hca_max_lso_size; + uint_t hca_max_lso_hdr_size; + uint_t hca_max_inline_size; + + uint_t hca_max_cq_mod_count; /* CQ notify moderation */ + uint_t hca_max_cq_mod_usec; + + uint32_t hca_fw_major_version; /* firmware version */ + uint16_t hca_fw_minor_version; + uint16_t hca_fw_micro_version; + + /* detailed WQE size info */ + uint_t hca_ud_send_inline_sz; /* inline size in bytes */ + uint_t hca_conn_send_inline_sz; + uint_t hca_conn_rdmaw_inline_overhead; + uint_t hca_recv_sgl_sz; /* detailed SGL sizes */ + uint_t hca_ud_send_sgl_sz; + uint_t hca_conn_send_sgl_sz; + uint_t hca_conn_rdma_sgl_overhead; + int32_t hca_pad; +} ibnex_ctl_hca_info_t; + +/* + * Data structure for IBNEX_CTL_QUERY_HCA + */ +typedef struct ibnex_ctl_query_hca_s { + ib_guid_t hca_guid; /* in: HCA GUID */ + ibnex_ctl_hca_info_t hca_info; /* out: HCA information */ +} ibnex_ctl_query_hca_t; + +/* + * HCA port information structure + */ +typedef struct 
ibnex_ctl_hca_port_info_s { + ib_lid_t p_lid; /* Base LID of port */ + ib_qkey_cntr_t p_qkey_violations; /* Bad Q_Key cnt */ + ib_pkey_cntr_t p_pkey_violations; /* Optional bad P_Key cnt */ + uint8_t p_sm_sl; /* SM Service level */ + ib_port_phys_state_t p_phys_state; + ib_lid_t p_sm_lid; /* SM LID */ + ibt_port_state_t p_linkstate; /* Port state */ + uint8_t p_port_num; /* Port number */ + + ib_link_width_t p_width_supported; + ib_link_width_t p_width_enabled; + ib_link_width_t p_width_active; + + ib_mtu_t p_mtu; /* Max transfer unit - pkt */ + uint8_t p_lmc; /* LID mask control */ + + ib_link_speed_t p_speed_supported; + ib_link_speed_t p_speed_enabled; + ib_link_speed_t p_speed_active; + + ib_gid_t *p_sgid_tbl; /* SGID Table */ + uint_t p_sgid_tbl_sz; /* # of entries in SGID table */ + + ib_pkey_t *p_pkey_tbl; /* P_Key table */ + uint16_t p_pkey_tbl_sz; /* # of entries in P_Key tbl */ + uint16_t p_def_pkey_ix; /* default pkey index for TI */ + + uint8_t p_max_vl; /* Max num of virtual lanes */ + uint8_t p_init_type_reply; /* Optional InitTypeReply */ + ib_time_t p_subnet_timeout; /* Max Subnet Timeout */ + ibt_port_caps_t p_capabilities; /* Port Capabilities */ + uint32_t p_msg_sz; /* Max message size */ +} ibnex_ctl_hca_port_info_t; + +typedef struct ibnex_ctl_hca_port_info_32_s { + ib_lid_t p_lid; /* Base LID of port */ + ib_qkey_cntr_t p_qkey_violations; /* Bad Q_Key cnt */ + ib_pkey_cntr_t p_pkey_violations; /* Optional bad P_Key cnt */ + uint8_t p_sm_sl; /* SM Service level */ + ib_port_phys_state_t p_phys_state; + ib_lid_t p_sm_lid; /* SM LID */ + ibt_port_state_t p_linkstate; /* Port state */ + uint8_t p_port_num; /* Port number */ + + ib_link_width_t p_width_supported; + ib_link_width_t p_width_enabled; + ib_link_width_t p_width_active; + + ib_mtu_t p_mtu; /* Max transfer unit - pkt */ + uint8_t p_lmc; /* LID mask control */ + + ib_link_speed_t p_speed_supported; + ib_link_speed_t p_speed_enabled; + ib_link_speed_t p_speed_active; + + caddr32_t 
p_sgid_tbl; /* SGID Table */ + uint_t p_sgid_tbl_sz; /* # of entries in SGID table */ + + caddr32_t p_pkey_tbl; /* P_Key table */ + uint16_t p_pkey_tbl_sz; /* # of entries in P_Key tbl */ + uint16_t p_def_pkey_ix; /* default pkey index for TI */ + + uint8_t p_max_vl; /* Max num of virtual lanes */ + uint8_t p_init_type_reply; /* Optional InitTypeReply */ + ib_time_t p_subnet_timeout; /* Max Subnet Timeout */ + ibt_port_caps_t p_capabilities; /* Port Capabilities */ + uint32_t p_msg_sz; /* Max message size */ +} ibnex_ctl_hca_port_info_32_t; + +/* + * Data structure for IBNEX_CTL_QUERY_HCA_PORT + */ +typedef struct ibnex_ctl_query_hca_port_s { + ib_guid_t hca_guid; /* in: HCA GUID */ + uint_t port_num; /* in: port number */ + + ib_gid_t *sgid_tbl; /* in: SGID Table */ + uint_t sgid_tbl_alloc_sz; /* in: # of entries in SGID table */ + + ib_pkey_t *pkey_tbl; /* in: P_Key table */ + uint_t pkey_tbl_alloc_sz; /* in: # of entries in P_Key table */ + + uint32_t pad; + ibnex_ctl_hca_port_info_t port_info; /* out: port information */ +} ibnex_ctl_query_hca_port_t; + +typedef struct ibnex_ctl_query_hca_port_32_s { + ib_guid_t hca_guid; /* in: HCA GUID */ + uint_t port_num; /* in: port number */ + + caddr32_t sgid_tbl; /* in: SGID Table */ + uint_t sgid_tbl_alloc_sz; /* in: # of entries in SGID table */ + + caddr32_t pkey_tbl; /* in: P_Key table */ + uint_t pkey_tbl_alloc_sz; /* in: # of entries in P_Key table */ + + uint32_t pad; + ibnex_ctl_hca_port_info_32_t port_info; /* out: port information */ +} ibnex_ctl_query_hca_port_32_t; + +#ifdef _KERNEL +_NOTE(SCHEME_PROTECTS_DATA("", ibnex_ctl_hca_info_s)) +_NOTE(SCHEME_PROTECTS_DATA("", ibnex_ctl_hca_port_info_s)) +_NOTE(SCHEME_PROTECTS_DATA("", ibnex_ctl_hca_port_info_32_s)) +_NOTE(SCHEME_PROTECTS_DATA("", ibnex_ctl_query_hca_port_s)) +_NOTE(SCHEME_PROTECTS_DATA("", ibnex_ctl_query_hca_port_32_s)) +#endif + #ifdef __cplusplus }
--- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl.h Wed Jun 17 02:10:30 2009 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_IB_IBTL_IMPL_IBTL_H #define _SYS_IB_IBTL_IMPL_IBTL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ibtl.h * @@ -115,6 +112,16 @@ } ibtl_async_flags_t; /* + * Keeps track of all data associated with HCA port kstats. + * + * pks_hca_devp points at a struct ibtl_hca_devinfo_s; pks_stats_ksp and + * pks_pkeys_ksp are two separate struct kstat pointers. + * NOTE(review): presumably there is one entry per HCA port, with one + * kstat for the port statistics and one for the P_Key table - confirm + * against the code that creates them. + */ +typedef struct ibtl_hca_port_kstat_s { + struct ibtl_hca_devinfo_s *pks_hca_devp; + uint_t pks_port_num; + struct kstat *pks_stats_ksp; + struct kstat *pks_pkeys_ksp; +} ibtl_hca_port_kstat_t; + +/* * Define a per CI HCA Device structure. Its address is returned * to the CI as an opaque IBTL HCA Handle - ibc_hdl_t. * @@ -155,6 +162,8 @@ uint32_t hd_async_task_cnt; /* #clients doing asyncs */ kcondvar_t hd_async_task_cv; /* wakeup when #clients = 0 */ uint_t hd_multism; /* 1 - MultiSM, 0 - Single SM */ + ibtl_hca_port_kstat_t *hd_hca_port_ks_info; /* port kstat ptr */ + uint_t hd_hca_port_ks_info_len; /* port kstat size */ /* The following must be at the end of this struct */ ibtl_async_port_status_t hd_async_port[1]; /* per-port async data */ } ibtl_hca_devinfo_t;
--- a/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/ib/ibtl/impl/ibtl_ibnex.h Wed Jun 17 02:10:30 2009 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -226,6 +226,27 @@ */ ibt_status_t ibtl_ibnex_phci_unregister(dev_info_t *hca_dip); +/* + * Function: + * ibtl_ibnex_query_hca_byguid + * Input: + * hca_guid - The HCA's node GUID. + * driver_name_size - size of the caller allocated driver_name buffer + * Output: + * hca_attrs - caller allocated buffer which will contain + * HCA attributes upon success + * driver_name - caller allocated buffer which will contain + * HCA driver name upon success + * driver_instance - HCA driver instance + * Returns: + * IBT_SUCCESS/IBT_FAILURE + * Description: + * Get the HCA attributes, driver name and instance number of the + * specified HCA. + * + * The prototype below leaves its parameters unnamed. Judging by their + * types they are, in order: hca_guid, hca_attrs, driver_name, + * driver_name_size, driver_instance. + */ +ibt_status_t +ibtl_ibnex_query_hca_byguid(ib_guid_t, ibt_hca_attr_t *, char *, size_t, int *); + #ifdef __cplusplus } #endif
--- a/usr/src/uts/common/sys/sunddi.h Tue Jun 16 10:40:20 2009 -0600 +++ b/usr/src/uts/common/sys/sunddi.h Wed Jun 17 02:10:30 2009 -0700 @@ -211,6 +211,8 @@ #define DDI_NT_SATA_NEXUS "ddi_ctl:devctl:sata" /* nexus drivers */ +#define DDI_NT_IB_NEXUS "ddi_ctl:devctl:ib" /* nexus drivers */ + #define DDI_NT_ATTACHMENT_POINT "ddi_ctl:attachment_point" /* attachment pt */ #define DDI_NT_SCSI_ATTACHMENT_POINT "ddi_ctl:attachment_point:scsi"