Mercurial > illumos > illumos-gate
changeset 10300:9c18fcead732
6861355 ibd datalink device in the 'unknown' state
author | Venki Rajagopalan <Venki.Rajagopalan@Sun.COM> |
---|---|
date | Thu, 13 Aug 2009 11:52:11 -0400 |
parents | 80845694147f |
children | 0c33f1942e85 |
files | usr/src/uts/common/io/ib/clients/ibd/ibd.c usr/src/uts/common/sys/ib/clients/ibd/ibd.h |
diffstat | 2 files changed, 156 insertions(+), 84 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c Wed Aug 12 22:17:19 2009 -0700 +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c Thu Aug 13 11:52:11 2009 -0400 @@ -256,6 +256,14 @@ #define IBD_DRV_STARTED 0x80000 /* + * Start/stop in-progress flags; note that restart must always remain + * the OR of start and stop flag values. + */ +#define IBD_DRV_START_IN_PROGRESS 0x10000000 +#define IBD_DRV_STOP_IN_PROGRESS 0x20000000 +#define IBD_DRV_RESTART_IN_PROGRESS 0x30000000 + +/* * Miscellaneous constants */ #define IBD_SEND 0 @@ -433,7 +441,10 @@ static int ibd_get_port_details(ibd_state_t *); static int ibd_alloc_cqs(ibd_state_t *); static int ibd_setup_ud_channel(ibd_state_t *); -static int ibd_undo_m_start(ibd_state_t *); +static int ibd_start(ibd_state_t *); +static int ibd_undo_start(ibd_state_t *, link_state_t); +static void ibd_set_mac_progress(ibd_state_t *, uint_t); +static void ibd_clr_mac_progress(ibd_state_t *, uint_t); /* @@ -2006,19 +2017,14 @@ link_state_t new_link_state = LINK_STATE_UP; uint8_t itreply; uint16_t pkix; - - /* - * Do not send a request to the async daemon if it has not - * yet been created or is being destroyed. If the async - * daemon has not yet been created, we still need to track - * last known state of the link. If this code races with the - * detach path, then we are assured that the detach path has - * not yet done the ibt_close_hca (which waits for all async - * events to complete). If the code races with the attach path, - * we need to validate the pkey/gid (in the link_up case) if - * the initialization path has already set these up and created - * IBTF resources based on the values. - */ + int ret; + + /* + * Let's not race with a plumb or an unplumb; if we detect a + * pkey relocation event later on here, we may have to restart. + */ + ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS); + mutex_enter(&state->id_link_mutex); /* @@ -2027,7 +2033,7 @@ */ if (state->id_link_state == LINK_STATE_UNKNOWN) { mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } /* @@ -2085,12 +2091,21 @@ ibt_free_portinfo(port_infop, port_infosz); mutex_exit(&state->id_link_mutex); - ibd_m_stop(state); - if ((ibt_status = ibd_m_start(state)) != IBT_SUCCESS) { - DPRINT(10, "link_mod: cannot " - "restart, ret=%d", ibt_status); + /* + * Currently a restart is required if our pkey has moved + * in the pkey table. If we get the ibt_recycle_ud() to + * work as documented (expected), we may be able to + * avoid a complete restart. Note that we've already + * marked both the start and stop 'in-progress' flags, + * so it is ok to go ahead and do this restart. + */ + ibd_undo_start(state, LINK_STATE_DOWN); + if ((ret = ibd_start(state)) != 0) { + DPRINT(10, "ibd_restart: cannot restart, " + "ret=%d", ret); } - return; + + goto link_mod_return; } else { new_link_state = LINK_STATE_DOWN; } @@ -2106,7 +2121,7 @@ */ if (state->id_link_state == new_link_state) { mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } /* @@ -2116,7 +2131,7 @@ if (!ibd_async_safe(state)) { state->id_link_state = new_link_state; mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } mutex_exit(&state->id_link_mutex); @@ -2144,6 +2159,9 @@ req = kmem_cache_alloc(state->id_req_kmc, KM_SLEEP); req->rq_ptr = (void *)opcode; ibd_queue_work_slot(state, req, IBD_ASYNC_LINK); + +link_mod_return: + ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS); } /* @@ -2619,6 +2637,9 @@ state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL); + return (DDI_SUCCESS); } @@ -2628,6 +2649,9 @@ static void ibd_state_fini(ibd_state_t *state) { + cv_destroy(&state->id_macst_cv); + mutex_destroy(&state->id_macst_lock); + kmem_cache_destroy(state->id_req_kmc); mutex_destroy(&state->id_rxpost_lock); @@ -4050,7 +4074,7 @@ mutex_exit(&state->id_link_mutex); DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() " "failed, ret=%d", ret); - return (DDI_FAILURE); + return (ENETDOWN); } /* @@ -4061,7 +4085,7 @@ mutex_exit(&state->id_link_mutex); ibt_free_portinfo(port_infop, port_infosz); DPRINT(10, "ibd_get_port_details: port is not active"); - return (DDI_FAILURE); + return (ENETDOWN); } /* @@ -4073,7 +4097,7 @@ ibt_free_portinfo(port_infop, port_infosz); DPRINT(10, "ibd_get_port_details: ibt_pkey2index " "failed, ret=%d", ret); - return (DDI_FAILURE); + return (ENONET); } state->id_mtu = (128 << port_infop->p_mtu); @@ -4088,7 +4112,7 @@ */ state->id_link_speed = ibd_get_portspeed(state); - return (DDI_SUCCESS); + return (0); } static int @@ -4263,7 +4287,7 @@ } static int -ibd_undo_m_start(ibd_state_t *state) +ibd_undo_start(ibd_state_t *state, link_state_t cur_link_state) { uint32_t progress = state->id_mac_state; uint_t attempts; @@ -4273,17 +4297,23 @@ uint8_t jstate; /* - * Before we try to stop/undo whatever we did in ibd_m_start(), - * we need to mark the link state as unknown to prevent nw - * layer from using this instance for any new transfers. - */ - if (progress & IBD_DRV_PORT_DETAILS_OBTAINED) { + * Before we try to stop/undo whatever we did in ibd_start(), + * we need to mark the link state appropriately to prevent the + * ip layer from using this instance for any new transfers. Note + * that if the original state of the link was "up" when we're + * here, we'll set the final link state to "unknown", to behave + * in the same fashion as other ethernet drivers. + */ + mutex_enter(&state->id_link_mutex); + if (cur_link_state == LINK_STATE_DOWN) { + state->id_link_state = cur_link_state; + } else { state->id_link_state = LINK_STATE_UNKNOWN; - mac_link_update(state->id_mh, state->id_link_state); - - state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED); - } - + } + mutex_exit(&state->id_link_mutex); + mac_link_update(state->id_mh, state->id_link_state); + + state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED); if (progress & IBD_DRV_STARTED) { state->id_mac_state &= (~IBD_DRV_STARTED); } @@ -4309,7 +4339,7 @@ * we turned off the notification and * return failure. */ - DPRINT(2, "ibd_undo_m_start: " + DPRINT(2, "ibd_undo_start: " "reclaiming failed"); ibd_poll_compq(state, state->id_rcq_hdl); ibt_set_cq_handler(state->id_rcq_hdl, @@ -4342,7 +4372,7 @@ */ if ((ret = ibt_flush_channel(state->id_chnl_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: flush_channel " + DPRINT(10, "ibd_undo_start: flush_channel " "failed, ret=%d", ret); } @@ -4394,7 +4424,7 @@ * has also been suppressed at this point. Thus, no locks * are required while traversing the mc full list. */ - DPRINT(2, "ibd_undo_m_start: clear full cache entries"); + DPRINT(2, "ibd_undo_start: clear full cache entries"); mce = list_head(&state->id_mc_full); while (mce != NULL) { mgid = mce->mc_info.mc_adds_vect.av_dgid; @@ -4418,7 +4448,7 @@ if (progress & IBD_DRV_UD_CHANNEL_SETUP) { if ((ret = ibt_free_channel(state->id_chnl_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_channel " + DPRINT(10, "ibd_undo_start: free_channel " "failed, ret=%d", ret); } @@ -4431,7 +4461,7 @@ sizeof (ibt_wc_t) * state->id_txwcs_size); if ((ret = ibt_free_cq(state->id_scq_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_cq(scq) " + DPRINT(10, "ibd_undo_start: free_cq(scq) " "failed, ret=%d", ret); } } @@ -4439,7 +4469,7 @@ kmem_free(state->id_rxwcs, sizeof (ibt_wc_t) * state->id_rxwcs_size); if ((ret = ibt_free_cq(state->id_rcq_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_cq(rcq) failed, " + DPRINT(10, "ibd_undo_start: free_cq(rcq) failed, " "ret=%d", ret); } @@ -4478,6 +4508,34 @@ } /* + * These pair of routines are used to set/clear the condition that + * the caller is likely to do something to change the id_mac_state. + * If there's already someone doing either a start or a stop (possibly + * due to the async handler detecting a pkey relocation event, a plumb + * or dlpi_open, or an unplumb or dlpi_close coming in), we wait until + * that's done. + */ +static void +ibd_set_mac_progress(ibd_state_t *state, uint_t flag) +{ + mutex_enter(&state->id_macst_lock); + while (state->id_mac_state & IBD_DRV_RESTART_IN_PROGRESS) + cv_wait(&state->id_macst_cv, &state->id_macst_lock); + + state->id_mac_state |= flag; + mutex_exit(&state->id_macst_lock); +} + +static void +ibd_clr_mac_progress(ibd_state_t *state, uint_t flag) +{ + mutex_enter(&state->id_macst_lock); + state->id_mac_state &= (~flag); + cv_signal(&state->id_macst_cv); + mutex_exit(&state->id_macst_lock); +} + +/* * GLDv3 entry point to start hardware. */ /*ARGSUSED*/ @@ -4485,6 +4543,20 @@ ibd_m_start(void *arg) { ibd_state_t *state = arg; + int ret; + + ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS); + + ret = ibd_start(state); + + ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS); + + return (ret); +} + +static int +ibd_start(ibd_state_t *state) +{ kthread_t *kht; int err; ibt_status_t ret; @@ -4494,11 +4566,11 @@ /* * Get port details; if we fail here, very likely the port - * state is inactive or the pkey can't be verified - */ - if (ibd_get_port_details(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_get_port_details() failed"); - return (EAGAIN); + * state is inactive or the pkey can't be verified. + */ + if ((err = ibd_get_port_details(state)) != 0) { + DPRINT(10, "ibd_start: ibd_get_port_details() failed"); + goto start_fail; } state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED; @@ -4506,9 +4578,9 @@ * Find the IPoIB broadcast group */ if (ibd_find_bgroup(state) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_find_bgroup() failed"); - err = ENOENT; - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_find_bgroup() failed"); + err = ENOTACTIVE; + goto start_fail; } state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND; @@ -4517,9 +4589,9 @@ * it is most likely due to a lack of resources */ if (ibd_acache_init(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_acache_init() failed"); + DPRINT(10, "ibd_start: ibd_acache_init() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_ACACHE_INITIALIZED; @@ -4527,9 +4599,9 @@ * Allocate send and receive completion queues */ if (ibd_alloc_cqs(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_alloc_cqs() failed"); + DPRINT(10, "ibd_start: ibd_alloc_cqs() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_CQS_ALLOCD; @@ -4538,8 +4610,8 @@ */ if (ibd_setup_ud_channel(state) != DDI_SUCCESS) { err = ENOMEM; - DPRINT(10, "ibd_m_start: ibd_setup_ud_channel() failed"); - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_setup_ud_channel() failed"); + goto start_fail; } state->id_mac_state |= IBD_DRV_UD_CHANNEL_SETUP; @@ -4547,9 +4619,9 @@ * Allocate and initialize the tx buffer list */ if (ibd_init_txlist(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_init_txlist() failed"); + DPRINT(10, "ibd_start: ibd_init_txlist() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_TXLIST_ALLOCD; @@ -4560,10 +4632,10 @@ ibt_set_cq_handler(state->id_scq_hdl, ibd_scq_handler, state); if ((ret = ibt_enable_cq_notify(state->id_scq_hdl, IBT_NEXT_COMPLETION)) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(scq) " + DPRINT(10, "ibd_start: ibt_enable_cq_notify(scq) " "failed, ret=%d", ret); err = EINVAL; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_SCQ_NOTIFY_ENABLED; } @@ -4572,9 +4644,9 @@ * Allocate and initialize the rx buffer list */ if (ibd_init_rxlist(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_init_rxlist() failed"); + DPRINT(10, "ibd_start: ibd_init_rxlist() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_RXLIST_ALLOCD; @@ -4582,9 +4654,9 @@ * Join IPoIB broadcast group */ if (ibd_join_group(state, state->id_mgid, IB_MC_JSTATE_FULL) == NULL) { - DPRINT(10, "ibd_m_start: ibd_join_group() failed"); - err = EINVAL; - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_join_group() failed"); + err = ENOTACTIVE; + goto start_fail; } state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED; @@ -4617,10 +4689,10 @@ ibt_set_cq_handler(state->id_rcq_hdl, ibd_rcq_handler, state); if ((ret = ibt_enable_cq_notify(state->id_rcq_hdl, IBT_NEXT_COMPLETION)) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(rcq) " + DPRINT(10, "ibd_start: ibt_enable_cq_notify(rcq) " "failed, ret=%d", ret); err = EINVAL; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED; @@ -4656,14 +4728,14 @@ return (DDI_SUCCESS); -m_start_fail: - /* - * If we ran into a problem during ibd_m_start() and ran into +start_fail: + /* + * If we ran into a problem during ibd_start() and ran into * some other problem during undoing our partial work, we can't * do anything about it. Ignore any errors we might get from - * ibd_undo_m_start() and just return the original error we got. - */ - (void) ibd_undo_m_start(state); + * ibd_undo_start() and just return the original error we got. + */ + (void) ibd_undo_start(state, LINK_STATE_DOWN); return (err); } @@ -4674,15 +4746,13 @@ static void ibd_m_stop(void *arg) { - ibd_state_t *state = arg; - - /* - * Since ibd_m_stop() doesn't expect any return, we cannot - * fail even if we run into some problem with ibd_undo_m_start(). - * The best we can do is to leave it in a good state, so - * perhaps a future unplumb will succeed. - */ - (void) ibd_undo_m_start(state); + ibd_state_t *state = (ibd_state_t *)arg; + + ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS); + + (void) ibd_undo_start(state, state->id_link_state); + + ibd_clr_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS); } /*
--- a/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Wed Aug 12 22:17:19 2009 -0700 +++ b/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Thu Aug 13 11:52:11 2009 -0400 @@ -297,7 +297,6 @@ uint16_t id_pkix; uint8_t id_port; ibt_mcg_info_t *id_mcinfo; - boolean_t id_bgroup_created; mac_handle_t id_mh; mac_resource_handle_t id_rh; @@ -363,6 +362,9 @@ int id_hca_res_lkey_capab; ibt_lkey_t id_res_lkey; + boolean_t id_bgroup_created; + kmutex_t id_macst_lock; + kcondvar_t id_macst_cv; uint32_t id_mac_state; } ibd_state_t;