Mercurial > illumos > illumos-gate
changeset 4154:bd1265f2f9de
6546475 rds_if_lookup_by_name fails for clrprivnet interface
6546482 ioctl call fails with EINTR
6546498 Panic at rds_handle_portup_event+0x98 due to hcap being NULL
6546511 crash dump showed multiple instances of rdsib driver loaded
6546528 RDS fails to failover sessions across HCAs (card failover)
6546543 Multiple path up/down calls corrupt rds_path_map causing system to panic
author | agiri |
---|---|
date | Thu, 03 May 2007 08:24:50 -0700 |
parents | 20265a755883 |
children | 3e38fab0382c |
files | usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c usr/src/uts/common/io/ib/clients/rds/rdsib.c usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c usr/src/uts/common/io/ib/clients/rds/rdsib_sc.c usr/src/uts/common/sys/ib/clients/rds/rdsib_buf.h usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h |
diffstat | 9 files changed, 427 insertions(+), 108 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rds_ioctl.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -60,6 +60,7 @@ vnode_t *kvp, *vp; TIUSER *tiptr; struct strioctl iocb; + k_sigset_t smask; int err = 0; if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, @@ -79,7 +80,9 @@ iocb.ic_timout = 0; iocb.ic_len = len; iocb.ic_dp = arg; + sigintr(&smask, 0); err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb); + sigunintr(&smask); (void) t_kclose(tiptr, 0); VN_RELE(kvp); return (err); @@ -91,6 +94,7 @@ dl_info_req_t *info_req; union DL_primitives *dl_prim; mblk_t *mp; + k_sigset_t smask; int error; if ((mp = allocb(sizeof (dl_info_req_t), BPRI_MED)) == NULL) { @@ -103,12 +107,16 @@ mp->b_wptr += sizeof (dl_info_req_t); info_req->dl_primitive = DL_INFO_REQ; + sigintr(&smask, 0); if ((error = ldi_putmsg(lh, mp)) != 0) { + sigunintr(&smask); return (error); } if ((error = ldi_getmsg(lh, &mp, (timestruc_t *)NULL)) != 0) { + sigunintr(&smask); return (error); } + sigunintr(&smask); dl_prim = (union DL_primitives *)(uintptr_t)mp->b_rptr; switch (dl_prim->dl_primitive) { @@ -131,7 +139,12 @@ } -static boolean_t +/* + * Return 0 if the interface is IB. + * Return error (>0) if any error is encountered during processing. + * Return -1 if the interface is not IB and no error. + */ +static int rds_is_ib_interface(char *name) { @@ -156,33 +169,35 @@ /* * null name. */ - return (B_FALSE); + return (-1); } if (strncmp("lo", name, i) == 0) { /* * loopback interface is considered RDS capable */ - return (B_TRUE); + return (0); } (void) strncat((dev_path + sizeof ("/dev/") -1), name, i); ret = ldi_open_by_name(dev_path, FREAD|FWRITE, kcred, &lh, rds_li); if (ret != 0) { - return (B_FALSE); + return (ret); } ret = rds_dl_info(lh, &info); - (void) ldi_close(lh, FREAD|FWRITE, kcred); - - if (ret != 0 || (info.dl_mac_type != DL_IB && - !rds_transport_ops->rds_transport_if_lookup_by_name(name))) { - return (B_FALSE); + if (ret != 0) { + return (ret); } - return (B_TRUE); + if (info.dl_mac_type != DL_IB && + !rds_transport_ops->rds_transport_if_lookup_by_name(name)) { + return (-1); + } + + return (0); } void @@ -226,8 +241,14 @@ ifr = kifc.ifc_req; n = num_ifs; for (num_ifs = 0; n > 0; ifr++) { - if (rds_is_ib_interface(ifr->ifr_name)) { + err = rds_is_ib_interface(ifr->ifr_name); + if (err == 0) { num_ifs++; + } else if (err > 0) { + num_ifs = 0; + break; + } else { + err = 0; } n--; } @@ -277,17 +298,21 @@ for (; num_ifs > 0 && (int)((uintptr_t)mp1->b_wptr - (uintptr_t)mp1->b_rptr) < ubuf_size; num_ifs--, ifr++) { - if (rds_is_ib_interface(ifr->ifr_name)) { + err = rds_is_ib_interface(ifr->ifr_name); + if (err == 0) { ifr->ifr_addr.sa_family = AF_INET_OFFLOAD; bcopy((caddr_t)ifr, ptr, sizeof (struct ifreq)); ptr++; mp1->b_wptr = (uchar_t *)ptr; + } else if (err > 0) { + break; + } else { + err = 0; } } STRUCT_FSET(ifc, ifc_len, (int)((uintptr_t)mp1->b_wptr - (uintptr_t)mp1->b_rptr)); - kmem_free(kifc.ifc_buf, kifc.ifc_len); } break; @@ -431,7 +456,7 @@ sin = (struct sockaddr_in *)(uintptr_t)&ifr->ifr_addr; if ((sin->sin_addr.s_addr == addr) && - rds_is_ib_interface(ifr->ifr_name)) { + (rds_is_ib_interface(ifr->ifr_name) == 0)) { ret = B_TRUE; break; }
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -270,6 +270,7 @@ TASKQ_DEFAULTPRI, 0); if (rds_taskq == NULL) { RDS_DPRINTF1(LABEL, "ddi_taskq_create failed for rds_taskq"); + rdsib_dev_info = NULL; return (DDI_FAILURE); } @@ -278,6 +279,7 @@ cmn_err(CE_CONT, "ddi_create_minor_node failed: %d", ret); ddi_taskq_destroy(rds_taskq); rds_taskq = NULL; + rdsib_dev_info = NULL; return (DDI_FAILURE); } @@ -313,6 +315,8 @@ rds_taskq = NULL; } + rdsib_dev_info = NULL; + RDS_DPRINTF4("rdsib_detach", "return"); return (DDI_SUCCESS);
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_buf.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -523,6 +523,107 @@ return (0); } +int +rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) +{ + rds_buf_t *bp; + rds_hca_t *hcap; + ibt_mr_attr_t mem_attr; + ibt_mr_desc_t mem_desc; + rds_bufpool_t *spool; + int ret; + + RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep); + + spool = &ep->ep_sndpool; + ASSERT(spool->pool_memp != NULL); + + /* deregister the send pool memory from the previous HCA */ + hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); + if (hcap == NULL) { + RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", + ep->ep_hca_guid); + } else { + if (ep->ep_snd_mrhdl != NULL) { + (void) ibt_deregister_mr(hcap->hca_hdl, + ep->ep_snd_mrhdl); + ep->ep_snd_mrhdl = NULL; + ep->ep_snd_lkey = 0; + } + + if ((ep->ep_type == RDS_EP_TYPE_DATA) && + (ep->ep_ackhdl != NULL)) { + (void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); + ep->ep_ackhdl = NULL; + ep->ep_ack_rkey = 0; + } + + ep->ep_hca_guid = NULL; + } + + /* get the hcap for the new HCA */ + hcap = rds_get_hcap(rdsib_statep, hca_guid); + if (hcap == NULL) { + RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", + hca_guid); + return (-1); + } + + /* register the send memory */ + mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp; + mem_attr.mr_len = spool->pool_memsize; + mem_attr.mr_as = NULL; + mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; + + ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, + &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rds_reinit_send_pool", + "EP(%p): ibt_register_mr failed: %d", ep, ret); + return (-1); + } + ep->ep_snd_lkey = mem_desc.md_lkey; + + /* register the acknowledgement space */ + if (ep->ep_type == RDS_EP_TYPE_DATA) { + mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr; + mem_attr.mr_len = sizeof (uintptr_t); + mem_attr.mr_as = NULL; + mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | + IBT_MR_ENABLE_REMOTE_WRITE; + + ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, + &mem_attr, &ep->ep_ackhdl, &mem_desc); + if (ret != IBT_SUCCESS) { + RDS_DPRINTF2("rds_reinit_send_pool", + "EP(%p): ibt_register_mr for ack failed: %d", + ep, ret); + (void) ibt_deregister_mr(hcap->hca_hdl, + ep->ep_snd_mrhdl); + ep->ep_snd_mrhdl = NULL; + ep->ep_snd_lkey = 0; + return (-1); + } + ep->ep_ack_rkey = mem_desc.md_rkey; + + /* update the LKEY in the acknowledgement WR */ + ep->ep_ackds.ds_key = ep->ep_snd_lkey; + } + + /* update the LKEY in each buffer */ + bp = spool->pool_headp; + while (bp) { + bp->buf_ds.ds_key = ep->ep_snd_lkey; + bp = bp->buf_nextp; + } + + ep->ep_hca_guid = hca_guid; + + RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep); + + return (0); +} + void rds_free_recv_pool(rds_ep_t *ep) {
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -103,7 +103,6 @@ rds_session_t *sp; rds_ep_t *ep; ibt_channel_hdl_t chanhdl; - rds_hca_t *hcap; int ret; RDS_DPRINTF2("rds_handle_cm_req", "Enter"); @@ -152,6 +151,16 @@ return (IBT_CM_REJECT); } + /* + * RDS needs more time to process a failover REQ so send an MRA. + * Otherwise, the remote may retry the REQ and fail the connection. + */ + if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { + RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); + (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, + 10000000 /* 10 sec */, NULL, 0); + } + /* Is there a session to the destination node? */ rw_enter(&statep->rds_sessionlock, RW_READER); sp = rds_session_lkup(statep, cmp.cmp_localip, rgid.gid_guid); @@ -199,21 +208,6 @@ sp->session_myip = cmp.cmp_remip; sp->session_lgid = lgid; sp->session_rgid = rgid; - hcap = rds_gid_to_hcap(statep, lgid); - - /* change the data channel */ - mutex_enter(&sp->session_dataep.ep_lock); - sp->session_dataep.ep_myip = cmp.cmp_remip; - sp->session_dataep.ep_hca_guid = - hcap->hca_guid; - mutex_exit(&sp->session_dataep.ep_lock); - - /* change the control channel */ - mutex_enter(&sp->session_ctrlep.ep_lock); - sp->session_ctrlep.ep_myip = cmp.cmp_remip; - sp->session_ctrlep.ep_hca_guid = - hcap->hca_guid; - mutex_exit(&sp->session_ctrlep.ep_lock); } } } @@ -237,23 +231,22 @@ /* move the session to init state */ rw_enter(&sp->session_lock, RW_WRITER); - sp->session_state = RDS_SESSION_STATE_INIT; + ret = rds_session_reinit(sp, lgid); sp->session_myip = cmp.cmp_remip; sp->session_lgid = lgid; sp->session_rgid = rgid; - hcap = rds_gid_to_hcap(statep, lgid); - - /* change the data channel */ - mutex_enter(&sp->session_dataep.ep_lock); - sp->session_dataep.ep_myip = cmp.cmp_remip; - sp->session_dataep.ep_hca_guid = hcap->hca_guid; - mutex_exit(&sp->session_dataep.ep_lock); - - /* change the control channel */ - mutex_enter(&sp->session_ctrlep.ep_lock); - sp->session_ctrlep.ep_myip = cmp.cmp_remip; - sp->session_ctrlep.ep_hca_guid = hcap->hca_guid; - mutex_exit(&sp->session_ctrlep.ep_lock); + if (ret != 0) { + rds_session_fini(sp); + sp->session_state = RDS_SESSION_STATE_FAILED; + RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " + "RDS_SESSION_STATE_FAILED", sp); + rw_exit(&sp->session_lock); + return (IBT_CM_REJECT); + } else { + sp->session_state = RDS_SESSION_STATE_INIT; + RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " + "RDS_SESSION_STATE_INIT", sp); + } if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { ep = &sp->session_ctrlep; @@ -333,15 +326,6 @@ */ ASSERT(sp->session_type == RDS_SESSION_ACTIVE); ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; - ep->ep_myip = cmp.cmp_remip; - hcap = rds_gid_to_hcap(statep, lgid); - ep->ep_hca_guid = hcap->hca_guid; - - /* change the control channel too */ - mutex_enter(&sp->session_ctrlep.ep_lock); - sp->session_ctrlep.ep_myip = cmp.cmp_remip; - sp->session_ctrlep.ep_hca_guid = hcap->hca_guid; - mutex_exit(&sp->session_dataep.ep_lock); rw_enter(&sp->session_lock, RW_WRITER); sp->session_type = RDS_SESSION_PASSIVE; @@ -565,6 +549,15 @@ sp->session_state = RDS_SESSION_STATE_ERROR; RDS_DPRINTF3("rds_handle_cm_event_failure", "SP(%p) State RDS_SESSION_STATE_ERROR", sp); + + /* + * Store the cm_channel for freeing later + * Active side frees it on ibt_open_rc_channel + * failure + */ + if (ep->ep_chanhdl == NULL) { + ep->ep_chanhdl = evp->cm_channel; + } rw_exit(&sp->session_lock); /* @@ -788,6 +781,7 @@ ep->ep_recvcq = NULL; (void) ibt_free_cq(ep->ep_sendcq); ep->ep_sendcq = NULL; + return (-1); } *chanhdl = hdl; @@ -795,7 +789,7 @@ RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, *chanhdl); - return (ret); + return (0); } int
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ep.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -304,6 +304,31 @@ return (0); } +static int +rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid) +{ + int ret; + + RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d", + ep, ep->ep_type); + + /* Re-initialize send pool */ + ret = rds_reinit_send_pool(ep, hca_guid); + if (ret != 0) { + RDS_DPRINTF2("rds_ep_reinit", + "EP(%p): rds_reinit_send_pool failed: %d", ep, ret); + return (-1); + } + + /* free all the receive buffers in the pool */ + rds_free_recv_pool(ep); + + RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d", + ep, ep->ep_type); + + return (0); +} + void rds_session_fini(rds_session_t *sp) { @@ -354,6 +379,74 @@ return (0); } +/* + * This should be called before moving a session from ERROR state to + * INIT state. This will update the HCA keys incase the session has moved from + * one HCA to another. + */ +int +rds_session_reinit(rds_session_t *sp, ib_gid_t lgid) +{ + rds_hca_t *hcap, *hcap1; + int ret; + + RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p)", sp); + + /* CALLED WITH SESSION WRITE LOCK */ + + hcap = rds_gid_to_hcap(rdsib_statep, lgid); + if (hcap == NULL) { + RDS_DPRINTF1("rds_session_reinit", "SGID is on an " + "uninitialized HCA: %llx", lgid.gid_guid); + return (-1); + } + + hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); + if (hcap1 == NULL) { + RDS_DPRINTF1("rds_session_reinit", "Seems like HCA %llx " + "is unplugged", sp->session_lgid.gid_guid); + } else if (hcap->hca_guid == hcap1->hca_guid) { + /* + * No action is needed as the session did not move across + * HCAs + */ + RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA"); + return (0); + } + + RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs"); + + /* re-initialize the control channel */ + ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid); + if (ret != 0) { + RDS_DPRINTF2("rds_session_reinit", + "SP(%p): Ctrl EP(%p) re-initialization failed", + sp, &sp->session_ctrlep); + return (-1); + } + + RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)", + sp, &sp->session_ctrlep); + + /* re-initialize the data channel */ + ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid); + if (ret != 0) { + RDS_DPRINTF2("rds_session_reinit", + "SP(%p): Data EP(%p) re-initialization failed", + sp, &sp->session_dataep); + return (-1); + } + + RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)", + sp, &sp->session_dataep); + + sp->session_lgid = lgid; + + RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp); + + return (0); +} + static int rds_session_connect(rds_session_t *sp) { @@ -409,7 +502,7 @@ ret = rds_open_rc_channel(ep, &pinfo, IBT_BLOCKING, &datachan); if (ret != IBT_SUCCESS) { RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " - "failed: %d", ret); + "failed: %d", ep, ret); return (-1); } sp->session_dataep.ep_chanhdl = datachan; @@ -442,6 +535,9 @@ return (-1); } + RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED", + sp, sp->session_myip, sp->session_remip); + RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp); return (0); @@ -637,6 +733,8 @@ if (sp->session_type == RDS_SESSION_ACTIVE) { rds_session_fini(sp); sp->session_state = RDS_SESSION_STATE_FAILED; + RDS_DPRINTF3("rds_failover_session", + "SP(%p) State RDS_SESSION_STATE_FAILED", sp); } else { RDS_DPRINTF2("rds_failover_session", "SP(%p) has become passive", sp); @@ -662,9 +760,21 @@ } /* move the session to init state */ - sp->session_state = RDS_SESSION_STATE_INIT; + ret = rds_session_reinit(sp, lgid); sp->session_lgid = lgid; sp->session_rgid = rgid; + if (ret != 0) { + rds_session_fini(sp); + sp->session_state = RDS_SESSION_STATE_FAILED; + RDS_DPRINTF3("rds_failover_session", + "SP(%p) State RDS_SESSION_STATE_FAILED", sp); + rw_exit(&sp->session_lock); + return; + } else { + sp->session_state = RDS_SESSION_STATE_INIT; + RDS_DPRINTF3("rds_failover_session", + "SP(%p) State RDS_SESSION_STATE_INIT", sp); + } rw_exit(&sp->session_lock); rds_session_open(sp); @@ -887,9 +997,6 @@ return; } - RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED", - sp, sp->session_myip, sp->session_remip); - RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp); }
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -184,7 +184,7 @@ uint_t ix, hcaix, nhcas; int ret; - RDS_DPRINTF4("rdsib_open_ib", "enter"); + RDS_DPRINTF4("rdsib_open_ib", "enter: statep %p", rdsib_statep); ASSERT(rdsib_statep != NULL); if (rdsib_statep == NULL) { @@ -309,7 +309,7 @@ } } - RDS_DPRINTF4("rdsib_open_ib", "return"); + RDS_DPRINTF4("rdsib_open_ib", "return: statep %p", rdsib_statep); return (0); } @@ -320,10 +320,10 @@ void rdsib_close_ib() { - rds_hca_t *hcap; + rds_hca_t *hcap, *nextp; int ret; - RDS_DPRINTF4("rds_close_ib", "enter"); + RDS_DPRINTF2("rds_close_ib", "enter: statep %p", rdsib_statep); if (rdsib_statep->rds_srvhdl != NULL) { (void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl); @@ -334,10 +334,15 @@ /* close and destroy all the sessions */ rds_close_sessions(NULL); - /* Release all IB resources */ + /* Release all HCA resources */ + rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER); hcap = rdsib_statep->rds_hcalistp; + rdsib_statep->rds_hcalistp = NULL; + rdsib_statep->rds_nhcas = 0; + rw_exit(&rdsib_statep->rds_hca_lock); + while (hcap != NULL) { - rdsib_statep->rds_hcalistp = hcap->hca_nextp; + nextp = hcap->hca_nextp; ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl); ASSERT(ret == IBT_SUCCESS); @@ -348,7 +353,7 @@ ASSERT(ret == IBT_SUCCESS); kmem_free(hcap, sizeof (rds_hca_t)); - hcap = rdsib_statep->rds_hcalistp; + hcap = nextp; } /* Deregister with IBTF */ @@ -357,7 +362,7 @@ rdsib_statep->rds_ibhdl = NULL; } - RDS_DPRINTF4("rds_close_ib", "return"); + RDS_DPRINTF2("rds_close_ib", "return: statep %p", rdsib_statep); } /* Return hcap, given the hca guid */ @@ -387,20 +392,33 @@ rds_hca_t * rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid) { - ibt_node_info_t nodeinfo; - int ret; + rds_hca_t *hcap; + uint_t ix; RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx", statep, gid.gid_prefix, gid.gid_guid); - ret = ibt_gid_to_node_info(gid, &nodeinfo); - if (ret != IBT_SUCCESS) { - RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx " - "failed", gid.gid_prefix, gid.gid_guid); - return (NULL); + rw_enter(&statep->rds_hca_lock, RW_READER); + + hcap = statep->rds_hcalistp; + while (hcap != NULL) { + for (ix = 0; ix < hcap->hca_nports; ix++) { + if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix == + gid.gid_prefix) && + (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid == + gid.gid_guid)) { + RDS_DPRINTF4("rds_gid_to_hcap", + "gid found in hcap: 0x%p", hcap); + rw_exit(&statep->rds_hca_lock); + return (hcap); + } + } + hcap = hcap->hca_nextp; } - return (rds_get_hcap(statep, nodeinfo.n_node_guid)); + rw_exit(&statep->rds_hca_lock); + + return (NULL); } /* This is called from the send CQ handler */ @@ -1053,18 +1071,23 @@ ibt_cq_attr_t scqattr, rcqattr; ibt_rc_chan_alloc_args_t chanargs; ibt_channel_hdl_t chanhdl; + rds_session_t *sp; rds_hca_t *hcap; RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d", ep, hca_port); - /* get the hcap for the HCA hosting this channel */ - hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); - if (hcap == NULL) { - RDS_DPRINTF2("rds_ep_alloc_rc_channel", - "HCA (0x%llx) not found", ep->ep_hca_guid); - return (NULL); - } + /* Update the EP with the right IP address and HCA guid */ + sp = ep->ep_sp; + ASSERT(sp != NULL); + rw_enter(&sp->session_lock, RW_READER); + mutex_enter(&ep->ep_lock); + ep->ep_myip = sp->session_myip; + ep->ep_remip = sp->session_remip; + hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); + ep->ep_hca_guid = hcap->hca_guid; + mutex_exit(&ep->ep_lock); + rw_exit(&sp->session_lock); /* reset taskqpending flag here */ ep->ep_recvqp.qp_taskqpending = B_FALSE; @@ -1217,11 +1240,15 @@ ib_gid_t gid; int ret; - RDS_DPRINTF2("rds_handle_portup_event", "Enter: GUID: 0x%llx", - event->ev_hca_guid); + RDS_DPRINTF2("rds_handle_portup_event", + "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); hcap = rds_get_hcap(statep, event->ev_hca_guid); - ASSERT(hcap != NULL); + if (hcap == NULL) { + RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " + "not in our list", event->ev_hca_guid); + return; + } ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); if (ret != IBT_SUCCESS) {
--- a/usr/src/uts/common/io/ib/clients/rds/rdsib_sc.c Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/io/ib/clients/rds/rdsib_sc.c Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,8 +64,8 @@ ipaddr_t ribd_ip; struct rds_path_record_s *up; struct rds_path_record_s *downp; - char lifname[LIFNAMSIZ]; - char rifname[LIFNAMSIZ]; + char lifname[MAXNAMELEN]; + char rifname[MAXNAMELEN]; } rds_path_record_t; typedef struct rds_node_record_s { @@ -79,6 +79,43 @@ kmutex_t rds_pathmap_lock; rds_node_record_t *rds_pathmap = NULL; +static boolean_t +rds_validate_interface(rds_path_t *path) +{ + char devname[MAXNAMELEN]; + uint_t instance; + + /* separate devname and instance number */ + if (ddi_parse(path->local.ifname, devname, &instance) != DDI_SUCCESS) { + RDS_DPRINTF2("rds_validate_interface", + "local: %s is not right", path->local.ifname); + return (B_FALSE); + } + + /* don't care if it is not IPoIB interface */ + if (strcmp(devname, "ibd") != 0) { + RDS_DPRINTF2("rds_validate_interface", + "local: %s is not IB interface", devname); + return (B_FALSE); + } + + /* separate devname and instance number */ + if (ddi_parse(path->remote.ifname, devname, &instance) != DDI_SUCCESS) { + RDS_DPRINTF2("rds_validate_interface", + "remote: %s is not right", path->remote.ifname); + return (B_FALSE); + } + + /* don't care if it is not IPoIB interface */ + if (strcmp(devname, "ibd") != 0) { + RDS_DPRINTF2("rds_validate_interface", + "remote: %s is not IB interface", devname); + return (B_FALSE); + } + + return (B_TRUE); +} + /* * Called by SC on discovering a new path */ @@ -91,11 +128,8 @@ ASSERT(path != NULL); /* don't care if it is not IPoIB interface */ - if ((bcmp(path->local.ifname, "ibd", 3) != 0) || - (bcmp(path->remote.ifname, "ibd", 3) != 0)) { - RDS_DPRINTF3("rds_path_up", - "(%s | %s) Not IPoIB interface, ignore", - path->local.ifname, path->remote.ifname); + if (rds_validate_interface(path) == B_FALSE) { + RDS_DPRINTF2("rds_path_up", "NOT IB interface"); return; } @@ -164,11 +198,8 @@ ASSERT(path != NULL); /* don't care if it is not IPoIB interface */ - if ((bcmp(path->local.ifname, "ibd", 3) != 0) || - (bcmp(path->remote.ifname, "ibd", 3) != 0)) { - RDS_DPRINTF3("rds_path_down", - "(%s | %s) Not IPoIB interface, ignore", - path->local.ifname, path->remote.ifname); + if (rds_validate_interface(path) == B_FALSE) { + RDS_DPRINTF2("rds_path_down", "NOT IB interface"); return; } @@ -227,7 +258,7 @@ } else { /* this is the first node record */ ASSERT(p == rds_pathmap); - rds_pathmap = p; + rds_pathmap = p->nextp; } if (p->nextp) { @@ -276,10 +307,37 @@ { rds_node_record_t *p; rds_path_record_t *p1; + char devname[MAXNAMELEN]; + uint_t instance; + + if (ddi_parse(if_name, devname, &instance) != DDI_SUCCESS) { + RDS_DPRINTF2("rds_if_lookup_by_name", + "if_name: %s is not right", if_name); + return (B_FALSE); + } mutex_enter(&rds_pathmap_lock); + if (rds_pathmap == NULL) { + /* SC is not configured */ + RDS_DPRINTF2("rds_if_lookup_by_name", "Pathmap is NULL"); + mutex_exit(&rds_pathmap_lock); + return (B_FALSE); + } + + /* + * Sun Cluster always names its interconnect virtual network interface + * as clprivnetx, so return TRUE if there is atleast one node record + * and the interface name is clprivnet something. + */ + if (strcmp(devname, "clprivnet") == 0) { + /* clprivnet address */ + mutex_exit(&rds_pathmap_lock); + return (B_TRUE); + } + p = rds_pathmap; + while (p != NULL) { p1 = p->downp; while ((p1 != NULL) && strcmp(if_name, p1->lifname)) {
--- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_buf.h Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_buf.h Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -156,6 +156,7 @@ int rds_init_recv_caches(rds_state_t *statep); void rds_free_recv_caches(rds_state_t *statep); int rds_init_send_pool(struct rds_ep_s *ep); +int rds_reinit_send_pool(struct rds_ep_s *ep, ib_guid_t hca_guid); void rds_free_send_pool(struct rds_ep_s *ep); int rds_init_recv_pool(struct rds_ep_s *ep); void rds_free_recv_pool(struct rds_ep_s *ep);
--- a/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h Thu May 03 03:28:00 2007 -0700 +++ b/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h Thu May 03 08:24:50 2007 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* @@ -149,7 +149,7 @@ * (5) Failure in rds_session_init() * (6) rds_sendmsg(3SOCKET)/Incoming CM REQ * (7) Failure in rds_session_open() - * (8) rds_session_close() and rds_get_ibaddr() + * (8) rds_session_close(), rds_get_ibaddr() and rds_session_reinit() * (9) rds_session_close() and rds_session_fini() * (9) rds_cleanup_passive_session() and rds_passive_session_fini() * (10) Connection Error/Incoming REQ @@ -309,6 +309,7 @@ rds_session_t *rds_session_create(rds_state_t *statep, ipaddr_t destip, ipaddr_t srcip, ibt_cm_req_rcv_t *reqp, uint8_t type); int rds_session_init(rds_session_t *sp); +int rds_session_reinit(rds_session_t *sp, ib_gid_t lgid); void rds_session_open(rds_session_t *sp); void rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait); @@ -320,6 +321,7 @@ void rds_received_msg(rds_ep_t *ep, rds_buf_t *bp); void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cp); void rds_handle_send_error(rds_ep_t *ep); +void rds_session_fini(rds_session_t *sp); void rds_passive_session_fini(rds_session_t *sp); void rds_cleanup_passive_session(void *arg);