Mercurial > illumos > illumos-gate
changeset 3340:04f89288c276
6491652 MBLK_GETLABEL() doesn't belong in <sys/strsun.h>
6495541 stale IRE/ARP cache entries can remain after DL_NOTE_PHYS_ADDR
6499894 IPSQ framework should allow quiescing via ip_rput()
6499904 ip_rput_dlpi_writer() may panic under low memory
6503948 ill_dl_up() harboring bogus code
author | meem |
---|---|
date | Wed, 27 Dec 2006 21:32:46 -0800 |
parents | 77189550ce71 |
children | 52e5b1750941 |
files | usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip6_if.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip/ip_ndp.c usr/src/uts/common/inet/ip_if.h usr/src/uts/common/sys/stream.h usr/src/uts/common/sys/strsun.h |
diffstat | 8 files changed, 503 insertions(+), 513 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip.h Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip.h Wed Dec 27 21:32:46 2006 -0800 @@ -1326,13 +1326,13 @@ * Table of ipif_t members and their protection * * ipif_next ill_g_lock ill_g_lock - * ipif_ill ipsq + down ipif write once - * ipif_id ipsq + down ipif write once + * ipif_ill ipsq + down ipif write once + * ipif_id ipsq + down ipif write once * ipif_mtu ipsq - * ipif_v6lcl_addr ipsq + down ipif up ipif - * ipif_v6src_addr ipsq + down ipif up ipif - * ipif_v6subnet ipsq + down ipif up ipif - * ipif_v6net_mask ipsq + down ipif up ipif + * ipif_v6lcl_addr ipsq + down ipif up ipif + * ipif_v6src_addr ipsq + down ipif up ipif + * ipif_v6subnet ipsq + down ipif up ipif + * ipif_v6net_mask ipsq + down ipif up ipif * * ipif_v6brd_addr * ipif_v6pp_dst_addr @@ -1404,6 +1404,7 @@ #define CONN_CLOSE 1 /* No mi_copy */ #define COPYOUT 2 /* do an mi_copyout if needed */ #define NO_COPYOUT 3 /* do an mi_copy_done */ +#define IPI2MODE(ipi) ((ipi)->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT) /* * The IP-MT design revolves around the serialization object ipsq_t. @@ -1428,14 +1429,15 @@ typedef struct ipsq_s { kmutex_t ipsq_lock; int ipsq_reentry_cnt; - kthread_t *ipsq_writer; /* current owner (thread id) */ + kthread_t *ipsq_writer; /* current owner (thread id) */ int ipsq_flags; mblk_t *ipsq_xopq_mphead; /* list of excl ops mostly ioctls */ mblk_t *ipsq_xopq_mptail; mblk_t *ipsq_mphead; /* msgs on ipsq linked thru b_next */ mblk_t *ipsq_mptail; /* msgs on ipsq linked thru b_next */ + int ipsq_current_ioctl; /* current ioctl, or 0 if no ioctl */ + ipif_t *ipsq_current_ipif; /* ipif associated with current op */ ipif_t *ipsq_pending_ipif; /* ipif associated w. 
ipsq_pending_mp */ - ipif_t *ipsq_current_ipif; /* ipif associated with current ioctl */ mblk_t *ipsq_pending_mp; /* current ioctl mp while waiting for */ /* response from another module */ struct ipsq_s *ipsq_next; /* list of all syncq's (ipsq_g_list) */ @@ -1444,7 +1446,7 @@ boolean_t ipsq_split; /* ipsq may need to be split */ int ipsq_waitfor; /* Values encoded below */ char ipsq_name[LIFNAMSIZ+1]; /* same as phyint_groupname */ - int ipsq_last_cmd; /* debugging aid */ + #ifdef ILL_DEBUG int ipsq_depth; /* debugging aid */ pc_t ipsq_stack[IP_STACK_DEPTH]; /* debugging aid */ @@ -1990,11 +1992,11 @@ * ill_ipif_up_count ill_lock + ipsq ill_lock * ill_max_frag ipsq Write once * - * ill_name ill_g_lock + ipsq Write once - * ill_name_length ill_g_lock + ipsq Write once + * ill_name ill_g_lock + ipsq Write once + * ill_name_length ill_g_lock + ipsq Write once * ill_ndd_name ipsq Write once * ill_net_type ipsq Write once - * ill_ppa ill_g_lock + ipsq Write once + * ill_ppa ill_g_lock + ipsq Write once * ill_sap ipsq + down ill Write once * ill_sap_length ipsq + down ill Write once * ill_phys_addr_length ipsq + down ill Write once @@ -2020,9 +2022,8 @@ * ill_down_mp ipsq ipsq * ill_dlpi_deferred ipsq ipsq * ill_dlpi_pending ipsq and ill_lock ipsq or ill_lock - * ill_phys_addr_mp ipsq ipsq - * ill_phys_addr ipsq up ill - * ill_ick ipsq + down ill only when ill is up + * ill_phys_addr_mp ipsq + down ill only when ill is up + * ill_phys_addr ipsq + down ill only when ill is up * * ill_state_flags ill_lock ill_lock * exclusive bit flags ipsq_t ipsq_t @@ -2041,7 +2042,7 @@ * ill_max_mtu * * ill_reachable_time ipsq + ill_lock ill_lock - * ill_reachable_retrans_time ipsq + ill_lock ill_lock + * ill_reachable_retrans_time ipsq + ill_lock ill_lock * ill_max_buf ipsq + ill_lock ill_lock * * Next 2 fields need ill_lock because of the get ioctls. 
They should not @@ -2063,12 +2064,12 @@ * ill_mrtun_refcnt ill_lock ill_lock * ill_srcif_refcnt ill_lock ill_lock * ill_srcif_table ill_lock ill_lock - * ill_nd_lla_mp ill_lock ill_lock - * ill_nd_lla ill_lock ill_lock - * ill_nd_lla_len ill_lock ill_lock + * ill_nd_lla_mp ipsq + down ill only when ill is up + * ill_nd_lla ipsq + down ill only when ill is up + * ill_nd_lla_len ipsq + down ill only when ill is up * ill_phys_addr_pend ipsq + down ill only when ill is up * ill_ifname_pending_err ipsq ipsq - * ill_avl_byppa ipsq, ill_g_lock Write once + * ill_avl_byppa ipsq, ill_g_lock write once * * ill_fastpath_list ill_lock ill_lock * ill_refcnt ill_lock ill_lock @@ -3289,8 +3290,7 @@ extern void ip_quiesce_conn(conn_t *); extern void ip_reprocess_ioctl(ipsq_t *, queue_t *, mblk_t *, void *); extern void ip_restart_optmgmt(ipsq_t *, queue_t *, mblk_t *, void *); -extern void ip_ioctl_finish(queue_t *, mblk_t *, int, int, ipif_t *, - ipsq_t *); +extern void ip_ioctl_finish(queue_t *, mblk_t *, int, int, ipsq_t *); extern boolean_t ip_cmpbuf(const void *, uint_t, boolean_t, const void *, uint_t);
--- a/usr/src/uts/common/inet/ip/ip.c Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip/ip.c Wed Dec 27 21:32:46 2006 -0800 @@ -15436,9 +15436,8 @@ ipif_t *ipif = NULL; mblk_t *mp1 = NULL; conn_t *connp = NULL; - t_uscalar_t physaddr_req; + t_uscalar_t paddrreq; mblk_t *mp_hw; - union DL_primitives *dlp; boolean_t success; boolean_t ioctl_aborted = B_FALSE; boolean_t log = B_TRUE; @@ -15492,13 +15491,13 @@ * We don't complete the IOCTL until all three DL_PARs * have been attempted, so set *_len to 0 and break. */ - physaddr_req = ill->ill_phys_addr_pend; + paddrreq = ill->ill_phys_addr_pend; ill_dlpi_done(ill, DL_PHYS_ADDR_REQ); - if (physaddr_req == DL_IPV6_TOKEN) { + if (paddrreq == DL_IPV6_TOKEN) { ill->ill_token_length = 0; log = B_FALSE; break; - } else if (physaddr_req == DL_IPV6_LINK_LAYER_ADDR) { + } else if (paddrreq == DL_IPV6_LINK_LAYER_ADDR) { ill->ill_nd_lla_len = 0; log = B_FALSE; break; @@ -15672,6 +15671,10 @@ mp1 = ipsq_pending_mp_get(ipsq, &connp); if (mp1 == NULL) break; + /* + * Because mp1 was added by ill_dl_up(), and it always + * passes a valid connp, connp must be valid here. + */ ASSERT(connp != NULL); q = CONNP_TO_WQ(connp); @@ -15730,8 +15733,7 @@ * in ip_rput(). If there's an error, we * complete it here. */ - err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr, - B_FALSE); + err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr); if (err == 0) { if (ill->ill_flags & ILLF_XRESOLV) { mutex_enter(&connp->conn_lock); @@ -15811,148 +15813,15 @@ boolean_t need_ire_walk_v4 = B_FALSE; boolean_t need_ire_walk_v6 = B_FALSE; - /* - * Change the address everywhere we need to. - * What we're getting here is a link-level addr or phys addr. 
- * The new addr is at notify + notify->dl_addr_offset - * The address length is notify->dl_addr_length; - */ switch (notify->dl_notification) { case DL_NOTE_PHYS_ADDR: - mp_hw = copyb(mp); - if (mp_hw == NULL) { - err = ENOMEM; - break; - } - dlp = (union DL_primitives *)mp_hw->b_rptr; - /* - * We currently don't support changing - * the token via DL_NOTIFY_IND. - * When we do support it, we have to consider - * what the implications are with respect to - * the token and the link local address. - */ - mutex_enter(&ill->ill_lock); - if (dlp->notify_ind.dl_data == - DL_IPV6_LINK_LAYER_ADDR) { - if (ill->ill_nd_lla_mp != NULL) - freemsg(ill->ill_nd_lla_mp); - ill->ill_nd_lla_mp = mp_hw; - ill->ill_nd_lla = (uchar_t *)mp_hw->b_rptr + - dlp->notify_ind.dl_addr_offset; - ill->ill_nd_lla_len = - dlp->notify_ind.dl_addr_length - - ABS(ill->ill_sap_length); - mutex_exit(&ill->ill_lock); - break; - } else if (dlp->notify_ind.dl_data == - DL_CURR_PHYS_ADDR) { - if (ill->ill_phys_addr_mp != NULL) - freemsg(ill->ill_phys_addr_mp); - ill->ill_phys_addr_mp = mp_hw; - ill->ill_phys_addr = (uchar_t *)mp_hw->b_rptr + - dlp->notify_ind.dl_addr_offset; - ill->ill_phys_addr_length = - dlp->notify_ind.dl_addr_length - - ABS(ill->ill_sap_length); - if (ill->ill_isv6 && - !(ill->ill_flags & ILLF_XRESOLV)) { - if (ill->ill_nd_lla_mp != NULL) - freemsg(ill->ill_nd_lla_mp); - ill->ill_nd_lla_mp = copyb(mp_hw); - ill->ill_nd_lla = (uchar_t *) - ill->ill_nd_lla_mp->b_rptr + - dlp->notify_ind.dl_addr_offset; - ill->ill_nd_lla_len = - ill->ill_phys_addr_length; - } - } - mutex_exit(&ill->ill_lock); - /* - * Send out gratuitous arp request for our new - * hardware address. - */ - for (ipif = ill->ill_ipif; ipif != NULL; - ipif = ipif->ipif_next) { - if (!(ipif->ipif_flags & IPIF_UP)) - continue; - if (ill->ill_isv6) { - ipif_ndp_down(ipif); - /* - * Set B_TRUE to enable - * ipif_ndp_up() to send out - * unsolicited advertisements. 
- */ - err = ipif_ndp_up(ipif, - &ipif->ipif_v6lcl_addr, - B_TRUE); - if (err) { - ip1dbg(( - "ip_rput_dlpi_writer: " - "Failed to update ndp " - "err %d\n", err)); - } - } else { - /* - * IPv4 ARP case - * - * Set Res_act_move, as we only want - * ipif_resolver_up to send an - * AR_ENTRY_ADD request up to - * ARP. - */ - err = ipif_resolver_up(ipif, - Res_act_move); - if (err) { - ip1dbg(( - "ip_rput_dlpi_writer: " - "Failed to update arp " - "err %d\n", err)); - } - } - } - /* - * Allow "fall through" to the DL_NOTE_FASTPATH_FLUSH - * case so that all old fastpath information can be - * purged from IRE caches. - */ - /* FALLTHRU */ + err = ill_set_phys_addr(ill, mp); + break; + case DL_NOTE_FASTPATH_FLUSH: - /* - * Any fastpath probe sent henceforth will get the - * new fp mp. So we first delete any ires that are - * waiting for the fastpath. Then walk all ires and - * delete the ire or delete the fp mp. In the case of - * IRE_MIPRTUN and IRE_BROADCAST it is difficult to - * recreate the ire's without going through a complex - * ipif up/down dance. So we don't delete the ire - * itself, but just the nce_fp_mp for these 2 ire's - * In the case of the other ire's we delete the ire's - * themselves. Access to nce_fp_mp is completely - * protected by ire_lock for IRE_MIPRTUN and - * IRE_BROADCAST. Deleting the ire is preferable in the - * other cases for performance. 
- */ - if (ill->ill_isv6) { - nce_fastpath_list_dispatch(ill, NULL, NULL); - ndp_walk(ill, (pfi_t)ndp_fastpath_flush, - NULL); - } else { - ire_fastpath_list_dispatch(ill, NULL, NULL); - ire_walk_ill_v4(MATCH_IRE_WQ | MATCH_IRE_TYPE, - IRE_CACHE | IRE_BROADCAST, - ire_fastpath_flush, NULL, ill); - mutex_enter(&ire_mrtun_lock); - if (ire_mrtun_count != 0) { - mutex_exit(&ire_mrtun_lock); - ire_walk_ill_mrtun(MATCH_IRE_WQ, - IRE_MIPRTUN, ire_fastpath_flush, - NULL, ill); - } else { - mutex_exit(&ire_mrtun_lock); - } - } - break; + ill_fastpath_flush(ill); + break; + case DL_NOTE_SDU_SIZE: /* * Change the MTU size of the interface, of all @@ -16114,64 +15983,42 @@ } case DL_PHYS_ADDR_ACK: { /* - * We should have an IOCTL waiting on this when request - * sent by ill_dl_phys. - * However, ill_dl_phys was called on an ill queue (from - * SIOCSLIFNAME), thus conn_pending_ill is not set. But the - * ioctl is known to be pending on ill_wq. - * There are two additional phys_addr_req's sent to the - * driver to get the token and lla. ill_phys_addr_pend - * keeps track of the last one sent so we know which - * response we are dealing with. ill_dlpi_done will - * update ill_phys_addr_pend when it sends the next req. - * We don't complete the IOCTL until all three DL_PARs - * have been attempted. - * - * We don't need any lock to update ill_nd_lla* fields, - * since the ill is not yet up, We grab the lock just - * for uniformity with other code that accesses ill_nd_lla. - */ - physaddr_req = ill->ill_phys_addr_pend; + * As part of plumbing the interface via SIOCSLIFNAME, + * ill_dl_phys() will queue a series of DL_PHYS_ADDR_REQs, + * whose answers we receive here. As each answer is received, + * we call ill_dlpi_done() to dispatch the next request as + * we're processing the current one. Once all answers have + * been received, we use ipsq_pending_mp_get() to dequeue the + * outstanding IOCTL and reply to it. 
(Because ill_dl_phys() + * is invoked from an ill queue, conn_oper_pending_ill is not + * available, but we know the ioctl is pending on ill_wq.) + */ + uint_t paddrlen, paddroff; + + paddrreq = ill->ill_phys_addr_pend; + paddrlen = ((dl_phys_addr_ack_t *)mp->b_rptr)->dl_addr_length; + paddroff = ((dl_phys_addr_ack_t *)mp->b_rptr)->dl_addr_offset; + ill_dlpi_done(ill, DL_PHYS_ADDR_REQ); - if (physaddr_req == DL_IPV6_TOKEN || - physaddr_req == DL_IPV6_LINK_LAYER_ADDR) { - if (physaddr_req == DL_IPV6_TOKEN) { - /* - * bcopy to low-order bits of ill_token - * - * XXX Temporary hack - currently, - * all known tokens are 64 bits, - * so I'll cheat for the moment. - */ - dlp = (union DL_primitives *)mp->b_rptr; - - mutex_enter(&ill->ill_lock); - bcopy((uchar_t *)(mp->b_rptr + - dlp->physaddr_ack.dl_addr_offset), - (void *)&ill->ill_token.s6_addr32[2], - dlp->physaddr_ack.dl_addr_length); - ill->ill_token_length = - dlp->physaddr_ack.dl_addr_length; - mutex_exit(&ill->ill_lock); - } else { - ASSERT(ill->ill_nd_lla_mp == NULL); - mp_hw = copyb(mp); - if (mp_hw == NULL) { - err = ENOMEM; - break; - } - dlp = (union DL_primitives *)mp_hw->b_rptr; - mutex_enter(&ill->ill_lock); - ill->ill_nd_lla_mp = mp_hw; - ill->ill_nd_lla = (uchar_t *)mp_hw->b_rptr + - dlp->physaddr_ack.dl_addr_offset; - ill->ill_nd_lla_len = - dlp->physaddr_ack.dl_addr_length; - mutex_exit(&ill->ill_lock); - } - break; - } - ASSERT(physaddr_req == DL_CURR_PHYS_ADDR); + if (paddrreq == DL_IPV6_TOKEN) { + /* + * bcopy to low-order bits of ill_token + * + * XXX Temporary hack - currently, all known tokens + * are 64 bits, so I'll cheat for the moment. 
+ */ + bcopy(mp->b_rptr + paddroff, + &ill->ill_token.s6_addr32[2], paddrlen); + ill->ill_token_length = paddrlen; + break; + } else if (paddrreq == DL_IPV6_LINK_LAYER_ADDR) { + ASSERT(ill->ill_nd_lla_mp == NULL); + ill_set_ndmp(ill, mp, paddroff, paddrlen); + mp = NULL; + break; + } + + ASSERT(paddrreq == DL_CURR_PHYS_ADDR); ASSERT(ill->ill_phys_addr_mp == NULL); if (!ill->ill_ifname_pending) break; @@ -16192,61 +16039,47 @@ ill->ill_ifname_pending_err = 0; break; } - /* - * Get the interface token. If the zeroth interface - * address is zero then set the address to the link local - * address - */ - mp_hw = copyb(mp); - if (mp_hw == NULL) { - err = ENOMEM; - break; - } - dlp = (union DL_primitives *)mp_hw->b_rptr; - ill->ill_phys_addr_mp = mp_hw; - ill->ill_phys_addr = (uchar_t *)mp_hw->b_rptr + - dlp->physaddr_ack.dl_addr_offset; - if (dlp->physaddr_ack.dl_addr_length == 0 || - ill->ill_phys_addr_length == 0 || + + ill->ill_phys_addr_mp = mp; + ill->ill_phys_addr = mp->b_rptr + paddroff; + mp = NULL; + + /* + * If paddrlen is zero, the DLPI provider doesn't support + * physical addresses. The other two tests were historical + * workarounds for bugs in our former PPP implementation, but + * now other things have grown dependencies on them -- e.g., + * the tun module specifies a dl_addr_length of zero in its + * DL_BIND_ACK, but then specifies an incorrect value in its + * DL_PHYS_ADDR_ACK. These bogus checks need to be removed, + * but only after careful testing ensures that all dependent + * broken DLPI providers have been fixed. + */ + if (paddrlen == 0 || ill->ill_phys_addr_length == 0 || ill->ill_phys_addr_length == IP_ADDR_LEN) { - /* - * Compatibility: atun driver returns a length of 0. - * ipdptp has an ill_phys_addr_length of zero(from - * DL_BIND_ACK) but a non-zero length here. - * ipd has an ill_phys_addr_length of 4(from - * DL_BIND_ACK) but a non-zero length here. 
- */ ill->ill_phys_addr = NULL; - } else if (dlp->physaddr_ack.dl_addr_length != - ill->ill_phys_addr_length) { - ip0dbg(("DL_PHYS_ADDR_ACK: " - "Address length mismatch %d %d\n", - dlp->physaddr_ack.dl_addr_length, - ill->ill_phys_addr_length)); + } else if (paddrlen != ill->ill_phys_addr_length) { + ip0dbg(("DL_PHYS_ADDR_ACK: got addrlen %d, expected %d", + paddrlen, ill->ill_phys_addr_length)); err = EINVAL; break; } - mutex_enter(&ill->ill_lock); + if (ill->ill_nd_lla_mp == NULL) { - ill->ill_nd_lla_mp = copyb(mp_hw); - if (ill->ill_nd_lla_mp == NULL) { + if ((mp_hw = copyb(ill->ill_phys_addr_mp)) == NULL) { err = ENOMEM; - mutex_exit(&ill->ill_lock); - break; - } - ill->ill_nd_lla = - (uchar_t *)ill->ill_nd_lla_mp->b_rptr + - dlp->physaddr_ack.dl_addr_offset; - ill->ill_nd_lla_len = ill->ill_phys_addr_length; - } - mutex_exit(&ill->ill_lock); + break; + } + ill_set_ndmp(ill, mp_hw, paddroff, paddrlen); + } + + /* + * Set the interface token. If the zeroth interface address + * is unspecified, then set it to the link local address. + */ if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token)) (void) ill_setdefaulttoken(ill); - /* - * If the ill zero interface has a zero address assign - * it the proper link local address. - */ ASSERT(ill->ill_ipif->ipif_id == 0); if (ipif != NULL && IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr)) @@ -16270,30 +16103,29 @@ } freemsg(mp); - if (mp1) { - struct iocblk *iocp; - int mode; - - /* - * Complete the waiting IOCTL. For SIOCLIFADDIF or - * SIOCSLIFNAME do a copyout. - */ - iocp = (struct iocblk *)mp1->b_rptr; - - if (iocp->ioc_cmd == SIOCLIFADDIF || - iocp->ioc_cmd == SIOCSLIFNAME) - mode = COPYOUT; - else - mode = NO_COPYOUT; - /* - * The ioctl must complete now without EINPROGRESS - * since ipsq_pending_mp_get has removed the ioctl mblk - * from ipsq_pending_mp. Otherwise the ioctl will be - * stuck for ever in the ipsq. 
+ if (mp1 != NULL) { + /* + * The operation must complete without EINPROGRESS + * since ipsq_pending_mp_get() has removed the mblk + * from ipsq_pending_mp. Otherwise, the operation + * will be stuck forever in the ipsq. */ ASSERT(err != EINPROGRESS); - ip_ioctl_finish(q, mp1, err, mode, ipif, ipsq); - + + switch (ipsq->ipsq_current_ioctl) { + case 0: + ipsq_current_finish(ipsq); + break; + + case SIOCLIFADDIF: + case SIOCSLIFNAME: + ip_ioctl_finish(q, mp1, err, COPYOUT, ipsq); + break; + + default: + ip_ioctl_finish(q, mp1, err, NO_COPYOUT, ipsq); + break; + } } } @@ -16363,7 +16195,6 @@ if (ta->ifta_flags & (IFTUN_SRC | IFTUN_DST)) { ipif_set_tun_llink(ill, ta); } - } if (mp1 != NULL) { /* @@ -16379,11 +16210,9 @@ mp1->b_cont->b_prev; } inet_freemsg(mp1); - ASSERT(ipsq->ipsq_current_ipif != NULL); ASSERT(connp != NULL); ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, NO_COPYOUT, - ipsq->ipsq_current_ipif, ipsq); + iocp->ioc_error, NO_COPYOUT, ipsq); } else { ASSERT(connp == NULL); putnext(q, mp); @@ -16415,7 +16244,7 @@ mp->b_datap->db_type = M_IOCDATA; ASSERT(connp != NULL); ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, COPYOUT, NULL, NULL); + iocp->ioc_error, COPYOUT, NULL); } else { ASSERT(connp == NULL); putnext(q, mp); @@ -16430,7 +16259,6 @@ switch (iocp->ioc_cmd) { int mode; - ipif_t *ipif; case DL_IOC_HDR_INFO: /* @@ -16471,12 +16299,9 @@ iocp->ioc_id); mode = COPYOUT; ipsq = NULL; - ipif = NULL; } else { mp1 = ipsq_pending_mp_get(ipsq, &connp); mode = NO_COPYOUT; - ASSERT(ipsq->ipsq_current_ipif != NULL); - ipif = ipsq->ipsq_current_ipif; } if (mp1 != NULL) { /* @@ -16496,7 +16321,7 @@ iocp->ioc_error = EINVAL; ASSERT(connp != NULL); ip_ioctl_finish(CONNP_TO_WQ(connp), mp, - iocp->ioc_error, mode, ipif, ipsq); + iocp->ioc_error, mode, ipsq); } else { ASSERT(connp == NULL); putnext(q, mp); @@ -26829,7 +26654,6 @@ { struct iocblk *iocp; mblk_t *mp1; - ipif_t *ipif; ip_ioctl_cmd_t *ipip; int err; sin_t *sin; @@ -26842,9 +26666,8 @@ 
mp1 = mp->b_cont->b_cont; ipip = ip_sioctl_lookup(iocp->ioc_cmd); if (ipip->ipi_cmd == SIOCSLIFNAME || ipip->ipi_cmd == IF_UNITSEL) { - ill_t *ill; - /* - * Special case where ipsq_current_ipif may not be set. + /* + * Special case where ipsq_current_ipif is not set: * ill_phyint_reinit merged the v4 and v6 into a single ipsq. * ill could also have become part of a ipmp group in the * process, we are here as were not able to complete the @@ -26852,13 +26675,11 @@ * exclusive on the new ipsq, In such a case ipsq_current_ipif * will not be set so we need to set it. */ - ill = (ill_t *)q->q_ptr; - ipsq->ipsq_current_ipif = ill->ill_ipif; - ipsq->ipsq_last_cmd = ipip->ipi_cmd; - } - - ipif = ipsq->ipsq_current_ipif; - ASSERT(ipif != NULL); + ill_t *ill = q->q_ptr; + ipsq_current_start(ipsq, ill->ill_ipif, ipip->ipi_cmd); + } + ASSERT(ipsq->ipsq_current_ipif != NULL); + if (ipip->ipi_cmd_type == IF_CMD) { /* This a old style SIOC[GS]IF* command */ ifr = (struct ifreq *)mp1->b_rptr; @@ -26871,13 +26692,10 @@ sin = NULL; } - err = (*ipip->ipi_func_restart)(ipif, sin, q, mp, ipip, - (void *)mp1->b_rptr); - - /* SIOCLIFREMOVEIF could have removed the ipif */ - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT, - ipip->ipi_cmd == SIOCLIFREMOVEIF ? NULL : ipif, ipsq); + err = (*ipip->ipi_func_restart)(ipsq->ipsq_current_ipif, sin, q, mp, + ipip, mp1->b_rptr); + + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), ipsq); } /* @@ -26928,9 +26746,7 @@ */ if (ipip->ipi_cmd == SIOCLIFADDIF) { err = ip_sioctl_addif(NULL, NULL, q, mp, NULL, NULL); - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT, - NULL, NULL); + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), NULL); return; } @@ -26946,9 +26762,7 @@ err = ip_extract_lifreq_cmn(q, mp, ipip->ipi_cmd_type, ipip->ipi_flags, &ci, ip_process_ioctl); if (err != 0) { - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? 
- COPYOUT : NO_COPYOUT, NULL, NULL); + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), NULL); return; } ASSERT(ci.ci_ipif != NULL); @@ -26961,9 +26775,7 @@ */ err = ip_extract_tunreq(q, mp, &ci.ci_ipif, ip_process_ioctl); if (err != 0) { - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? - COPYOUT : NO_COPYOUT, NULL, NULL); + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), NULL); return; } ASSERT(ci.ci_ipif != NULL); @@ -26987,10 +26799,8 @@ err = ip_extract_msfilter(q, mp, &ci.ci_ipif, ip_process_ioctl); if (err != 0) { - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? - COPYOUT : NO_COPYOUT, NULL, NULL); - return; + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), + NULL); } break; } @@ -27015,9 +26825,7 @@ ci.ci_lifr); if (ci.ci_ipif != NULL) ipif_refrele(ci.ci_ipif); - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT, - NULL, NULL); + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), NULL); return; } @@ -27038,12 +26846,8 @@ if (ipsq == NULL) return; - mutex_enter(&ipsq->ipsq_lock); - ASSERT(ipsq->ipsq_current_ipif == NULL); - ipsq->ipsq_current_ipif = ci.ci_ipif; - ipsq->ipsq_last_cmd = ipip->ipi_cmd; - mutex_exit(&ipsq->ipsq_lock); - mutex_enter(&(ci.ci_ipif)->ipif_ill->ill_lock); + ipsq_current_start(ipsq, ci.ci_ipif, ipip->ipi_cmd); + /* * For most set ioctls that come here, this serves as a single point * where we set the IPIF_CHANGING flag. This ensures that there won't @@ -27057,6 +26861,7 @@ * sets the IPIF_CONDEMNED flag internally after identifying the right * ipif to operate on. */ + mutex_enter(&(ci.ci_ipif)->ipif_ill->ill_lock); if (ipip->ipi_cmd != SIOCLIFREMOVEIF && ipip->ipi_cmd != SIOCLIFFAILOVER && ipip->ipi_cmd != SIOCLIFFAILBACK && @@ -27069,13 +26874,9 @@ * either queued and waiting for some reason or has * already completed. 
*/ - err = (*ipip->ipi_func)(ci.ci_ipif, ci.ci_sin, q, mp, ipip, - ci.ci_lifr); - - /* SIOCLIFREMOVEIF could have removed the ipif */ - ip_ioctl_finish(q, mp, err, - ipip->ipi_flags & IPI_GET_CMD ? COPYOUT : NO_COPYOUT, - ipip->ipi_cmd == SIOCLIFREMOVEIF ? NULL : ci.ci_ipif, ipsq); + err = (*ipip->ipi_func)(ci.ci_ipif, ci.ci_sin, q, mp, ipip, ci.ci_lifr); + + ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), ipsq); if (entered_ipsq) ipsq_exit(ipsq, B_TRUE, B_TRUE); @@ -27086,11 +26887,9 @@ * do mi_copyout/mi_copy_done. */ void -ip_ioctl_finish(queue_t *q, mblk_t *mp, int err, int mode, - ipif_t *ipif, ipsq_t *ipsq) +ip_ioctl_finish(queue_t *q, mblk_t *mp, int err, int mode, ipsq_t *ipsq) { conn_t *connp = NULL; - hook_nic_event_t *info; if (err == EINPROGRESS) return; @@ -27113,7 +26912,7 @@ break; default: - /* An ioctl aborted through a conn close would take this path */ + ASSERT(mode == CONN_CLOSE); /* aborted through CONN_CLOSE */ break; } @@ -27123,46 +26922,8 @@ if (connp != NULL) CONN_OPER_PENDING_DONE(connp); - /* - * If the ioctl were an exclusive ioctl it would have set - * IPIF_CHANGING at the start of the ioctl which is undone here. - */ - if (ipif != NULL) { - mutex_enter(&(ipif)->ipif_ill->ill_lock); - ipif->ipif_state_flags &= ~IPIF_CHANGING; - - /* - * Unhook the nic event message from the ill and enqueue it into - * the nic event taskq. - */ - if ((info = ipif->ipif_ill->ill_nic_event_info) != NULL) { - if (ddi_taskq_dispatch(eventq_queue_nic, - ip_ne_queue_func, (void *)info, DDI_SLEEP) - == DDI_FAILURE) { - ip2dbg(("ip_ioctl_finish: ddi_taskq_dispatch" - "failed\n")); - if (info->hne_data != NULL) - kmem_free(info->hne_data, - info->hne_datalen); - kmem_free(info, sizeof (hook_nic_event_t)); - } - - ipif->ipif_ill->ill_nic_event_info = NULL; - } - - mutex_exit(&(ipif)->ipif_ill->ill_lock); - } - - /* - * Clear the current ipif in the ipsq at the completion of the ioctl. 
- * Note that a non-null ipsq_current_ipif prevents new ioctls from - * entering the ipsq - */ - if (ipsq != NULL) { - mutex_enter(&ipsq->ipsq_lock); - ipsq->ipsq_current_ipif = NULL; - mutex_exit(&ipsq->ipsq_lock); - } + if (ipsq != NULL) + ipsq_current_finish(ipsq); } /* @@ -28715,18 +28476,30 @@ freeb(mp); } - /* We should have an IOCTL waiting on this. */ ipsq = ill->ill_phyint->phyint_ipsq; ipif = ipsq->ipsq_pending_ipif; mp1 = ipsq_pending_mp_get(ipsq, &connp); - ASSERT(!((mp1 != NULL) ^ (ipif != NULL))); + ASSERT(!((mp1 != NULL) ^ (ipif != NULL))); if (mp1 == NULL) { /* bringup was aborted by the user */ freemsg(mp2); return; } - ASSERT(connp != NULL); - q = CONNP_TO_WQ(connp); + + /* + * If an IOCTL is waiting on this (ipsq_current_ioctl != 0), then we + * must have an associated conn_t. Otherwise, we're bringing this + * interface back up as part of handling an asynchronous event (e.g., + * physical address change). + */ + if (ipsq->ipsq_current_ioctl != 0) { + ASSERT(connp != NULL); + q = CONNP_TO_WQ(connp); + } else { + ASSERT(connp == NULL); + q = ill->ill_rq; + } + /* * If the DL_BIND_REQ fails, it is noted * in arc_name_offset. @@ -28752,18 +28525,19 @@ return; } - if (ill->ill_up_ipifs) { + if (ill->ill_up_ipifs) ill_group_cleanup(ill); - } - - /* - * The ioctl must complete now without EINPROGRESS - * since ipsq_pending_mp_get has removed the ioctl mblk - * from ipsq_pending_mp. Otherwise the ioctl will be - * stuck for ever in the ipsq. + + /* + * The operation must complete without EINPROGRESS since + * ipsq_pending_mp_get() has removed the mblk from ipsq_pending_mp. + * Otherwise, the operation will be stuck forever in the ipsq. */ ASSERT(err != EINPROGRESS); - ip_ioctl_finish(q, mp1, err, NO_COPYOUT, ipif, ipsq); + if (ipsq->ipsq_current_ioctl != 0) + ip_ioctl_finish(q, mp1, err, NO_COPYOUT, ipsq); + else + ipsq_current_finish(ipsq); } /* Allocate the private structure */
--- a/usr/src/uts/common/inet/ip/ip6_if.c Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip/ip6_if.c Wed Dec 27 21:32:46 2006 -0800 @@ -1055,17 +1055,9 @@ int i; in6_addr_t v6addr, v6mask; - /* - * Though we execute on the ipsq, we need to hold the ill_lock - * to prevent readers from seeing partially updated values - * while we do the update. - */ - mutex_enter(&ill->ill_lock); if (!MEDIA_V6INTFID(ill->ill_media, ill->ill_phys_addr_length, - ill->ill_phys_addr, &v6addr)) { - mutex_exit(&ill->ill_lock); + ill->ill_phys_addr, &v6addr)) return (B_FALSE); - } (void) ip_plen_to_mask_v6(IPV6_TOKEN_LEN, &v6mask); @@ -1075,7 +1067,6 @@ V6_MASK_COPY(v6addr, v6mask, ill->ill_token); ill->ill_token_length = IPV6_TOKEN_LEN; - mutex_exit(&ill->ill_lock); return (B_TRUE); } @@ -1364,7 +1355,7 @@ * as writer.) */ int -ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr, boolean_t macaddr_change) +ipif_ndp_up(ipif_t *ipif, const in6_addr_t *addr) { ill_t *ill = ipif->ipif_ill; int err = 0; @@ -1408,8 +1399,7 @@ uchar_t *hw_addr = NULL; /* Permanent entries don't need NUD */ - flags = NCE_F_PERMANENT; - flags |= NCE_F_NONUD; + flags = NCE_F_PERMANENT | NCE_F_NONUD; if (ill->ill_flags & ILLF_ROUTER) flags |= NCE_F_ISROUTER; @@ -1419,7 +1409,7 @@ if (ill->ill_net_type == IRE_IF_RESOLVER) { hw_addr = ill->ill_nd_lla; - if (ill->ill_move_in_progress || macaddr_change) { + if (ill->ill_move_in_progress) { /* * Addresses are failing over to this ill. * Don't wait for NUD to see this change.
--- a/usr/src/uts/common/inet/ip/ip_if.c Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip/ip_if.c Wed Dec 27 21:32:46 2006 -0800 @@ -193,6 +193,7 @@ static void ill_phyint_free(ill_t *ill); static void ill_phyint_reinit(ill_t *ill); static void ill_set_nce_router_flags(ill_t *, boolean_t); +static void ill_set_phys_addr_tail(ipsq_t *, queue_t *, mblk_t *, void *); static void ill_signal_ipsq_ills(ipsq_t *, boolean_t); static boolean_t ill_split_ipsq(ipsq_t *cur_sq); static void ill_stq_cache_delete(ire_t *, char *); @@ -991,8 +992,8 @@ ill->ill_frag_ptr = NULL; ill->ill_frag_hash_tbl = NULL; } - if (ill->ill_nd_lla_mp != NULL) - freemsg(ill->ill_nd_lla_mp); + + freemsg(ill->ill_nd_lla_mp); /* Free all retained control messages. */ mpp = &ill->ill_first_mp_to_free; do { @@ -1213,19 +1214,28 @@ ipsq_pending_mp_add(conn_t *connp, ipif_t *ipif, queue_t *q, mblk_t *add_mp, int waitfor) { - ipsq_t *ipsq; + ipsq_t *ipsq = ipif->ipif_ill->ill_phyint->phyint_ipsq; ASSERT(IAM_WRITER_IPIF(ipif)); ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); ASSERT((add_mp->b_next == NULL) && (add_mp->b_prev == NULL)); + ASSERT(ipsq->ipsq_pending_mp == NULL); + /* + * The caller may be using a different ipif than the one passed into + * ipsq_current_start() (e.g., suppose an ioctl that came in on the V4 + * ill needs to wait for the V6 ill to quiesce). So we can't ASSERT + * that `ipsq_current_ipif == ipif'. + */ + ASSERT(ipsq->ipsq_current_ipif != NULL); + /* * M_IOCDATA from ioctls, M_IOCTL from tunnel ioctls, - * M_ERROR/M_HANGUP from driver + * M_ERROR/M_HANGUP/M_PROTO/M_PCPROTO from the driver. 
*/ ASSERT((DB_TYPE(add_mp) == M_IOCDATA) || (DB_TYPE(add_mp) == M_IOCTL) || - (DB_TYPE(add_mp) == M_ERROR) || (DB_TYPE(add_mp) == M_HANGUP)); - - ipsq = ipif->ipif_ill->ill_phyint->phyint_ipsq; + (DB_TYPE(add_mp) == M_ERROR) || (DB_TYPE(add_mp) == M_HANGUP) || + (DB_TYPE(add_mp) == M_PROTO) || (DB_TYPE(add_mp) == M_PCPROTO)); + if (connp != NULL) { ASSERT(MUTEX_HELD(&connp->conn_lock)); /* @@ -1248,17 +1258,7 @@ add_mp->b_queue = q; ipsq->ipsq_pending_mp = add_mp; ipsq->ipsq_waitfor = waitfor; - /* - * ipsq_current_ipif is needed to restart the operation from - * ipif_ill_refrele_tail when the last reference to the ipi/ill - * is gone. Since this is not an ioctl ipsq_current_ipif has not - * been set until now. - */ - if (DB_TYPE(add_mp) == M_ERROR || DB_TYPE(add_mp) == M_HANGUP) { - ASSERT(ipsq->ipsq_current_ipif == NULL); - ipsq->ipsq_current_ipif = ipif; - ipsq->ipsq_last_cmd = DB_TYPE(add_mp); - } + if (connp != NULL) connp->conn_oper_pending_ill = ipif->ipif_ill; mutex_exit(&ipsq->ipsq_lock); @@ -1352,11 +1352,18 @@ ipsq->ipsq_pending_ipif = NULL; ipsq->ipsq_waitfor = 0; ipsq->ipsq_current_ipif = NULL; + ipsq->ipsq_current_ioctl = 0; mutex_exit(&ipsq->ipsq_lock); if (DB_TYPE(mp) == M_IOCTL || DB_TYPE(mp) == M_IOCDATA) { - ip_ioctl_finish(q, mp, ENXIO, connp != NULL ? CONN_CLOSE : - NO_COPYOUT, connp != NULL ? 
ipif : NULL, NULL); + if (connp == NULL) { + ip_ioctl_finish(q, mp, ENXIO, NO_COPYOUT, NULL); + } else { + ip_ioctl_finish(q, mp, ENXIO, CONN_CLOSE, NULL); + mutex_enter(&ipif->ipif_ill->ill_lock); + ipif->ipif_state_flags &= ~IPIF_CHANGING; + mutex_exit(&ipif->ipif_ill->ill_lock); + } } else { /* * IP-MT XXX In the case of TLI/XTI bind / optmgmt this can't @@ -1397,7 +1404,7 @@ mp->b_next = NULL; mp->b_prev = NULL; mp->b_queue = NULL; - ip_ioctl_finish(q, mp, ENXIO, NO_COPYOUT, NULL, NULL); + ip_ioctl_finish(q, mp, ENXIO, NO_COPYOUT, NULL); mutex_enter(&ill->ill_lock); } ill->ill_pending_ipif = NULL; @@ -1468,7 +1475,7 @@ curr->b_queue = NULL; if (DB_TYPE(curr) == M_IOCTL || DB_TYPE(curr) == M_IOCDATA) { ip_ioctl_finish(q, curr, ENXIO, connp != NULL ? - CONN_CLOSE : NO_COPYOUT, NULL, NULL); + CONN_CLOSE : NO_COPYOUT, NULL); } else { /* * IP-MT XXX In the case of TLI/XTI bind / optmgmt @@ -1613,7 +1620,7 @@ } ill_down_tail(ill); freemsg(mp); - ipsq->ipsq_current_ipif = NULL; + ipsq_current_finish(ipsq); } /* @@ -1624,11 +1631,9 @@ boolean_t ill_down_start(queue_t *q, mblk_t *mp) { - ill_t *ill; + ill_t *ill = q->q_ptr; ipif_t *ipif; - ill = q->q_ptr; - ASSERT(IAM_WRITER_ILL(ill)); for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) @@ -1637,16 +1642,15 @@ ill_down(ill); (void) ipsq_pending_mp_cleanup(ill, NULL); - mutex_enter(&ill->ill_lock); - /* - * Atomically test and add the pending mp if references are - * still active. - */ + + ipsq_current_start(ill->ill_phyint->phyint_ipsq, ill->ill_ipif, 0); + + /* + * Atomically test and add the pending mp if references are active. + */ + mutex_enter(&ill->ill_lock); if (!ill_is_quiescent(ill)) { - /* - * Get rid of any pending mps and cleanup. Call will - * not fail since we are passing a null connp. 
- */ + /* call cannot fail since `conn_t *' argument is NULL */ (void) ipsq_pending_mp_add(NULL, ill->ill_ipif, ill->ill_rq, mp, ILL_DOWN); mutex_exit(&ill->ill_lock); @@ -4936,24 +4940,21 @@ int ill_dls_info(struct sockaddr_dl *sdl, const ipif_t *ipif) { - size_t length; + size_t len; ill_t *ill = ipif->ipif_ill; sdl->sdl_family = AF_LINK; sdl->sdl_index = ill->ill_phyint->phyint_ifindex; - sdl->sdl_type = ipif->ipif_type; + sdl->sdl_type = ill->ill_type; (void) ipif_get_name(ipif, sdl->sdl_data, sizeof (sdl->sdl_data)); - length = mi_strlen(sdl->sdl_data); - ASSERT(length < 256); - sdl->sdl_nlen = (uchar_t)length; + len = strlen(sdl->sdl_data); + ASSERT(len < 256); + sdl->sdl_nlen = (uchar_t)len; sdl->sdl_alen = ill->ill_phys_addr_length; - mutex_enter(&ill->ill_lock); - if (ill->ill_phys_addr_length != 0 && ill->ill_phys_addr != NULL) { - bcopy(ill->ill_phys_addr, &sdl->sdl_data[length], - ill->ill_phys_addr_length); - } - mutex_exit(&ill->ill_lock); sdl->sdl_slen = 0; + if (ill->ill_phys_addr_length != 0 && ill->ill_phys_addr != NULL) + bcopy(ill->ill_phys_addr, &sdl->sdl_data[len], sdl->sdl_alen); + return (sizeof (struct sockaddr_dl)); } @@ -6219,6 +6220,7 @@ conn_t *connp; ipsq_t *ipsq; ipif_t *ipif; + dl_notify_ind_t *dlindp; ASSERT(MUTEX_HELD(&ill->ill_lock)); @@ -6295,17 +6297,34 @@ ASSERT(mp != NULL); switch (mp->b_datap->db_type) { + case M_PCPROTO: + case M_PROTO: + /* + * For now, only DL_NOTIFY_IND messages can use this facility. 
+ */ + dlindp = (dl_notify_ind_t *)mp->b_rptr; + ASSERT(dlindp->dl_primitive == DL_NOTIFY_IND); + + switch (dlindp->dl_notification) { + case DL_NOTE_PHYS_ADDR: + qwriter_ip(NULL, ill, ill->ill_rq, mp, + ill_set_phys_addr_tail, CUR_OP, B_TRUE); + return; + default: + ASSERT(0); + } + break; + case M_ERROR: case M_HANGUP: - (void) qwriter_ip(NULL, ill, ill->ill_rq, mp, - ipif_all_down_tail, CUR_OP, B_TRUE); + qwriter_ip(NULL, ill, ill->ill_rq, mp, ipif_all_down_tail, + CUR_OP, B_TRUE); return; case M_IOCTL: case M_IOCDATA: - (void) qwriter_ip(NULL, ill, - (connp != NULL ? CONNP_TO_WQ(connp) : ill->ill_wq), mp, - ip_reprocess_ioctl, CUR_OP, B_TRUE); + qwriter_ip(NULL, ill, (connp != NULL ? CONNP_TO_WQ(connp) : + ill->ill_wq), mp, ip_reprocess_ioctl, CUR_OP, B_TRUE); return; default: @@ -7978,6 +7997,68 @@ } /* + * Start the current exclusive operation on `ipsq'; associate it with `ipif' + * and `ioccmd'. + */ +void +ipsq_current_start(ipsq_t *ipsq, ipif_t *ipif, int ioccmd) +{ + ASSERT(IAM_WRITER_IPSQ(ipsq)); + + mutex_enter(&ipsq->ipsq_lock); + ASSERT(ipsq->ipsq_current_ipif == NULL); + ASSERT(ipsq->ipsq_current_ioctl == 0); + ipsq->ipsq_current_ipif = ipif; + ipsq->ipsq_current_ioctl = ioccmd; + mutex_exit(&ipsq->ipsq_lock); +} + +/* + * Finish the current exclusive operation on `ipsq'. Note that other + * operations will not be able to proceed until an ipsq_exit() is done. + */ +void +ipsq_current_finish(ipsq_t *ipsq) +{ + ipif_t *ipif = ipsq->ipsq_current_ipif; + hook_nic_event_t *info; + + ASSERT(IAM_WRITER_IPSQ(ipsq)); + + /* + * For SIOCLIFREMOVEIF, the ipif has already been blown away + * (but we're careful to never set IPIF_CHANGING in that case). + */ + if (ipsq->ipsq_current_ioctl != SIOCLIFREMOVEIF) { + mutex_enter(&ipif->ipif_ill->ill_lock); + ipif->ipif_state_flags &= ~IPIF_CHANGING; + /* + * Unhook the nic event message from the ill and enqueue it + * into the nic event taskq. 
+ */ + if ((info = ipif->ipif_ill->ill_nic_event_info) != NULL) { + if (ddi_taskq_dispatch(eventq_queue_nic, + ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) { + ip2dbg(("ipsq_current_finish: " + "ddi_taskq_dispatch failed\n")); + if (info->hne_data != NULL) + kmem_free(info->hne_data, + info->hne_datalen); + kmem_free(info, sizeof (hook_nic_event_t)); + } + ipif->ipif_ill->ill_nic_event_info = NULL; + } + mutex_exit(&ipif->ipif_ill->ill_lock); + } + + mutex_enter(&ipsq->ipsq_lock); + ASSERT(ipsq->ipsq_current_ipif != NULL); + ipsq->ipsq_current_ipif = NULL; + ipsq->ipsq_current_ioctl = 0; + mutex_exit(&ipsq->ipsq_lock); +} + +/* * The ill is closing. Flush all messages on the ipsq that originated * from this ill. Usually there won't be any messages on the ipsq_xopq_mphead * for this ill since ipsq_enter could not have entered until then. @@ -10556,7 +10637,7 @@ ire_refrele(ire); freemsg(mp); ip_ioctl_finish(q, orig_ioc_mp, - EINVAL, NO_COPYOUT, NULL, NULL); + EINVAL, NO_COPYOUT, NULL); return; } *flagsp |= ATF_COM; @@ -10582,7 +10663,7 @@ /* Ditch the internal IOCTL. */ freemsg(mp); ire_refrele(ire); - ip_ioctl_finish(q, orig_ioc_mp, 0, COPYOUT, NULL, NULL); + ip_ioctl_finish(q, orig_ioc_mp, 0, COPYOUT, NULL); return; } } @@ -10626,7 +10707,7 @@ if (iocp->ioc_error || iocp->ioc_cmd != AR_ENTRY_SQUERY) { err = iocp->ioc_error; freemsg(mp); - ip_ioctl_finish(q, orig_ioc_mp, err, NO_COPYOUT, NULL, NULL); + ip_ioctl_finish(q, orig_ioc_mp, err, NO_COPYOUT, NULL); return; } @@ -10639,8 +10720,8 @@ if ((ill->ill_phys_addr_length + ill->ill_name_length) > sizeof (xar->xarp_ha.sdl_data)) { freemsg(mp); - ip_ioctl_finish(q, orig_ioc_mp, EINVAL, - NO_COPYOUT, NULL, NULL); + ip_ioctl_finish(q, orig_ioc_mp, EINVAL, NO_COPYOUT, + NULL); return; } } @@ -10667,7 +10748,7 @@ /* Ditch the internal IOCTL. */ freemsg(mp); /* Complete the original. 
*/ - ip_ioctl_finish(q, orig_ioc_mp, 0, COPYOUT, NULL, NULL); + ip_ioctl_finish(q, orig_ioc_mp, 0, COPYOUT, NULL); } /* @@ -13853,9 +13934,8 @@ * Copy the new hardware address and length into * arp_add_mp to be sent to ARP. */ - area->area_hw_addr_length = - ill->ill_phys_addr_length; - bcopy((char *)ill->ill_phys_addr, + area->area_hw_addr_length = ill->ill_phys_addr_length; + bcopy(ill->ill_phys_addr, ((char *)area + area->area_hw_addr_offset), area->area_hw_addr_length); } @@ -14950,6 +15030,7 @@ */ mutex_enter(&old_ipsq->ipsq_lock); old_ipsq->ipsq_current_ipif = NULL; + old_ipsq->ipsq_current_ioctl = 0; mutex_exit(&old_ipsq->ipsq_lock); return (EINPROGRESS); } @@ -19757,7 +19838,7 @@ /* Skip arp/ndp for any loopback interface. */ if (ill->ill_wq != NULL) { - conn_t *connp = Q_TO_CONN(q); + conn_t *connp = CONN_Q(q) ? Q_TO_CONN(q) : NULL; ipsq_t *ipsq = ill->ill_phyint->phyint_ipsq; if (!ill->ill_dl_up) { @@ -19783,13 +19864,15 @@ * EINPROGRESS and we will complete in ip_arp_done. */ - ASSERT(connp != NULL); + ASSERT(connp != NULL || !CONN_Q(q)); ASSERT(ipsq->ipsq_pending_mp == NULL); - mutex_enter(&connp->conn_lock); + if (connp != NULL) + mutex_enter(&connp->conn_lock); mutex_enter(&ill->ill_lock); success = ipsq_pending_mp_add(connp, ipif, q, mp, 0); mutex_exit(&ill->ill_lock); - mutex_exit(&connp->conn_lock); + if (connp != NULL) + mutex_exit(&connp->conn_lock); if (!success) return (EINTR); @@ -19802,8 +19885,7 @@ * That ioctl will complete in ip_rput. */ if (isv6) { - err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr, - B_FALSE); + err = ipif_ndp_up(ipif, &ipif->ipif_v6lcl_addr); if (err != 0) { if (err != EINPROGRESS) mp = ipsq_pending_mp_get(ipsq, &connp); @@ -19887,17 +19969,14 @@ * Record state needed to complete this operation when the * DL_BIND_ACK shows up. Also remember the pre-allocated mblks. 
*/ - if (WR(q)->q_next == NULL) { - connp = Q_TO_CONN(q); - mutex_enter(&connp->conn_lock); - } else { - connp = NULL; - } + ASSERT(WR(q)->q_next == NULL); + connp = Q_TO_CONN(q); + + mutex_enter(&connp->conn_lock); mutex_enter(&ipif->ipif_ill->ill_lock); success = ipsq_pending_mp_add(connp, ipif, q, mp, 0); mutex_exit(&ipif->ipif_ill->ill_lock); - if (connp != NULL) - mutex_exit(&connp->conn_lock); + mutex_exit(&connp->conn_lock); if (!success) goto bad; @@ -19943,10 +20022,8 @@ * groups. */ - if (bind_mp != NULL) - freemsg(bind_mp); - if (unbind_mp != NULL) - freemsg(unbind_mp); + freemsg(bind_mp); + freemsg(unbind_mp); return (ENOMEM); } @@ -22981,13 +23058,13 @@ return (EINPROGRESS); /* - * Need to set the ipsq_current_ipif now, if we have changed ipsq - * due to the phyint merge in ill_phyint_reinit. - */ - ASSERT(ipsq->ipsq_current_ipif == NULL || - ipsq->ipsq_current_ipif == ipif); - ipsq->ipsq_current_ipif = ipif; - ipsq->ipsq_last_cmd = SIOCSLIFNAME; + * If ill_phyint_reinit() changed our ipsq, then start on the new ipsq. + */ + if (ipsq->ipsq_current_ipif == NULL) + ipsq_current_start(ipsq, ipif, SIOCSLIFNAME); + else + ASSERT(ipsq->ipsq_current_ipif == ipif); + error = ipif_set_values_tail(ill, ipif, mp, q); ipsq_exit(ipsq, B_TRUE, B_TRUE); if (error != 0 && error != EINPROGRESS) { @@ -24188,3 +24265,150 @@ ill_refrele(ill); return (ipif); } + +/* + * Flush the fastpath by deleting any IRE's that are waiting for the fastpath, + * and any IRE's that are using the fastpath. There are two exceptions: + * IRE_MIPRTUN and IRE_BROADCAST are difficult to recreate, so instead we just + * nuke their nce_fp_mp's; see ire_fastpath_flush() for details. 
+ */ +void +ill_fastpath_flush(ill_t *ill) +{ + if (ill->ill_isv6) { + nce_fastpath_list_dispatch(ill, NULL, NULL); + ndp_walk(ill, (pfi_t)ndp_fastpath_flush, NULL); + } else { + ire_fastpath_list_dispatch(ill, NULL, NULL); + ire_walk_ill_v4(MATCH_IRE_WQ | MATCH_IRE_TYPE, + IRE_CACHE | IRE_BROADCAST, ire_fastpath_flush, NULL, ill); + mutex_enter(&ire_mrtun_lock); + if (ire_mrtun_count != 0) { + mutex_exit(&ire_mrtun_lock); + ire_walk_ill_mrtun(MATCH_IRE_WQ, IRE_MIPRTUN, + ire_fastpath_flush, NULL, ill); + } else { + mutex_exit(&ire_mrtun_lock); + } + } +} + +/* + * Set the physical address information for `ill' to the contents of the + * dl_notify_ind_t pointed to by `mp'. Must be called as writer, and will be + * asynchronous if `ill' cannot immediately be quiesced -- in which case + * EINPROGRESS will be returned. + */ +int +ill_set_phys_addr(ill_t *ill, mblk_t *mp) +{ + ipsq_t *ipsq = ill->ill_phyint->phyint_ipsq; + dl_notify_ind_t *dlindp = (dl_notify_ind_t *)mp->b_rptr; + + ASSERT(IAM_WRITER_IPSQ(ipsq)); + + if (dlindp->dl_data != DL_IPV6_LINK_LAYER_ADDR && + dlindp->dl_data != DL_CURR_PHYS_ADDR) { + /* Changing DL_IPV6_TOKEN is not yet supported */ + return (0); + } + + /* + * We need to store up to two copies of `mp' in `ill'. Due to the + * design of ipsq_pending_mp_add(), we can't pass them as separate + * arguments to ill_set_phys_addr_tail(). Instead, chain them + * together here, then pull 'em apart in ill_set_phys_addr_tail(). + */ + if ((mp = copyb(mp)) == NULL || (mp->b_cont = copyb(mp)) == NULL) { + freemsg(mp); + return (ENOMEM); + } + + ipsq_current_start(ipsq, ill->ill_ipif, 0); + + /* + * If we can quiesce the ill, then set the address. If not, then + * ill_set_phys_addr_tail() will be called from ipif_ill_refrele_tail(). 
+ */ + ill_down_ipifs(ill, NULL, 0, B_FALSE); + mutex_enter(&ill->ill_lock); + if (!ill_is_quiescent(ill)) { + /* call cannot fail since `conn_t *' argument is NULL */ + (void) ipsq_pending_mp_add(NULL, ill->ill_ipif, ill->ill_rq, + mp, ILL_DOWN); + mutex_exit(&ill->ill_lock); + return (EINPROGRESS); + } + mutex_exit(&ill->ill_lock); + + ill_set_phys_addr_tail(ipsq, ill->ill_rq, mp, NULL); + return (0); +} + +/* + * Once the ill associated with `q' has quiesced, set its physical address + * information to the values in `addrmp'. Note that two copies of `addrmp' + * are passed (linked by b_cont), since we sometimes need to save two distinct + * copies in the ill_t, and our context doesn't permit sleeping or allocation + * failure (we'll free the other copy if it's not needed). Since the ill_t + * is quiesced, we know any stale IREs with the old address information have + * already been removed, so we don't need to call ill_fastpath_flush(). + */ +/* ARGSUSED */ +static void +ill_set_phys_addr_tail(ipsq_t *ipsq, queue_t *q, mblk_t *addrmp, void *dummy) +{ + ill_t *ill = q->q_ptr; + mblk_t *addrmp2 = unlinkb(addrmp); + dl_notify_ind_t *dlindp = (dl_notify_ind_t *)addrmp->b_rptr; + uint_t addrlen, addroff; + + ASSERT(IAM_WRITER_IPSQ(ipsq)); + mutex_enter(&ill->ill_lock); + ASSERT(ill_is_quiescent(ill)); + mutex_exit(&ill->ill_lock); + + addroff = dlindp->dl_addr_offset; + addrlen = dlindp->dl_addr_length - ABS(ill->ill_sap_length); + + switch (dlindp->dl_data) { + case DL_IPV6_LINK_LAYER_ADDR: + ill_set_ndmp(ill, addrmp, addroff, addrlen); + freemsg(addrmp2); + break; + + case DL_CURR_PHYS_ADDR: + freemsg(ill->ill_phys_addr_mp); + ill->ill_phys_addr = addrmp->b_rptr + addroff; + ill->ill_phys_addr_mp = addrmp; + ill->ill_phys_addr_length = addrlen; + + if (ill->ill_isv6 && !(ill->ill_flags & ILLF_XRESOLV)) + ill_set_ndmp(ill, addrmp2, addroff, addrlen); + else + freemsg(addrmp2); + break; + default: + ASSERT(0); + } + + /* + * If there are ipifs to bring up, 
ill_up_ipifs() will return nonzero, + * and ipsq_current_finish() will be called by ip_rput_dlpi_writer() + * or ip_arp_done() when the last ipif is brought up. + */ + if (ill_up_ipifs(ill, q, addrmp) == 0) + ipsq_current_finish(ipsq); +} + +/* + * Helper routine for setting the ill_nd_lla fields. + */ +void +ill_set_ndmp(ill_t *ill, mblk_t *ndmp, uint_t addroff, uint_t addrlen) +{ + freemsg(ill->ill_nd_lla_mp); + ill->ill_nd_lla = ndmp->b_rptr + addroff; + ill->ill_nd_lla_mp = ndmp; + ill->ill_nd_lla_len = addrlen; +}
--- a/usr/src/uts/common/inet/ip/ip_ndp.c Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip/ip_ndp.c Wed Dec 27 21:32:46 2006 -0800 @@ -1514,7 +1514,7 @@ mutex_exit(&ill->ill_lock); ipif->ipif_was_dup = B_TRUE; - if (ipif_ndp_up(ipif, addr, B_FALSE) != EINPROGRESS) + if (ipif_ndp_up(ipif, addr) != EINPROGRESS) (void) ipif_up_done_v6(ipif); } freeb(mp); @@ -2361,7 +2361,6 @@ hw_addr = NULL; if (!(flag & NDP_PROBE)) { - mutex_enter(&hwaddr_ill->ill_lock); hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : hwaddr_ill->ill_phys_addr; if (hw_addr != NULL) { @@ -2369,7 +2368,6 @@ opt->nd_opt_len = (uint8_t)plen; bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); } - mutex_exit(&hwaddr_ill->ill_lock); } if (hw_addr == NULL) { /* If there's no link layer address option, then strip it. */
--- a/usr/src/uts/common/inet/ip_if.h Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/inet/ip_if.h Wed Dec 27 21:32:46 2006 -0800 @@ -176,6 +176,7 @@ extern void ill_fastpath_ack(ill_t *, mblk_t *); extern void ill_fastpath_nack(ill_t *); extern int ill_fastpath_probe(ill_t *, mblk_t *); +extern void ill_fastpath_flush(ill_t *); extern void ill_frag_prune(ill_t *, uint_t); extern void ill_frag_free_pkts(ill_t *, ipfb_t *, ipf_t *, int); extern time_t ill_frag_timeout(ill_t *, time_t); @@ -183,8 +184,9 @@ extern int ill_nominate_mcast_rcv(ill_group_t *); extern boolean_t ill_setdefaulttoken(ill_t *); extern void ill_restart_dad(ill_t *, boolean_t); +extern int ill_set_phys_addr(ill_t *, mblk_t *); +extern void ill_set_ndmp(ill_t *, mblk_t *, uint_t, uint_t); -extern void ill_lock_ills(ill_t **, int); extern mblk_t *ill_pending_mp_get(ill_t *, conn_t **, uint_t); extern boolean_t ill_pending_mp_add(ill_t *, conn_t *, mblk_t *); extern boolean_t ill_is_quiescent(ill_t *ill); @@ -241,7 +243,7 @@ extern void ipif_down_tail(ipif_t *); extern void ipif_multicast_up(ipif_t *); extern void ipif_ndp_down(ipif_t *); -extern int ipif_ndp_up(ipif_t *, const in6_addr_t *, boolean_t); +extern int ipif_ndp_up(ipif_t *, const in6_addr_t *); extern int ipif_ndp_setup_multicast(ipif_t *, struct nce_s **); extern int ipif_up_done(ipif_t *); extern int ipif_up_done_v6(ipif_t *); @@ -263,6 +265,8 @@ extern int illgrp_insert(ill_group_t **, ill_t *, char *, ill_group_t *, boolean_t); +extern void ipsq_current_start(ipsq_t *, ipif_t *, int); +extern void ipsq_current_finish(ipsq_t *); extern void ipsq_enq(ipsq_t *, queue_t *, mblk_t *, ipsq_func_t, int, ill_t *); extern boolean_t ipsq_enter(ill_t *, boolean_t);
--- a/usr/src/uts/common/sys/stream.h Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/sys/stream.h Wed Dec 27 21:32:46 2006 -0800 @@ -361,6 +361,9 @@ */ #define DB_TCI(mp) ((mp)->b_datap->db_struioun.cksum.pad) +#define MBLK_GETLABEL(mp) \ + (DB_CRED(mp) != NULL ? crgetlabel(DB_CRED(mp)) : NULL) + /* * Message block descriptor */
--- a/usr/src/uts/common/sys/strsun.h Wed Dec 27 05:35:34 2006 -0800 +++ b/usr/src/uts/common/sys/strsun.h Wed Dec 27 21:32:46 2006 -0800 @@ -54,9 +54,6 @@ #define MBLKIN(mp, off, len) (((off) <= MBLKL(mp)) && \ (((mp)->b_rptr + (off) + (len)) <= (mp)->b_wptr)) -#define MBLK_GETLABEL(mp) \ - (DB_CRED(mp) != NULL ? crgetlabel(DB_CRED(mp)) : NULL) - #ifdef _KERNEL extern void mcopyin(mblk_t *, void *, size_t, void *); extern void mcopyout(mblk_t *, void *, size_t, void *, mblk_t *);