Mercurial > illumos > illumos-gate
changeset 4084:1838f1bb1fda
6521408 ip_rput_process_forward() declared twice in ip.c
6545032 race in NCE creation can render off-link hosts unreachable
author | sowmini |
---|---|
date | Mon, 23 Apr 2007 09:35:29 -0700 |
parents | 08c82bb85ef2 |
children | f5fd2bc7debd |
files | usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip_ire.c usr/src/uts/common/inet/ip/ip_ndp.c usr/src/uts/common/inet/ip_ndp.h |
diffstat | 5 files changed, 73 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip.h Mon Apr 23 09:19:26 2007 -0700 +++ b/usr/src/uts/common/inet/ip.h Mon Apr 23 09:35:29 2007 -0700 @@ -433,19 +433,22 @@ /* * NCE_EXPIRED is TRUE when we have a non-permanent nce that was * found to be REACHABLE more than ip_ire_arp_interval ms ago. - * This macro is used to trigger re-arp of existing nce_t entries - * so that they can be updated with the latest information. - * nce's will get cleaned up (or updated with new info) in the following - * circumstances: + * This macro is used to age existing nce_t entries. The + * nce's will get cleaned up in the following circumstances: * - ip_ire_trash_reclaim will free nce's using ndp_cache_reclaim * when memory is low, * - ip_arp_news, when updates are received. - * - if the nce is NCE_EXPIRED(), it will be re-initialized to ND_INITIAL, - * so that a new arp request will be triggered. + * - if the nce is NCE_EXPIRED(), it will deleted, so that a new + * arp request will need to be triggered from an ND_INITIAL nce. + * + * Note that the nce state transition follows the pattern: + * ND_INITIAL -> ND_INCOMPLETE -> ND_REACHABLE + * after which the nce is deleted when it has expired. + * * nce_last is the timestamp that indicates when the nce_res_mp in the * nce_t was last updated to a valid link-layer address. nce_last gets * modified/updated : - * - when the nce is created or reinit-ed + * - when the nce is created * - every time we get a sane arp response for the nce. */ #define NCE_EXPIRED(nce, ipst) (nce->nce_last > 0 && \
--- a/usr/src/uts/common/inet/ip/ip.c Mon Apr 23 09:19:26 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip.c Mon Apr 23 09:35:29 2007 -0700 @@ -771,9 +771,6 @@ static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, ipha_t *, ill_t *, boolean_t); - -static void ip_rput_process_forward(queue_t *, mblk_t *, ire_t *, - ipha_t *, ill_t *, boolean_t); ipaddr_t ip_g_all_ones = IP_HOST_MASK; /* How long, in seconds, we allow frags to hang around. */ @@ -8556,14 +8553,6 @@ */ res_mp = save_ire->ire_nce->nce_res_mp; ire_fp_mp = NULL; - /* - * save_ire's nce_fp_mp can't change since it is - * not an IRE_MIPRTUN or IRE_BROADCAST - * LOCK_IRE_FP_MP does not do any useful work in - * the case of IRE_CACHE. So we don't use it below. - */ - if (save_ire->ire_stq == dst_ill->ill_wq) - ire_fp_mp = save_ire->ire_nce->nce_fp_mp; /* * Check cached gateway IRE for any security @@ -28119,17 +28108,15 @@ &fake_ire->ire_gateway_addr : &fake_ire->ire_addr), B_FALSE)) != NULL) { /* - * cleanup: just reset nce. + * cleanup: * We check for refcnt 2 (one for the nce * hash list + 1 for the ref taken by - * ndp_lookup_v4) to ensure that there are + * ndp_lookup_v4) to check that there are * no ire's pointing at the nce. */ - if (nce->nce_refcnt == 2) { - nce = nce_reinit(nce); - } - if (nce != NULL) - NCE_REFRELE(nce); + if (nce->nce_refcnt == 2) + ndp_delete(nce); + NCE_REFRELE(nce); } freeb(mp1); /* dl_unitdata response */ freemsg(mp); /* fake ire */
--- a/usr/src/uts/common/inet/ip/ip_ire.c Mon Apr 23 09:19:26 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_ire.c Mon Apr 23 09:35:29 2007 -0700 @@ -77,7 +77,6 @@ struct kmem_cache *rt_entry_cache; - /* * Synchronization notes: * @@ -2893,8 +2892,8 @@ * that has prohibited the addition of incomplete ire's. If this * parameter is set, and we find an nce that is in a state other * than ND_REACHABLE, we fail the add. Note that nce_state could be - * something other than ND_REACHABLE if nce_reinit has just - * kicked in and reset the nce. + * something other than ND_REACHABLE if the nce had just expired and + * the ire_create preceding the ire_add added a new ND_INITIAL nce. */ int ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func, @@ -3396,15 +3395,18 @@ * if the nce is NCE_F_CONDEMNED, or if it is not ND_REACHABLE * and the caller has prohibited the addition of incomplete * ire's, we fail the add. Note that nce_state could be - * something other than ND_REACHABLE if nce_reinit has just - * kicked in and reset the nce. + * something other than ND_REACHABLE if the nce had + * just expired and the ire_create preceding the + * ire_add added a new ND_INITIAL nce. */ if ((nce == NULL) || (nce->nce_flags & NCE_F_CONDEMNED) || (!allow_unresolved && (nce->nce_state != ND_REACHABLE))) { - if (nce != NULL) + if (nce != NULL) { + DTRACE_PROBE1(ire__bad__nce, nce_t *, nce); mutex_exit(&nce->nce_lock); + } ire_atomic_end(irb_ptr, ire); mutex_exit(&ipst->ips_ndp4->ndp_g_lock); if (nce != NULL) @@ -6808,17 +6810,30 @@ nce_state = ND_REACHABLE; } else { nce_state = ND_INITIAL; - if (fp_mp) - freemsg(fp_mp); - fp_mp = NULL; + ASSERT(fp_mp == NULL); } nce_flags = 0; } +retry_nce: err = ndp_lookup_then_add(ire_ill, NULL, &addr4, &mask4, NULL, 0, nce_flags, nce_state, &arpce, fp_mp, res_mp); + if (err == EEXIST && NCE_EXPIRED(arpce, ipst)) { + /* + * We looked up an expired nce. + * Go back and try to create one again. + * The res_mp should be intact for the EEXIST case, + * i.e., there are no new references to it, and there + * is no need to copyb it. + */ + ndp_delete(arpce); + NCE_REFRELE(arpce); + arpce = NULL; + goto retry_nce; + } + ip1dbg(("ire 0x%p addr 0x%lx mask 0x%lx type 0x%x; " "found nce 0x%p err %d\n", (void *)ire, (ulong_t)addr4, (ulong_t)mask4, ire->ire_type, (void *)arpce, err)); @@ -6885,18 +6900,11 @@ */ NCE_REFHOLD_TO_REFHOLD_NOTR(ire->ire_nce); } else { - if (NCE_EXPIRED(arpce, ipst)) - arpce = nce_reinit(arpce); - if (arpce != NULL) { - /* - * We are not using this nce_t just yet so release - * the ref taken in ndp_lookup_then_add_v4() - */ - NCE_REFRELE(arpce); - } else { - ip0dbg(("can't reinit arpce for ill 0x%p;\n", - (void *)ire_ill)); - } + /* + * We are not using this nce_t just yet so release + * the ref taken in ndp_lookup_then_add_v4() + */ + NCE_REFRELE(arpce); } return (0); }
--- a/usr/src/uts/common/inet/ip/ip_ndp.c Mon Apr 23 09:19:26 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_ndp.c Mon Apr 23 09:35:29 2007 -0700 @@ -2511,6 +2511,7 @@ uchar_t *h; uchar_t *m = flags_buf; in6_addr_t v6addr; + uint64_t now; /* * Lock the nce to protect nce_res_mp from being changed @@ -2521,9 +2522,21 @@ * In addition, make sure that the mblk has enough space * before writing to it. If is doesn't, allocate a new one. */ - if (nce->nce_ipversion == IPV4_VERSION) - /* Don't include v4 nce_ts in NDP cache entry report */ + if (nce->nce_ipversion == IPV4_VERSION) { + /* + * Don't include v4 NCEs in NDP cache entry report. + * But sanity check for lingering ND_INITIAL entries + * when we do 'ndd -get /dev/ip ip_ndp_cache_report' + */ + if (nce->nce_state == ND_INITIAL) { + + now = TICK_TO_MSEC(lbolt64); + if (now - nce->nce_init_time > 120000) { + DTRACE_PROBE1(nce__stuck, nce_t *, nce); + } + } return; + } ASSERT(ill != NULL); v6addr = nce->nce_mask; @@ -3695,8 +3708,16 @@ nce_thread_exit(nce_t *nce, caddr_t arg) { th_trace_t *th_trace; + uint64_t now; mutex_enter(&nce->nce_lock); + if (nce->nce_state == ND_INITIAL) { + + now = TICK_TO_MSEC(lbolt64); + if (now - nce->nce_init_time > 120000) { + DTRACE_PROBE1(nce__stuck, nce_t *, nce); + } + } th_trace = th_trace_nce_lookup(nce); if (th_trace == NULL) { @@ -3865,10 +3886,13 @@ nce->nce_ll_extract_start = hw_extract_start; nce->nce_fp_mp = (fp_mp? fp_mp : NULL); nce->nce_res_mp = template; - if (state == ND_REACHABLE) + if (state == ND_REACHABLE) { nce->nce_last = TICK_TO_MSEC(lbolt64); - else + } else { nce->nce_last = 0; + if (state == ND_INITIAL) + nce->nce_init_time = TICK_TO_MSEC(lbolt64); + } nce->nce_qd_mp = NULL; nce->nce_mp = mp; if (hw_addr != NULL) @@ -3928,33 +3952,6 @@ } } -nce_t * -nce_reinit(nce_t *nce) -{ - nce_t *newnce = NULL; - in_addr_t nce_addr, nce_mask; - ip_stack_t *ipst = nce->nce_ill->ill_ipst; - - IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); - IN6_V4MAPPED_TO_IPADDR(&nce->nce_mask, nce_mask); - /* - * delete the old one. this will get rid of any ire's pointing - * at this nce. - */ - ndp_delete(nce); - /* - * create a new nce with the same addr and mask. - */ - mutex_enter(&ipst->ips_ndp4->ndp_g_lock); - (void) ndp_add_v4(nce->nce_ill, NULL, &nce_addr, &nce_mask, NULL, 0, 0, - ND_INITIAL, &newnce, NULL, NULL); - mutex_exit(&ipst->ips_ndp4->ndp_g_lock); - /* - * refrele the old nce. - */ - NCE_REFRELE(nce); - return (newnce); -} /* * ndp_walk routine to delete all entries that have a given destination or
--- a/usr/src/uts/common/inet/ip_ndp.h Mon Apr 23 09:19:26 2007 -0700 +++ b/usr/src/uts/common/inet/ip_ndp.h Mon Apr 23 09:35:29 2007 -0700 @@ -77,6 +77,7 @@ uchar_t nce_ipversion; /* IPv4(ARP)/IPv6(NDP) version */ uint_t nce_defense_count; /* number of NDP conflicts */ uint_t nce_defense_time; /* last time defended (secs) */ + uint64_t nce_init_time; /* time when it was set to ND_INITIAL */ #ifdef NCE_DEBUG th_trace_t *nce_trace[IP_TR_HASH_MAX]; boolean_t nce_trace_disable; /* True when alloc fails */ @@ -331,7 +332,6 @@ boolean_t (*)(nce_t *, void *), void *); extern void nce_queue_mp_common(nce_t *, mblk_t *, boolean_t); extern void ndp_flush_qd_mp(nce_t *); -extern nce_t *nce_reinit(nce_t *); extern void nce_delete_hw_changed(nce_t *, void *); extern void nce_fastpath(nce_t *);