# HG changeset patch # User dr146992 # Date 1189818853 25200 # Node ID 7a15930aae3c1a90f38ac21fac16aab2d4248f8e # Parent bd0f88647368c3ea74a1141b67abcccbdcd3c1ff 6588495 IP can use the wrong interface for filtering/qos 6599516 locking in fr_natderef causes lock contention and performance drop diff -r bd0f88647368 -r 7a15930aae3c usr/src/uts/common/inet/ip/ip.c --- a/usr/src/uts/common/inet/ip/ip.c Fri Sep 14 16:10:56 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip.c Fri Sep 14 18:14:13 2007 -0700 @@ -13035,7 +13035,7 @@ /* IP options present */ if (u1) { goto ipoptions; - } else { + } else if (!mctl_present) { /* Check the IP header checksum. */ if (IS_IP_HDR_HWCKSUM(mctl_present, mp, ill)) { /* Clear the IP header h/w cksum flag */ @@ -16435,9 +16435,8 @@ } /* Get the ill_index of the outgoing ILL */ - ill_index = ire->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; - - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); + ill_index = out_ill->ill_phyint->phyint_ifindex; DTRACE_PROBE4(ip4__forwarding__start, ill_t *, in_ill, ill_t *, out_ill, ipha_t *, ipha, mblk_t *, mp); @@ -23141,7 +23140,7 @@ } } - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); DTRACE_PROBE4(ip4__physical__out__start, ill_t *, NULL, ill_t *, out_ill, @@ -23341,7 +23340,7 @@ * is considered to be the "output" side and * ip_wput_local to be the "input" side. */ - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, ill_t *, out_ill, @@ -23367,7 +23366,7 @@ return; } - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, ill_t *, out_ill, @@ -25680,7 +25679,7 @@ ASSERT(q != NULL); /* PFHooks: LOOPBACK_OUT */ - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL, ill_t *, out_ill, @@ -26016,7 +26015,7 @@ ipha->ipha_src = ire->ire_src_addr; /* PFHooks: LOOPBACK_OUT */ - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, ill_t *, out_ill, @@ -29793,7 +29792,7 @@ mp->b_prev = NULL; /* set up ill index for outbound qos processing */ - out_ill = ire->ire_ipif->ipif_ill; + out_ill = ire_to_ill(ire); ill_index = out_ill->ill_phyint->phyint_ifindex; first_mp = ip_wput_attach_llhdr(mp, ire, proc, ill_index); diff -r bd0f88647368 -r 7a15930aae3c usr/src/uts/common/inet/ipf/ip_nat.c --- a/usr/src/uts/common/inet/ipf/ip_nat.c Fri Sep 14 16:10:56 2007 -0700 +++ b/usr/src/uts/common/inet/ipf/ip_nat.c Fri Sep 14 18:14:13 2007 -0700 @@ -1587,8 +1587,6 @@ if (logtype != 0 && ifs->ifs_nat_logging != 0) nat_log(nat, logtype, ifs); - MUTEX_ENTER(&ifs->ifs_ipf_nat_new); - /* * Take it as a general indication that all the pointers are set if * nat_pnext is set. @@ -1629,11 +1627,18 @@ fr_deletequeueentry(&nat->nat_tqe); - nat->nat_ref--; - if (nat->nat_ref > 0) { - MUTEX_EXIT(&ifs->ifs_ipf_nat_new); + MUTEX_ENTER(&nat->nat_lock); + if (nat->nat_ref > 1) { + nat->nat_ref--; + MUTEX_EXIT(&nat->nat_lock); return; } + MUTEX_EXIT(&nat->nat_lock); + + /* + * At this point, nat_ref is 1, doing "--" would make it 0.. + */ + nat->nat_ref = 0; #ifdef IPFILTER_SYNC if (nat->nat_sync) @@ -1667,7 +1672,6 @@ aps_free(nat->nat_aps, ifs); ifs->ifs_nat_stats.ns_inuse--; - MUTEX_EXIT(&ifs->ifs_ipf_nat_new); /* * If there's a fragment table entry too for this nat entry, then @@ -4746,6 +4750,14 @@ /* */ /* Decrement the reference counter for this NAT table entry and free it if */ /* there are no more things using it. */ +/* */ +/* IF nat_ref == 1 when this function is called, then we have an orphan nat */ +/* structure *because* it only gets called on paths _after_ nat_ref has been*/ +/* incremented. If nat_ref == 1 then we shouldn't decrement it here */ +/* because nat_delete() will do that and send nat_ref to -1. */ +/* */ +/* Holding the lock on nat_lock is required to serialise nat_delete() being */ +/* called from a NAT flush ioctl with a deref happening because of a packet.*/ /* ------------------------------------------------------------------------ */ void fr_natderef(natp, ifs) nat_t **natp; @@ -4755,10 +4767,17 @@ nat = *natp; *natp = NULL; + + MUTEX_ENTER(&nat->nat_lock); + if (nat->nat_ref > 1) { + nat->nat_ref--; + MUTEX_EXIT(&nat->nat_lock); + return; + } + MUTEX_EXIT(&nat->nat_lock); + WRITE_ENTER(&ifs->ifs_ipf_nat); - nat->nat_ref--; - if (nat->nat_ref == 0) - nat_delete(nat, NL_EXPIRE, ifs); + nat_delete(nat, NL_EXPIRE, ifs); RWLOCK_EXIT(&ifs->ifs_ipf_nat); } diff -r bd0f88647368 -r 7a15930aae3c usr/src/uts/common/inet/ipf/ip_state.c --- a/usr/src/uts/common/inet/ipf/ip_state.c Fri Sep 14 16:10:56 2007 -0700 +++ b/usr/src/uts/common/inet/ipf/ip_state.c Fri Sep 14 18:14:13 2007 -0700 @@ -2868,9 +2868,15 @@ * If it is still in use by something else, do not go any further, * but note that at this point it is now an orphan. */ - is->is_ref--; - if (is->is_ref > 0) + MUTEX_ENTER(&is->is_lock); + if (is->is_ref > 1) { + is->is_ref--; + MUTEX_EXIT(&is->is_lock); return; + } + MUTEX_EXIT(&is->is_lock); + + is->is_ref = 0; if (is->is_tqehead[0] != NULL) { if (fr_deletetimeoutqueue(is->is_tqehead[0]) == 0) @@ -3823,24 +3829,23 @@ fin = fin; /* LINT */ is = *isp; *isp = NULL; - WRITE_ENTER(&ifs->ifs_ipf_state); - is->is_ref--; - if (is->is_ref == 0) { - is->is_ref++; /* To counter ref-- in fr_delstate() */ - fr_delstate(is, ISL_EXPIRE, ifs); + + MUTEX_ENTER(&is->is_lock); + if (is->is_ref > 1) { + is->is_ref--; + MUTEX_EXIT(&is->is_lock); #ifndef _KERNEL -#if 0 - } else if (((fin->fin_out == 1) || (eol == 1)) && - ((ostate == IPF_TCPS_LAST_ACK) && - (nstate == IPF_TCPS_TIME_WAIT))) { - ; -#else - } else if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || + if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { + fr_delstate(is, ISL_ORPHAN, ifs); + } #endif - fr_delstate(is, ISL_ORPHAN, ifs); -#endif + return; } + MUTEX_EXIT(&is->is_lock); + + WRITE_ENTER(&ifs->ifs_ipf_state); + fr_delstate(is, ISL_EXPIRE, ifs); RWLOCK_EXIT(&ifs->ifs_ipf_state); }