# HG changeset patch # User carlsonj # Date 1189509966 25200 # Node ID e0c678e511a7ef109e134e75531cb91148fa736d # Parent 7b32314731c617f35c5be59c81421c8c35c66eba 6203568 accumulating number of threads behind rw_lock in ire_walk_ill_tables IRB_REFRELE 6591083 IP instances sis_check added stray entries to sparc ip module makefile diff -r 7b32314731c6 -r e0c678e511a7 usr/src/cmd/mdb/common/modules/ip/ip.c --- a/usr/src/cmd/mdb/common/modules/ip/ip.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/cmd/mdb/common/modules/ip/ip.c Tue Sep 11 04:26:06 2007 -0700 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,14 @@ ill_if_t ill_if; } illif_walk_data_t; +typedef struct th_walk_data { + uint_t thw_non_zero_only; + boolean_t thw_match; + uintptr_t thw_matchkey; + uintptr_t thw_ipst; + clock_t thw_lbolt; +} th_walk_data_t; + static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *); static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *); @@ -130,6 +139,44 @@ return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata)); } +int +th_hash_walk_init(mdb_walk_state_t *wsp) +{ + GElf_Sym sym; + list_node_t *next; + + if (wsp->walk_addr == NULL) { + if (mdb_lookup_by_obj("ip", "ip_thread_list", &sym) == 0) { + wsp->walk_addr = sym.st_value; + } else { + mdb_warn("unable to locate ip_thread_list\n"); + return (WALK_ERR); + } + } + + if (mdb_vread(&next, sizeof (next), + wsp->walk_addr + offsetof(list_t, list_head) + + offsetof(list_node_t, list_next)) == -1 || + next == NULL) { + mdb_warn("non-DEBUG image; cannot walk th_hash list\n"); + return (WALK_ERR); + } + + if (mdb_layered_walk("list", wsp) == -1) { + mdb_warn("can't walk 'list'"); + return (WALK_ERR); + } else { + return (WALK_NEXT); + } +} + +int +th_hash_walk_step(mdb_walk_state_t *wsp) +{ + return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer, + wsp->walk_cbdata)); +} + /* * Called with walk_addr being the address of ips_ill_g_heads */ @@ -266,7 +313,7 @@ #endif if 
(mdb_pwalk("illif_stack", wsp->walk_callback, - wsp->walk_cbdata, kaddr) == -1) { + wsp->walk_cbdata, kaddr) == -1) { mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p", kaddr); return (WALK_ERR); @@ -396,7 +443,7 @@ } #ifdef DEBUG mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table_size %u\n", - cache_table_size); + cache_table_size); #endif kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table); @@ -422,7 +469,7 @@ #endif if (mdb_pwalk("ire_next", (mdb_walk_cb_t)ire_format, &verbose, - kaddr) == -1) { + kaddr) == -1) { mdb_warn("can't walk 'ire_next' for ire %p", kaddr); return (WALK_ERR); } @@ -1048,6 +1095,120 @@ mdb_printf("\t-v\tbe verbose (more descriptive)\n"); } +/* + * This is called by ::th_trace (via a callback) when walking the th_hash + * list. It calls modent to find the entries. + */ +/* ARGSUSED */ +static int +modent_summary(uintptr_t addr, const void *data, void *private) +{ + th_walk_data_t *thw = private; + const struct mod_hash_entry *mhe = data; + th_trace_t th; + + if (mdb_vread(&th, sizeof (th), (uintptr_t)mhe->mhe_val) == -1) { + mdb_warn("failed to read th_trace_t %p", mhe->mhe_val); + return (WALK_ERR); + } + + if (th.th_refcnt == 0 && thw->thw_non_zero_only) + return (WALK_NEXT); + + if (!thw->thw_match) { + mdb_printf("%?p %?p %?p %8d %?p\n", thw->thw_ipst, mhe->mhe_key, + mhe->mhe_val, th.th_refcnt, th.th_id); + } else if (thw->thw_matchkey == (uintptr_t)mhe->mhe_key) { + int i, j, k; + tr_buf_t *tr; + + mdb_printf("Object %p in IP stack %p:\n", mhe->mhe_key, + thw->thw_ipst); + i = th.th_trace_lastref; + mdb_printf("\tThread %p refcnt %d:\n", th.th_id, + th.th_refcnt); + for (j = TR_BUF_MAX; j > 0; j--) { + tr = th.th_trbuf + i; + if (tr->tr_depth == 0 || tr->tr_depth > TR_STACK_DEPTH) + break; + mdb_printf("\t T%+ld:\n", tr->tr_time - + thw->thw_lbolt); + for (k = 0; k < tr->tr_depth; k++) + mdb_printf("\t\t%a\n", tr->tr_stack[k]); + if (--i < 0) + i = TR_BUF_MAX - 1; + } + } + return (WALK_NEXT); +} + 
+/* + * This is called by ::th_trace (via a callback) when walking the th_hash + * list. It calls modent to find the entries. + */ +/* ARGSUSED */ +static int +th_hash_summary(uintptr_t addr, const void *data, void *private) +{ + const th_hash_t *thh = data; + th_walk_data_t *thw = private; + + thw->thw_ipst = (uintptr_t)thh->thh_ipst; + return (mdb_pwalk("modent", modent_summary, private, + (uintptr_t)thh->thh_hash)); +} + +/* + * Print or summarize the th_trace_t structures. + */ +static int +th_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + th_walk_data_t thw; + + (void) memset(&thw, 0, sizeof (thw)); + + if (mdb_getopts(argc, argv, + 'n', MDB_OPT_SETBITS, TRUE, &thw.thw_non_zero_only, + NULL) != argc) + return (DCMD_USAGE); + + if (!(flags & DCMD_ADDRSPEC)) { + /* + * No address specified. Walk all of the th_hash_t in the + * system, and summarize the th_trace_t entries in each. + */ + mdb_printf("%?s %?s %?s %8s %?s\n", + "IPSTACK", "OBJECT", "TRACE", "REFCNT", "THREAD"); + thw.thw_match = B_FALSE; + } else { + thw.thw_match = B_TRUE; + thw.thw_matchkey = addr; + if (mdb_readvar(&thw.thw_lbolt, + mdb_prop_postmortem ? "panic_lbolt" : "lbolt") == -1) { + mdb_warn("failed to read lbolt"); + return (DCMD_ERR); + } + } + if (mdb_pwalk("th_hash", th_hash_summary, &thw, NULL) == -1) { + mdb_warn("can't walk th_hash entries"); + return (DCMD_ERR); + } + return (DCMD_OK); +} + +static void +th_trace_help(void) +{ + mdb_printf("If given an address of an ill_t, ipif_t, ire_t, or nce_t, " + "print the\n" + "corresponding th_trace_t structure in detail. 
Otherwise, if no " + "address is\n" + "given, then summarize all th_trace_t structures.\n\n"); + mdb_printf("Options:\n" + "\t-n\tdisplay only entries with non-zero th_refcnt\n"); +} + static const mdb_dcmd_t dcmds[] = { { "illif", "?[-P v4 | v6]", "display or filter IP Lower Level InterFace structures", illif, @@ -1060,6 +1221,8 @@ { "tcphdr", ":", "display a TCP header", tcphdr }, { "udphdr", ":", "display an UDP header", udphdr }, { "sctphdr", ":", "display an SCTP header", sctphdr }, + { "th_trace", "?[-n]", "display th_trace_t structures", th_trace, + th_trace_help }, { NULL } }; @@ -1077,6 +1240,8 @@ ire_next_walk_init, ire_next_walk_step, NULL }, { "ip_stacks", "walk all the ip_stack_t", ip_stacks_walk_init, ip_stacks_walk_step, NULL }, + { "th_hash", "walk all the th_hash_t entries", + th_hash_walk_init, th_hash_walk_step, NULL }, { NULL } }; diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/disp/thread.c --- a/usr/src/uts/common/disp/thread.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/disp/thread.c Tue Sep 11 04:26:06 2007 -0700 @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -43,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -75,8 +73,6 @@ #include #include #include -#include -#include struct kmem_cache *thread_cache; /* cache of free threads */ struct kmem_cache *lwp_cache; /* cache of free lwps */ @@ -561,23 +557,14 @@ } } -/* - * This is a function which is called from thread_exit - * that can be used to debug reference count issues in IP. 
- */ -void (*ip_cleanup_func)(void); - void -thread_exit() +thread_exit(void) { kthread_t *t = curthread; if ((t->t_proc_flag & TP_ZTHREAD) != 0) cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called"); - if (ip_cleanup_func != NULL) - (*ip_cleanup_func)(); - tsd_exit(); /* Clean up this thread's TSD */ kcpc_passivate(); /* clean up performance counter state */ diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip.h --- a/usr/src/uts/common/inet/ip.h Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip.h Tue Sep 11 04:26:06 2007 -0700 @@ -58,11 +58,9 @@ #include #include #include +#include #ifdef DEBUG -#define ILL_DEBUG -#define IRE_DEBUG -#define NCE_DEBUG #define CONN_DEBUG #endif @@ -1235,21 +1233,26 @@ * do not allow the granularity need to trace refrences to ipif/ill/ire's. This * mechanism should be revisited once dtrace is available. */ -#define IP_STACK_DEPTH 15 +#define TR_STACK_DEPTH 14 typedef struct tr_buf_s { int tr_depth; - pc_t tr_stack[IP_STACK_DEPTH]; + clock_t tr_time; + pc_t tr_stack[TR_STACK_DEPTH]; } tr_buf_t; typedef struct th_trace_s { - struct th_trace_s *th_next; - struct th_trace_s **th_prev; + int th_refcnt; + uint_t th_trace_lastref; kthread_t *th_id; - int th_refcnt; - uint_t th_trace_lastref; #define TR_BUF_MAX 38 - tr_buf_t th_trbuf[TR_BUF_MAX]; + tr_buf_t th_trbuf[TR_BUF_MAX]; } th_trace_t; + +typedef struct th_hash_s { + list_node_t thh_link; + mod_hash_t *thh_hash; + ip_stack_t *thh_ipst; +} th_hash_t; #endif /* The following are ipif_state_flags */ @@ -1316,11 +1319,7 @@ zoneid_t ipif_zoneid; /* zone ID number */ timeout_id_t ipif_recovery_id; /* Timer for DAD recovery */ -#ifdef ILL_DEBUG -#define IP_TR_HASH_MAX 64 - th_trace_t *ipif_trace[IP_TR_HASH_MAX]; - boolean_t ipif_trace_disable; /* True when alloc fails */ -#endif + boolean_t ipif_trace_disable; /* True when alloc fails */ } ipif_t; /* @@ -1380,20 +1379,16 @@ #define IP_TR_HASH(tid) ((((uintptr_t)tid) >> 6) & (IP_TR_HASH_MAX - 1)) -#ifdef 
ILL_DEBUG +#ifdef DEBUG #define IPIF_TRACE_REF(ipif) ipif_trace_ref(ipif) #define ILL_TRACE_REF(ill) ill_trace_ref(ill) #define IPIF_UNTRACE_REF(ipif) ipif_untrace_ref(ipif) #define ILL_UNTRACE_REF(ill) ill_untrace_ref(ill) -#define ILL_TRACE_CLEANUP(ill) ill_trace_cleanup(ill) -#define IPIF_TRACE_CLEANUP(ipif) ipif_trace_cleanup(ipif) #else #define IPIF_TRACE_REF(ipif) #define ILL_TRACE_REF(ill) #define IPIF_UNTRACE_REF(ipif) #define ILL_UNTRACE_REF(ill) -#define ILL_TRACE_CLEANUP(ill) -#define IPIF_TRACE_CLEANUP(ipif) #endif /* IPv4 compatability macros */ @@ -1462,12 +1457,13 @@ boolean_t ipsq_split; /* ipsq may need to be split */ int ipsq_waitfor; /* Values encoded below */ char ipsq_name[LIFNAMSIZ+1]; /* same as phyint_groupname */ - -#ifdef ILL_DEBUG + ip_stack_t *ipsq_ipst; /* Does not have a netstack_hold */ + +#ifdef DEBUG int ipsq_depth; /* debugging aid */ - pc_t ipsq_stack[IP_STACK_DEPTH]; /* debugging aid */ +#define IPSQ_STACK_DEPTH 15 + pc_t ipsq_stack[IPSQ_STACK_DEPTH]; /* debugging aid */ #endif - ip_stack_t *ipsq_ipst; /* Does not have a netstack_hold */ } ipsq_t; /* ipsq_flags */ @@ -1968,10 +1964,7 @@ t_uscalar_t ill_dlpi_pending; /* Last DLPI primitive issued */ uint_t ill_usesrc_ifindex; /* use src addr from this ILL */ struct ill_s *ill_usesrc_grp_next; /* Next ILL in the usesrc group */ -#ifdef ILL_DEBUG - th_trace_t *ill_trace[IP_TR_HASH_MAX]; boolean_t ill_trace_disable; /* True when alloc fails */ -#endif zoneid_t ill_zoneid; ip_stack_t *ill_ipst; /* Corresponds to a netstack_hold */ } ill_t; @@ -2319,26 +2312,21 @@ * holding the lock. Currently ip_wput does this for caching IRE_CACHEs. 
*/ -#ifndef IRE_DEBUG - -#define IRE_REFHOLD_NOTR(ire) IRE_REFHOLD(ire) +#ifdef DEBUG +#define IRE_UNTRACE_REF(ire) ire_untrace_ref(ire); +#define IRE_TRACE_REF(ire) ire_trace_ref(ire); +#else #define IRE_UNTRACE_REF(ire) #define IRE_TRACE_REF(ire) - -#else +#endif #define IRE_REFHOLD_NOTR(ire) { \ atomic_add_32(&(ire)->ire_refcnt, 1); \ ASSERT((ire)->ire_refcnt != 0); \ } -#define IRE_UNTRACE_REF(ire) ire_untrace_ref(ire); -#define IRE_TRACE_REF(ire) ire_trace_ref(ire); -#endif - #define IRE_REFHOLD(ire) { \ - atomic_add_32(&(ire)->ire_refcnt, 1); \ - ASSERT((ire)->ire_refcnt != 0); \ + IRE_REFHOLD_NOTR(ire); \ IRE_TRACE_REF(ire); \ } @@ -2358,30 +2346,19 @@ * To avoid bloating the code, we use the function "ire_refrele" * which essentially calls the macro. */ -#ifndef IRE_DEBUG -#define IRE_REFRELE(ire) { \ +#define IRE_REFRELE_NOTR(ire) { \ ASSERT((ire)->ire_refcnt != 0); \ membar_exit(); \ if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0) \ ire_inactive(ire); \ } -#define IRE_REFRELE_NOTR(ire) IRE_REFRELE(ire) -#else + #define IRE_REFRELE(ire) { \ - if (ire->ire_bucket != NULL) \ - ire_untrace_ref(ire); \ - ASSERT((ire)->ire_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0) \ - ire_inactive(ire); \ + if (ire->ire_bucket != NULL) { \ + IRE_UNTRACE_REF(ire); \ + } \ + IRE_REFRELE_NOTR(ire); \ } -#define IRE_REFRELE_NOTR(ire) { \ - ASSERT((ire)->ire_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0) \ - ire_inactive(ire); \ -} -#endif /* * Bump up the reference count on the hash bucket - IRB to @@ -2517,11 +2494,8 @@ uint_t ire_stq_ifindex; uint_t ire_defense_count; /* number of ARP conflicts */ uint_t ire_defense_time; /* last time defended (secs) */ + boolean_t ire_trace_disable; /* True when alloc fails */ ip_stack_t *ire_ipst; /* Does not have a netstack_hold */ -#ifdef IRE_DEBUG - th_trace_t *ire_trace[IP_TR_HASH_MAX]; - boolean_t ire_trace_disable; /* True when alloc fails */ -#endif 
} ire_t; /* IPv4 compatiblity macros */ @@ -3067,6 +3041,9 @@ #endif extern int ip_debug; +extern uint_t ip_thread_data; +extern krwlock_t ip_thread_rwlock; +extern list_t ip_thread_list; #ifdef IP_DEBUG #include @@ -3195,14 +3172,13 @@ extern void ire_inactive(ire_t *); extern boolean_t irb_inactive(irb_t *); extern ire_t *ire_unlink(irb_t *); -#ifdef IRE_DEBUG -extern void ire_trace_ref(ire_t *ire); -extern void ire_untrace_ref(ire_t *ire); -extern void ire_thread_exit(ire_t *ire, caddr_t); -#endif -#ifdef ILL_DEBUG -extern void ill_trace_cleanup(ill_t *); -extern void ipif_trace_cleanup(ipif_t *); + +#ifdef DEBUG +extern boolean_t th_trace_ref(const void *, ip_stack_t *); +extern void th_trace_unref(const void *); +extern void th_trace_cleanup(const void *, boolean_t); +extern void ire_trace_ref(ire_t *); +extern void ire_untrace_ref(ire_t *); #endif extern int ip_srcid_insert(const in6_addr_t *, zoneid_t, ip_stack_t *); diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip/ip.c --- a/usr/src/uts/common/inet/ip/ip.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip.c Tue Sep 11 04:26:06 2007 -0700 @@ -154,6 +154,15 @@ int ip_modclose_ackwait_ms = 3000; /* + * It would be nice to have these present only in DEBUG systems, but the + * current design of the global symbol checking logic requires them to be + * unconditionally present. + */ +uint_t ip_thread_data; /* TSD key for debug support */ +krwlock_t ip_thread_rwlock; +list_t ip_thread_list; + +/* * Structure to represent a linked list of msgblks. Used by ip_snmp_ functions. */ @@ -5757,6 +5766,12 @@ ip_ire_g_fini(); inet_minor_destroy(ip_minor_arena); +#ifdef DEBUG + list_destroy(&ip_thread_list); + rw_destroy(&ip_thread_rwlock); + tsd_destroy(&ip_thread_data); +#endif + netstack_unregister(NS_IP); } @@ -5888,6 +5903,23 @@ } /* + * This function is called from the TSD destructor, and is used to debug + * reference count issues in IP. See block comment in for + * details. 
+ */ +static void +ip_thread_exit(void *phash) +{ + th_hash_t *thh = phash; + + rw_enter(&ip_thread_rwlock, RW_WRITER); + list_remove(&ip_thread_list, thh); + rw_exit(&ip_thread_rwlock); + mod_hash_destroy_hash(thh->thh_hash); + kmem_free(thh, sizeof (*thh)); +} + +/* * Called when the IP kernel module is loaded into the kernel */ void @@ -5916,9 +5948,11 @@ ip_ire_g_init(); ip_net_g_init(); -#ifdef ILL_DEBUG - /* Default cleanup function */ - ip_cleanup_func = ip_thread_exit; +#ifdef DEBUG + tsd_create(&ip_thread_data, ip_thread_exit); + rw_init(&ip_thread_rwlock, NULL, RW_DEFAULT, NULL); + list_create(&ip_thread_list, sizeof (th_hash_t), + offsetof(th_hash_t, thh_link)); #endif /* diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip/ip_if.c --- a/usr/src/uts/common/inet/ip/ip_if.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_if.c Tue Sep 11 04:26:06 2007 -0700 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -255,6 +256,11 @@ static void conn_cleanup_stale_ire(conn_t *, caddr_t); +#ifdef DEBUG +static void ill_trace_cleanup(const ill_t *); +static void ipif_trace_cleanup(const ipif_t *); +#endif + /* * if we go over the memory footprint limit more than once in this msec * interval, we'll start pruning aggressively. 
@@ -937,11 +943,14 @@ } while (mpp++ != &ill->ill_last_mp_to_free); ill_free_mib(ill); + +#ifdef DEBUG + ill_trace_cleanup(ill); +#endif + /* Drop refcnt here */ netstack_rele(ill->ill_ipst->ips_netstack); ill->ill_ipst = NULL; - - ILL_TRACE_CLEANUP(ill); } static void @@ -4654,8 +4663,9 @@ ipsq->ipsq_writer = curthread; ipsq->ipsq_reentry_cnt = 1; ipsq->ipsq_ipst = ill->ill_ipst; /* No netstack_hold */ -#ifdef ILL_DEBUG - ipsq->ipsq_depth = getpcstack((pc_t *)ipsq->ipsq_stack, IP_STACK_DEPTH); +#ifdef DEBUG + ipsq->ipsq_depth = getpcstack((pc_t *)ipsq->ipsq_stack, + IPSQ_STACK_DEPTH); #endif (void) strcpy(ipsq->ipsq_name, ill->ill_name); return (B_TRUE); @@ -5035,7 +5045,7 @@ ill->ill_phyint->phyint_ipsq->ipsq_writer = NULL; ill->ill_phyint->phyint_ipsq->ipsq_reentry_cnt--; ASSERT(ill->ill_phyint->phyint_ipsq->ipsq_reentry_cnt == 0); -#ifdef ILL_DEBUG +#ifdef DEBUG ill->ill_phyint->phyint_ipsq->ipsq_depth = 0; #endif ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE); @@ -6441,9 +6451,9 @@ } } -#ifdef ILL_DEBUG +#ifdef DEBUG /* Reuse trace buffer from beginning (if reached the end) and record trace */ -void +static void th_trace_rrecord(th_trace_t *th_trace) { tr_buf_t *tr_buf; @@ -6455,301 +6465,226 @@ lastref = 0; th_trace->th_trace_lastref = lastref; tr_buf = &th_trace->th_trbuf[lastref]; - tr_buf->tr_depth = getpcstack(tr_buf->tr_stack, IP_STACK_DEPTH); -} - -th_trace_t * -th_trace_ipif_lookup(ipif_t *ipif) -{ - int bucket_id; - th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); - - bucket_id = IP_TR_HASH(curthread); - ASSERT(bucket_id < IP_TR_HASH_MAX); - - for (th_trace = ipif->ipif_trace[bucket_id]; th_trace != NULL; - th_trace = th_trace->th_next) { - if (th_trace->th_id == curthread) - return (th_trace); - } - return (NULL); -} - -void -ipif_trace_ref(ipif_t *ipif) -{ - int bucket_id; - th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); - - if (ipif->ipif_trace_disable) - return; - - /* - * Attempt to locate 
the trace buffer for the curthread. - * If it does not exist, then allocate a new trace buffer - * and link it in list of trace bufs for this ipif, at the head - */ - th_trace = th_trace_ipif_lookup(ipif); - if (th_trace == NULL) { - bucket_id = IP_TR_HASH(curthread); - th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), - KM_NOSLEEP); - if (th_trace == NULL) { - ipif->ipif_trace_disable = B_TRUE; - ipif_trace_cleanup(ipif); - return; - } - th_trace->th_id = curthread; - th_trace->th_next = ipif->ipif_trace[bucket_id]; - th_trace->th_prev = &ipif->ipif_trace[bucket_id]; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = &th_trace->th_next; - ipif->ipif_trace[bucket_id] = th_trace; - } - ASSERT(th_trace->th_refcnt >= 0 && - th_trace->th_refcnt < TR_BUF_MAX -1); - th_trace->th_refcnt++; - th_trace_rrecord(th_trace); -} - -void -ipif_untrace_ref(ipif_t *ipif) + tr_buf->tr_time = lbolt; + tr_buf->tr_depth = getpcstack(tr_buf->tr_stack, TR_STACK_DEPTH); +} + +static void +th_trace_free(void *value) +{ + th_trace_t *th_trace = value; + + ASSERT(th_trace->th_refcnt == 0); + kmem_free(th_trace, sizeof (*th_trace)); +} + +/* + * Find or create the per-thread hash table used to track object references. + * The ipst argument is NULL if we shouldn't allocate. + * + * Accesses per-thread data, so there's no need to lock here. + */ +static mod_hash_t * +th_trace_gethash(ip_stack_t *ipst) +{ + th_hash_t *thh; + + if ((thh = tsd_get(ip_thread_data)) == NULL && ipst != NULL) { + mod_hash_t *mh; + char name[256]; + size_t objsize, rshift; + int retv; + + if ((thh = kmem_alloc(sizeof (*thh), KM_NOSLEEP)) == NULL) + return (NULL); + (void) snprintf(name, sizeof (name), "th_trace_%p", curthread); + + /* + * We use mod_hash_create_extended here rather than the more + * obvious mod_hash_create_ptrhash because the latter has a + * hard-coded KM_SLEEP, and we'd prefer to fail rather than + * block. 
+ */ + objsize = MAX(MAX(sizeof (ill_t), sizeof (ipif_t)), + MAX(sizeof (ire_t), sizeof (nce_t))); + rshift = highbit(objsize); + mh = mod_hash_create_extended(name, 64, mod_hash_null_keydtor, + th_trace_free, mod_hash_byptr, (void *)rshift, + mod_hash_ptrkey_cmp, KM_NOSLEEP); + if (mh == NULL) { + kmem_free(thh, sizeof (*thh)); + return (NULL); + } + thh->thh_hash = mh; + thh->thh_ipst = ipst; + /* + * We trace ills, ipifs, ires, and nces. All of these are + * per-IP-stack, so the lock on the thread list is as well. + */ + rw_enter(&ip_thread_rwlock, RW_WRITER); + list_insert_tail(&ip_thread_list, thh); + rw_exit(&ip_thread_rwlock); + retv = tsd_set(ip_thread_data, thh); + ASSERT(retv == 0); + } + return (thh != NULL ? thh->thh_hash : NULL); +} + +boolean_t +th_trace_ref(const void *obj, ip_stack_t *ipst) { th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); - - if (ipif->ipif_trace_disable) - return; - th_trace = th_trace_ipif_lookup(ipif); - ASSERT(th_trace != NULL); - ASSERT(th_trace->th_refcnt > 0); - - th_trace->th_refcnt--; - th_trace_rrecord(th_trace); -} - -th_trace_t * -th_trace_ill_lookup(ill_t *ill) -{ - th_trace_t *th_trace; - int bucket_id; - - ASSERT(MUTEX_HELD(&ill->ill_lock)); - - bucket_id = IP_TR_HASH(curthread); - ASSERT(bucket_id < IP_TR_HASH_MAX); - - for (th_trace = ill->ill_trace[bucket_id]; th_trace != NULL; - th_trace = th_trace->th_next) { - if (th_trace->th_id == curthread) - return (th_trace); - } - return (NULL); -} - -void -ill_trace_ref(ill_t *ill) -{ - int bucket_id; - th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&ill->ill_lock)); - if (ill->ill_trace_disable) - return; - /* - * Attempt to locate the trace buffer for the curthread. 
- * If it does not exist, then allocate a new trace buffer - * and link it in list of trace bufs for this ill, at the head - */ - th_trace = th_trace_ill_lookup(ill); - if (th_trace == NULL) { - bucket_id = IP_TR_HASH(curthread); - th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), - KM_NOSLEEP); - if (th_trace == NULL) { - ill->ill_trace_disable = B_TRUE; - ill_trace_cleanup(ill); - return; - } + mod_hash_t *mh; + mod_hash_val_t val; + + if ((mh = th_trace_gethash(ipst)) == NULL) + return (B_FALSE); + + /* + * Attempt to locate the trace buffer for this obj and thread. + * If it does not exist, then allocate a new trace buffer and + * insert into the hash. + */ + if (mod_hash_find(mh, (mod_hash_key_t)obj, &val) == MH_ERR_NOTFOUND) { + th_trace = kmem_zalloc(sizeof (th_trace_t), KM_NOSLEEP); + if (th_trace == NULL) + return (B_FALSE); + th_trace->th_id = curthread; - th_trace->th_next = ill->ill_trace[bucket_id]; - th_trace->th_prev = &ill->ill_trace[bucket_id]; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = &th_trace->th_next; - ill->ill_trace[bucket_id] = th_trace; - } + if (mod_hash_insert(mh, (mod_hash_key_t)obj, + (mod_hash_val_t)th_trace) != 0) { + kmem_free(th_trace, sizeof (th_trace_t)); + return (B_FALSE); + } + } else { + th_trace = (th_trace_t *)val; + } + ASSERT(th_trace->th_refcnt >= 0 && th_trace->th_refcnt < TR_BUF_MAX - 1); th_trace->th_refcnt++; th_trace_rrecord(th_trace); + return (B_TRUE); +} + +/* + * For the purpose of tracing a reference release, we assume that global + * tracing is always on and that the same thread initiated the reference hold + * is releasing. 
+ */ +void +th_trace_unref(const void *obj) +{ + int retv; + mod_hash_t *mh; + th_trace_t *th_trace; + mod_hash_val_t val; + + mh = th_trace_gethash(NULL); + retv = mod_hash_find(mh, (mod_hash_key_t)obj, &val); + ASSERT(retv == 0); + th_trace = (th_trace_t *)val; + + ASSERT(th_trace->th_refcnt > 0); + th_trace->th_refcnt--; + th_trace_rrecord(th_trace); +} + +/* + * If tracing has been disabled, then we assume that the reference counts are + * now useless, and we clear them out before destroying the entries. + */ +void +th_trace_cleanup(const void *obj, boolean_t trace_disable) +{ + th_hash_t *thh; + mod_hash_t *mh; + mod_hash_val_t val; + th_trace_t *th_trace; + int retv; + + rw_enter(&ip_thread_rwlock, RW_READER); + for (thh = list_head(&ip_thread_list); thh != NULL; + thh = list_next(&ip_thread_list, thh)) { + if (mod_hash_find(mh = thh->thh_hash, (mod_hash_key_t)obj, + &val) == 0) { + th_trace = (th_trace_t *)val; + if (trace_disable) + th_trace->th_refcnt = 0; + retv = mod_hash_destroy(mh, (mod_hash_key_t)obj); + ASSERT(retv == 0); + } + } + rw_exit(&ip_thread_rwlock); +} + +void +ipif_trace_ref(ipif_t *ipif) +{ + ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); + + if (ipif->ipif_trace_disable) + return; + + if (!th_trace_ref(ipif, ipif->ipif_ill->ill_ipst)) { + ipif->ipif_trace_disable = B_TRUE; + ipif_trace_cleanup(ipif); + } +} + +void +ipif_untrace_ref(ipif_t *ipif) +{ + ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock)); + + if (!ipif->ipif_trace_disable) + th_trace_unref(ipif); +} + +void +ill_trace_ref(ill_t *ill) +{ + ASSERT(MUTEX_HELD(&ill->ill_lock)); + + if (ill->ill_trace_disable) + return; + + if (!th_trace_ref(ill, ill->ill_ipst)) { + ill->ill_trace_disable = B_TRUE; + ill_trace_cleanup(ill); + } } void ill_untrace_ref(ill_t *ill) { - th_trace_t *th_trace; - ASSERT(MUTEX_HELD(&ill->ill_lock)); - if (ill->ill_trace_disable) - return; - th_trace = th_trace_ill_lookup(ill); - ASSERT(th_trace != NULL); - ASSERT(th_trace->th_refcnt > 0); - - 
th_trace->th_refcnt--; - th_trace_rrecord(th_trace); -} - -/* - * Verify that this thread has no refs to the ipif and free - * the trace buffers - */ -/* ARGSUSED */ -void -ipif_thread_exit(ipif_t *ipif, void *dummy) -{ - th_trace_t *th_trace; - - mutex_enter(&ipif->ipif_ill->ill_lock); - - th_trace = th_trace_ipif_lookup(ipif); - if (th_trace == NULL) { - mutex_exit(&ipif->ipif_ill->ill_lock); - return; - } - ASSERT(th_trace->th_refcnt == 0); - /* unlink th_trace and free it */ - *th_trace->th_prev = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = th_trace->th_prev; - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); - - mutex_exit(&ipif->ipif_ill->ill_lock); -} - -/* - * Verify that this thread has no refs to the ill and free - * the trace buffers - */ -/* ARGSUSED */ -void -ill_thread_exit(ill_t *ill, void *dummy) -{ - th_trace_t *th_trace; - - mutex_enter(&ill->ill_lock); - - th_trace = th_trace_ill_lookup(ill); - if (th_trace == NULL) { - mutex_exit(&ill->ill_lock); - return; - } - ASSERT(th_trace->th_refcnt == 0); - /* unlink th_trace and free it */ - *th_trace->th_prev = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = th_trace->th_prev; - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); - - mutex_exit(&ill->ill_lock); -} -#endif - -#ifdef ILL_DEBUG -void -ip_thread_exit_stack(ip_stack_t *ipst) -{ - ill_t *ill; - ipif_t *ipif; - ill_walk_context_t ctx; - - rw_enter(&ipst->ips_ill_g_lock, RW_READER); - ill = ILL_START_WALK_ALL(&ctx, ipst); - for (; ill != NULL; ill = ill_next(&ctx, ill)) { - for (ipif = ill->ill_ipif; ipif != NULL; - ipif = ipif->ipif_next) { - ipif_thread_exit(ipif, NULL); - } - ill_thread_exit(ill, NULL); - } - rw_exit(&ipst->ips_ill_g_lock); - - ire_walk(ire_thread_exit, NULL, ipst); - ndp_walk_common(ipst->ips_ndp4, NULL, nce_thread_exit, NULL, B_FALSE); - 
ndp_walk_common(ipst->ips_ndp6, NULL, nce_thread_exit, NULL, B_FALSE); -} - -/* - * This is a function which is called from thread_exit - * that can be used to debug reference count issues in IP. See comment in - * on how it is used. - */ -void -ip_thread_exit(void) -{ - netstack_t *ns; - - ns = netstack_get_current(); - if (ns != NULL) { - ip_thread_exit_stack(ns->netstack_ip); - netstack_rele(ns); - } -} - -/* - * Called when ipif is unplumbed or when memory alloc fails - */ -void -ipif_trace_cleanup(ipif_t *ipif) -{ - int i; - th_trace_t *th_trace; - th_trace_t *th_trace_next; - - for (i = 0; i < IP_TR_HASH_MAX; i++) { - for (th_trace = ipif->ipif_trace[i]; th_trace != NULL; - th_trace = th_trace_next) { - th_trace_next = th_trace->th_next; - kmem_free(th_trace, sizeof (th_trace_t)); - } - ipif->ipif_trace[i] = NULL; - } -} - -/* - * Called when ill is unplumbed or when memory alloc fails - */ -void -ill_trace_cleanup(ill_t *ill) -{ - int i; - th_trace_t *th_trace; - th_trace_t *th_trace_next; - - for (i = 0; i < IP_TR_HASH_MAX; i++) { - for (th_trace = ill->ill_trace[i]; th_trace != NULL; - th_trace = th_trace_next) { - th_trace_next = th_trace->th_next; - kmem_free(th_trace, sizeof (th_trace_t)); - } - ill->ill_trace[i] = NULL; - } -} - -#else -void ip_thread_exit(void) {} -#endif + if (!ill->ill_trace_disable) + th_trace_unref(ill); +} + +/* + * Called when ipif is unplumbed or when memory alloc fails. Note that on + * failure, ipif_trace_disable is set. + */ +static void +ipif_trace_cleanup(const ipif_t *ipif) +{ + th_trace_cleanup(ipif, ipif->ipif_trace_disable); +} + +/* + * Called when ill is unplumbed or when memory alloc fails. Note that on + * failure, ill_trace_disable is set. 
+ */ +static void +ill_trace_cleanup(const ill_t *ill) +{ + th_trace_cleanup(ill, ill->ill_trace_disable); +} +#endif /* DEBUG */ void ipif_refhold_locked(ipif_t *ipif) @@ -7732,8 +7667,8 @@ ASSERT(ipsq->ipsq_reentry_cnt == 0); ipsq->ipsq_writer = curthread; ipsq->ipsq_reentry_cnt++; -#ifdef ILL_DEBUG - ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IP_STACK_DEPTH); +#ifdef DEBUG + ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IPSQ_STACK_DEPTH); #endif mutex_exit(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); @@ -7816,8 +7751,9 @@ mutex_exit(&ipsq->ipsq_lock); mutex_exit(&ill->ill_lock); RELEASE_CONN_LOCK(q); -#ifdef ILL_DEBUG - ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IP_STACK_DEPTH); +#ifdef DEBUG + ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, + IPSQ_STACK_DEPTH); #endif return (ipsq); } @@ -7977,7 +7913,7 @@ ipsq->ipsq_writer = NULL; ipsq->ipsq_reentry_cnt--; ASSERT(ipsq->ipsq_reentry_cnt == 0); -#ifdef ILL_DEBUG +#ifdef DEBUG ipsq->ipsq_depth = 0; #endif mutex_exit(&ipsq->ipsq_lock); @@ -14681,7 +14617,7 @@ newipsq->ipsq_writer = NULL; newipsq->ipsq_reentry_cnt--; ASSERT(newipsq->ipsq_reentry_cnt == 0); -#ifdef ILL_DEBUG +#ifdef DEBUG newipsq->ipsq_depth = 0; #endif @@ -17811,7 +17747,9 @@ } ip_rts_ifmsg(rep_ipif_ptr); ip_rts_newaddrmsg(RTM_DELETE, 0, rep_ipif_ptr); - IPIF_TRACE_CLEANUP(rep_ipif_ptr); +#ifdef DEBUG + ipif_trace_cleanup(rep_ipif_ptr); +#endif mi_free(rep_ipif_ptr); } @@ -19117,7 +19055,9 @@ */ ASSERT(ilm_walk_ipif(ipif) == 0); - IPIF_TRACE_CLEANUP(ipif); +#ifdef DEBUG + ipif_trace_cleanup(ipif); +#endif /* Ask SCTP to take it out of it list */ sctp_update_ipif(ipif, SCTP_IPIF_REMOVE); diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip/ip_ire.c --- a/usr/src/uts/common/inet/ip/ip_ire.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_ire.c Tue Sep 11 04:26:06 2007 -0700 @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -73,7 +72,6 @@ #include #include -#include struct 
kmem_cache *rt_entry_cache; @@ -355,11 +353,9 @@ static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func, void *arg, uchar_t vers, ill_t *ill); static void ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt); -extern void ill_unlock_ills(ill_t **list, int cnt); static void ip_nce_clookup_and_delete(nce_t *nce, void *arg); -extern void th_trace_rrecord(th_trace_t *); -#ifdef IRE_DEBUG -static void ire_trace_inactive(ire_t *); +#ifdef DEBUG +static void ire_trace_cleanup(const ire_t *); #endif /* @@ -1759,10 +1755,7 @@ } ire->ire_refcnt = 1; ire->ire_ipst = ipst; /* No netstack_hold */ - -#ifdef IRE_DEBUG - bzero(ire->ire_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); -#endif + ire->ire_trace_disable = B_FALSE; return (B_TRUE); } @@ -3792,8 +3785,8 @@ } ire->ire_ipif = NULL; -#ifdef IRE_DEBUG - ire_trace_inactive(ire); +#ifdef DEBUG + ire_trace_cleanup(ire); #endif mutex_destroy(&ire->ire_lock); if (ire->ire_ipversion == IPV6_VERSION) { @@ -5431,147 +5424,40 @@ return (NULL); } -#ifdef IRE_DEBUG -th_trace_t * -th_trace_ire_lookup(ire_t *ire) -{ - int bucket_id; - th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&ire->ire_lock)); - - bucket_id = IP_TR_HASH(curthread); - ASSERT(bucket_id < IP_TR_HASH_MAX); - - for (th_trace = ire->ire_trace[bucket_id]; th_trace != NULL; - th_trace = th_trace->th_next) { - if (th_trace->th_id == curthread) - return (th_trace); - } - return (NULL); -} - +#ifdef DEBUG void ire_trace_ref(ire_t *ire) { - int bucket_id; - th_trace_t *th_trace; - - /* - * Attempt to locate the trace buffer for the curthread. 
- * If it does not exist, then allocate a new trace buffer - * and link it in list of trace bufs for this ipif, at the head - */ mutex_enter(&ire->ire_lock); - if (ire->ire_trace_disable == B_TRUE) { + if (ire->ire_trace_disable) { mutex_exit(&ire->ire_lock); return; } - th_trace = th_trace_ire_lookup(ire); - if (th_trace == NULL) { - bucket_id = IP_TR_HASH(curthread); - th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), - KM_NOSLEEP); - if (th_trace == NULL) { - ire->ire_trace_disable = B_TRUE; - mutex_exit(&ire->ire_lock); - ire_trace_inactive(ire); - return; - } - - th_trace->th_id = curthread; - th_trace->th_next = ire->ire_trace[bucket_id]; - th_trace->th_prev = &ire->ire_trace[bucket_id]; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = &th_trace->th_next; - ire->ire_trace[bucket_id] = th_trace; + + if (th_trace_ref(ire, ire->ire_ipst)) { + mutex_exit(&ire->ire_lock); + } else { + ire->ire_trace_disable = B_TRUE; + mutex_exit(&ire->ire_lock); + ire_trace_cleanup(ire); } - ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); - th_trace->th_refcnt++; - th_trace_rrecord(th_trace); - mutex_exit(&ire->ire_lock); -} - -void -ire_trace_free(th_trace_t *th_trace) -{ - /* unlink th_trace and free it */ - *th_trace->th_prev = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = th_trace->th_prev; - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); } void ire_untrace_ref(ire_t *ire) { - th_trace_t *th_trace; - mutex_enter(&ire->ire_lock); - - if (ire->ire_trace_disable == B_TRUE) { - mutex_exit(&ire->ire_lock); - return; - } - - th_trace = th_trace_ire_lookup(ire); - ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); - th_trace_rrecord(th_trace); - th_trace->th_refcnt--; - - if (th_trace->th_refcnt == 0) - ire_trace_free(th_trace); - + if (!ire->ire_trace_disable) + th_trace_unref(ire); mutex_exit(&ire->ire_lock); } static void -ire_trace_inactive(ire_t *ire) 
+ire_trace_cleanup(const ire_t *ire) { - th_trace_t *th_trace; - int i; - - mutex_enter(&ire->ire_lock); - for (i = 0; i < IP_TR_HASH_MAX; i++) { - while (ire->ire_trace[i] != NULL) { - th_trace = ire->ire_trace[i]; - - /* unlink th_trace and free it */ - ire->ire_trace[i] = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = - &ire->ire_trace[i]; - - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); - } - } - - mutex_exit(&ire->ire_lock); + th_trace_cleanup(ire, ire->ire_trace_disable); } - -/* ARGSUSED */ -void -ire_thread_exit(ire_t *ire, caddr_t arg) -{ - th_trace_t *th_trace; - - mutex_enter(&ire->ire_lock); - th_trace = th_trace_ire_lookup(ire); - if (th_trace == NULL) { - mutex_exit(&ire->ire_lock); - return; - } - ASSERT(th_trace->th_refcnt == 0); - - ire_trace_free(th_trace); - mutex_exit(&ire->ire_lock); -} - -#endif +#endif /* DEBUG */ /* * Generate a message chain with an arp request to resolve the in_ire. diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip/ip_ndp.c --- a/usr/src/uts/common/inet/ip/ip_ndp.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_ndp.c Tue Sep 11 04:26:06 2007 -0700 @@ -99,7 +99,6 @@ static boolean_t nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender, const in6_addr_t *target, int flag); -extern void th_trace_rrecord(th_trace_t *); static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, nce_t **, nce_t *); @@ -107,15 +106,13 @@ * We track the time of creation of the nce in the nce_init_time field * of IPv4 nce_t entries. If an nce is stuck in the ND_INITIAL state for * more than NCE_STUCK_TIMEOUT milliseconds, trigger the nce-stuck dtrace - * probe to assist in debugging. 
This probe will be fired from - * nce_thread_exit() for debug kernels, and from nce_report1() when - * 'ndd -get /dev/ip ip_ndp_cache_report' is invoked on both debug and - * non-debug kernels. + * probe to assist in debugging. This probe is fired from nce_report1() + * when 'ndd -get /dev/ip ip_ndp_cache_report' is invoked. */ #define NCE_STUCK_TIMEOUT 120000 -#ifdef NCE_DEBUG -void nce_trace_inactive(nce_t *); +#ifdef DEBUG +static void nce_trace_cleanup(const nce_t *); #endif #define NCE_HASH_PTR_V4(ipst, addr) \ @@ -241,9 +238,8 @@ ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); } -#ifdef NCE_DEBUG - bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); -#endif + nce->nce_trace_disable = B_FALSE; + /* * Atomically ensure that the ill is not CONDEMNED, before * adding the NCE. @@ -497,8 +493,8 @@ } } while (mpp++ != &nce->nce_last_mp_to_free); -#ifdef NCE_DEBUG - nce_trace_inactive(nce); +#ifdef DEBUG + nce_trace_cleanup(nce); #endif ill = nce->nce_ill; @@ -3517,140 +3513,34 @@ ncc->ncc_host++; } -#ifdef NCE_DEBUG -th_trace_t * -th_trace_nce_lookup(nce_t *nce) -{ - int bucket_id; - th_trace_t *th_trace; - - ASSERT(MUTEX_HELD(&nce->nce_lock)); - - bucket_id = IP_TR_HASH(curthread); - ASSERT(bucket_id < IP_TR_HASH_MAX); - - for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL; - th_trace = th_trace->th_next) { - if (th_trace->th_id == curthread) - return (th_trace); - } - return (NULL); -} - +#ifdef DEBUG void nce_trace_ref(nce_t *nce) { - int bucket_id; - th_trace_t *th_trace; - - /* - * Attempt to locate the trace buffer for the curthread.
- * If it does not exist, then allocate a new trace buffer - * and link it in list of trace bufs for this ipif, at the head - */ ASSERT(MUTEX_HELD(&nce->nce_lock)); - if (nce->nce_trace_disable == B_TRUE) + if (nce->nce_trace_disable) return; - th_trace = th_trace_nce_lookup(nce); - if (th_trace == NULL) { - bucket_id = IP_TR_HASH(curthread); - th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t), - KM_NOSLEEP); - if (th_trace == NULL) { - nce->nce_trace_disable = B_TRUE; - nce_trace_inactive(nce); - return; - } - th_trace->th_id = curthread; - th_trace->th_next = nce->nce_trace[bucket_id]; - th_trace->th_prev = &nce->nce_trace[bucket_id]; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = &th_trace->th_next; - nce->nce_trace[bucket_id] = th_trace; + if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { + nce->nce_trace_disable = B_TRUE; + nce_trace_cleanup(nce); } - ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1); - th_trace->th_refcnt++; - th_trace_rrecord(th_trace); } void nce_untrace_ref(nce_t *nce) { - th_trace_t *th_trace; - ASSERT(MUTEX_HELD(&nce->nce_lock)); - if (nce->nce_trace_disable == B_TRUE) - return; - - th_trace = th_trace_nce_lookup(nce); - ASSERT(th_trace != NULL && th_trace->th_refcnt > 0); - - th_trace_rrecord(th_trace); - th_trace->th_refcnt--; + if (!nce->nce_trace_disable) + th_trace_unref(nce); } -void -nce_trace_inactive(nce_t *nce) +static void +nce_trace_cleanup(const nce_t *nce) { - th_trace_t *th_trace; - int i; - - ASSERT(MUTEX_HELD(&nce->nce_lock)); - - for (i = 0; i < IP_TR_HASH_MAX; i++) { - while (nce->nce_trace[i] != NULL) { - th_trace = nce->nce_trace[i]; - - /* unlink th_trace and free it */ - nce->nce_trace[i] = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = - &nce->nce_trace[i]; - - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); - } - } - -} - -/* ARGSUSED */ -int -nce_thread_exit(nce_t *nce, caddr_t arg) -{ - th_trace_t *th_trace; 
- uint64_t now; - - mutex_enter(&nce->nce_lock); - if (nce->nce_state == ND_INITIAL) { - - now = TICK_TO_MSEC(lbolt64); - if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) { - DTRACE_PROBE1(nce__stuck, nce_t *, nce); - } - } - th_trace = th_trace_nce_lookup(nce); - - if (th_trace == NULL) { - mutex_exit(&nce->nce_lock); - return (0); - } - - ASSERT(th_trace->th_refcnt == 0); - - /* unlink th_trace and free it */ - *th_trace->th_prev = th_trace->th_next; - if (th_trace->th_next != NULL) - th_trace->th_next->th_prev = th_trace->th_prev; - th_trace->th_next = NULL; - th_trace->th_prev = NULL; - kmem_free(th_trace, sizeof (th_trace_t)); - mutex_exit(&nce->nce_lock); - return (0); + th_trace_cleanup(nce, nce->nce_trace_disable); } #endif @@ -3767,9 +3657,8 @@ mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); -#ifdef NCE_DEBUG - bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX); -#endif + nce->nce_trace_disable = B_FALSE; + if (src_nce != NULL) { /* * src_nce has been provided by the caller. 
The only diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip/ipclassifier.c --- a/usr/src/uts/common/inet/ip/ipclassifier.c Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ipclassifier.c Tue Sep 11 04:26:06 2007 -0700 @@ -2425,7 +2425,7 @@ last = 0; ctb = &connp->conn_trace_buf[last]; - ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); + ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); connp->conn_trace_last = last; return (1); } @@ -2443,7 +2443,7 @@ last = 0; ctb = &connp->conn_trace_buf[last]; - ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); + ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); connp->conn_trace_last = last; return (1); } diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip_if.h --- a/usr/src/uts/common/inet/ip_if.h Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip_if.h Tue Sep 11 04:26:06 2007 -0700 @@ -471,9 +471,6 @@ extern void conn_delete_ire(conn_t *, caddr_t); /* - * This is a function which is called from thread_exit - * that can be used to debug reference count issues in IP. - * * Notes on reference tracing on ill, ipif, ire, nce data structures: * * The current model of references on an ipif or ill is purely based on threads @@ -487,9 +484,11 @@ * * As a debugging aid, the refhold and refrele functions call into tracing * functions that record the stack trace of the caller and the references - * acquired or released by the calling thread, hashed by the thread id. On - * thread exit, ipif_thread_exit and ill_thread_exit verify that there are no - * outstanding references to the ipif or ill from the exiting thread. + * acquired or released by the calling thread, hashed by the structure address + * in thread-specific-data (TSD). On thread exit, ip_thread_exit destroys the + * hash, and the destructor for the hash entries (th_trace_free) verifies that + * there are no outstanding references to the ipif or ill from the exiting + * thread. 
* * In the case of ires and nces, the model is slightly different. Typically each * ire pointing to an nce contributes to the nce_refcnt. Similarly a conn_t @@ -500,17 +499,14 @@ * ire_thread_exit, nce_thread_exit does the verification that are no * outstanding references on the ire / nce from the exiting thread. * - * The reference verification is driven from thread_exit() which calls into IP - * via a function pointer ip_cleanup_func into the verification function - * ip_thread_exit. This debugging aid may be helpful in tracing missing - * refrele's on a debug kernel. On a non-debug kernel, these missing refrele's - * are noticeable only when an interface is being unplumbed, and the unplumb - * hangs, long after the missing refrele. On a debug kernel, the traces - * (th_trace_t) which contain the stack backtraces can be examined on a crash - * dump to locate the missing refrele. + * The reference verification is driven from the TSD destructor which calls + * into IP's verification function ip_thread_exit. This debugging aid may be + * helpful in tracing missing refrele's on a debug kernel. On a non-debug + * kernel, these missing refrele's are noticeable only when an interface is + * being unplumbed, and the unplumb hangs, long after the missing refrele. On a + * debug kernel, the traces (th_trace_t) which contain the stack backtraces can + * be examined on a crash dump to locate the missing refrele. 
*/ -extern void (*ip_cleanup_func)(void); -extern void ip_thread_exit(void); #endif /* _KERNEL */ diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ip_ndp.h --- a/usr/src/uts/common/inet/ip_ndp.h Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ip_ndp.h Tue Sep 11 04:26:06 2007 -0700 @@ -78,10 +78,7 @@ uint_t nce_defense_count; /* number of NDP conflicts */ uint_t nce_defense_time; /* last time defended (secs) */ uint64_t nce_init_time; /* time when it was set to ND_INITIAL */ -#ifdef NCE_DEBUG - th_trace_t *nce_trace[IP_TR_HASH_MAX]; boolean_t nce_trace_disable; /* True when alloc fails */ -#endif } nce_t; /* @@ -169,7 +166,7 @@ #define ND_MAX_Q 4 -#ifdef NCE_DEBUG +#ifdef DEBUG #define NCE_TRACE_REF(nce) nce_trace_ref(nce) #define NCE_UNTRACE_REF(nce) nce_untrace_ref(nce) #else @@ -357,11 +354,9 @@ extern int ndp_lookup_then_add_v4(ill_t *, const in_addr_t *, uint16_t, nce_t **, nce_t *); -#ifdef NCE_DEBUG -extern void nce_trace_inactive(nce_t *); +#ifdef DEBUG extern void nce_trace_ref(nce_t *); extern void nce_untrace_ref(nce_t *); -extern int nce_thread_exit(nce_t *, caddr_t); #endif #endif /* _KERNEL */ diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/common/inet/ipclassifier.h --- a/usr/src/uts/common/inet/ipclassifier.h Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/common/inet/ipclassifier.h Tue Sep 11 04:26:06 2007 -0700 @@ -125,8 +125,8 @@ typedef struct { int ctb_depth; -#define IP_STACK_DEPTH 15 - pc_t ctb_stack[IP_STACK_DEPTH]; +#define CONN_STACK_DEPTH 15 + pc_t ctb_stack[CONN_STACK_DEPTH]; } conn_trace_t; struct conn_s { diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/intel/ip/ip.global-objs.debug64 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 Tue Sep 11 04:26:06 2007 -0700 @@ -126,6 +126,9 @@ ip_squeue_profile ip_squeue_worker_wait ip_squeues_per_cpu +ip_thread_data +ip_thread_list +ip_thread_rwlock ip_wput_frag_mdt_min 
ipcl_bind_fanout_size ipcl_conn_cache diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/intel/ip/ip.global-objs.obj64 --- a/usr/src/uts/intel/ip/ip.global-objs.obj64 Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 Tue Sep 11 04:26:06 2007 -0700 @@ -126,6 +126,9 @@ ip_squeue_profile ip_squeue_worker_wait ip_squeues_per_cpu +ip_thread_data +ip_thread_list +ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size ipcl_conn_cache diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/sparc/ip/Makefile --- a/usr/src/uts/sparc/ip/Makefile Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/sparc/ip/Makefile Tue Sep 11 04:26:06 2007 -0700 @@ -102,8 +102,6 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) $(SISCHECK_DEPS) - pwd; - echo "abc me"; $(ROOTLINK): $(ROOT_STRMOD_DIR) $(ROOTMODULE) -$(RM) $@; ln $(ROOTMODULE) $@ diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/sparc/ip/ip.global-objs.debug64 --- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 Tue Sep 11 04:26:06 2007 -0700 @@ -126,6 +126,9 @@ ip_squeue_profile ip_squeue_worker_wait ip_squeues_per_cpu +ip_thread_data +ip_thread_list +ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size ipcl_conn_cache diff -r 7b32314731c6 -r e0c678e511a7 usr/src/uts/sparc/ip/ip.global-objs.obj64 --- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 Mon Sep 10 15:47:44 2007 -0700 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 Tue Sep 11 04:26:06 2007 -0700 @@ -126,6 +126,9 @@ ip_squeue_profile ip_squeue_worker_wait ip_squeues_per_cpu +ip_thread_data +ip_thread_list +ip_thread_rwlock ip_wput_frag_mdt_min ipcl_bind_fanout_size ipcl_conn_cache