changeset 5023:e0c678e511a7

6203568 accumulating number of threads behind rw_lock in ire_walk_ill_tables IRB_REFRELE 6591083 IP instances sis_check added stray entries to sparc ip module makefile
author carlsonj
date Tue, 11 Sep 2007 04:26:06 -0700
parents 7b32314731c6
children 84d44b28471b
files usr/src/cmd/mdb/common/modules/ip/ip.c usr/src/uts/common/disp/thread.c usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip/ip_ire.c usr/src/uts/common/inet/ip/ip_ndp.c usr/src/uts/common/inet/ip/ipclassifier.c usr/src/uts/common/inet/ip_if.h usr/src/uts/common/inet/ip_ndp.h usr/src/uts/common/inet/ipclassifier.h usr/src/uts/intel/ip/ip.global-objs.debug64 usr/src/uts/intel/ip/ip.global-objs.obj64 usr/src/uts/sparc/ip/Makefile usr/src/uts/sparc/ip/ip.global-objs.debug64 usr/src/uts/sparc/ip/ip.global-objs.obj64
diffstat 16 files changed, 553 insertions(+), 675 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/ip/ip.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/cmd/mdb/common/modules/ip/ip.c	Tue Sep 11 04:26:06 2007 -0700
@@ -44,6 +44,7 @@
 #include <inet/ipclassifier.h>
 #include <inet/mi.h>
 #include <sys/squeue_impl.h>
+#include <sys/modhash_impl.h>
 
 #include <mdb/mdb_modapi.h>
 #include <mdb/mdb_ks.h>
@@ -73,6 +74,14 @@
 	ill_if_t ill_if;
 } illif_walk_data_t;
 
+typedef struct th_walk_data {
+	uint_t		thw_non_zero_only;
+	boolean_t	thw_match;
+	uintptr_t	thw_matchkey;
+	uintptr_t	thw_ipst;
+	clock_t		thw_lbolt;
+} th_walk_data_t;
+
 static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *);
 static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *);
 
@@ -130,6 +139,44 @@
 	return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata));
 }
 
+int
+th_hash_walk_init(mdb_walk_state_t *wsp)
+{
+	GElf_Sym sym;
+	list_node_t *next;
+
+	if (wsp->walk_addr == NULL) {
+		if (mdb_lookup_by_obj("ip", "ip_thread_list", &sym) == 0) {
+			wsp->walk_addr = sym.st_value;
+		} else {
+			mdb_warn("unable to locate ip_thread_list\n");
+			return (WALK_ERR);
+		}
+	}
+
+	if (mdb_vread(&next, sizeof (next),
+	    wsp->walk_addr + offsetof(list_t, list_head) +
+	    offsetof(list_node_t, list_next)) == -1 ||
+	    next == NULL) {
+		mdb_warn("non-DEBUG image; cannot walk th_hash list\n");
+		return (WALK_ERR);
+	}
+
+	if (mdb_layered_walk("list", wsp) == -1) {
+		mdb_warn("can't walk 'list'");
+		return (WALK_ERR);
+	} else {
+		return (WALK_NEXT);
+	}
+}
+
+int
+th_hash_walk_step(mdb_walk_state_t *wsp)
+{
+	return (wsp->walk_callback(wsp->walk_addr, wsp->walk_layer,
+	    wsp->walk_cbdata));
+}
+
 /*
  * Called with walk_addr being the address of ips_ill_g_heads
  */
@@ -266,7 +313,7 @@
 #endif
 
 	if (mdb_pwalk("illif_stack", wsp->walk_callback,
-		wsp->walk_cbdata, kaddr) == -1) {
+	    wsp->walk_cbdata, kaddr) == -1) {
 		mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p",
 		    kaddr);
 		return (WALK_ERR);
@@ -396,7 +443,7 @@
 	}
 #ifdef DEBUG
 	mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table_size %u\n",
-		cache_table_size);
+	    cache_table_size);
 #endif
 
 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table);
@@ -422,7 +469,7 @@
 #endif
 
 		if (mdb_pwalk("ire_next", (mdb_walk_cb_t)ire_format, &verbose,
-			kaddr) == -1) {
+		    kaddr) == -1) {
 			mdb_warn("can't walk 'ire_next' for ire %p", kaddr);
 			return (WALK_ERR);
 		}
@@ -1048,6 +1095,120 @@
 	mdb_printf("\t-v\tbe verbose (more descriptive)\n");
 }
 
+/*
+ * This is called by ::th_trace (via a callback) when walking the th_hash
+ * list.  It calls modent to find the entries.
+ */
+/* ARGSUSED */
+static int
+modent_summary(uintptr_t addr, const void *data, void *private)
+{
+	th_walk_data_t *thw = private;
+	const struct mod_hash_entry *mhe = data;
+	th_trace_t th;
+
+	if (mdb_vread(&th, sizeof (th), (uintptr_t)mhe->mhe_val) == -1) {
+		mdb_warn("failed to read th_trace_t %p", mhe->mhe_val);
+		return (WALK_ERR);
+	}
+
+	if (th.th_refcnt == 0 && thw->thw_non_zero_only)
+		return (WALK_NEXT);
+
+	if (!thw->thw_match) {
+		mdb_printf("%?p %?p %?p %8d %?p\n", thw->thw_ipst, mhe->mhe_key,
+		    mhe->mhe_val, th.th_refcnt, th.th_id);
+	} else if (thw->thw_matchkey == (uintptr_t)mhe->mhe_key) {
+		int i, j, k;
+		tr_buf_t *tr;
+
+		mdb_printf("Object %p in IP stack %p:\n", mhe->mhe_key,
+		    thw->thw_ipst);
+		i = th.th_trace_lastref;
+		mdb_printf("\tThread %p refcnt %d:\n", th.th_id,
+		    th.th_refcnt);
+		for (j = TR_BUF_MAX; j > 0; j--) {
+			tr = th.th_trbuf + i;
+			if (tr->tr_depth == 0 || tr->tr_depth > TR_STACK_DEPTH)
+				break;
+			mdb_printf("\t  T%+ld:\n", tr->tr_time -
+			    thw->thw_lbolt);
+			for (k = 0; k < tr->tr_depth; k++)
+				mdb_printf("\t\t%a\n", tr->tr_stack[k]);
+			if (--i < 0)
+				i = TR_BUF_MAX - 1;
+		}
+	}
+	return (WALK_NEXT);
+}
+
+/*
+ * This is called by ::th_trace (via a callback) when walking the th_hash
+ * list.  It calls modent to find the entries.
+ */
+/* ARGSUSED */
+static int
+th_hash_summary(uintptr_t addr, const void *data, void *private)
+{
+	const th_hash_t *thh = data;
+	th_walk_data_t *thw = private;
+
+	thw->thw_ipst = (uintptr_t)thh->thh_ipst;
+	return (mdb_pwalk("modent", modent_summary, private,
+	    (uintptr_t)thh->thh_hash));
+}
+
+/*
+ * Print or summarize the th_trace_t structures.
+ */
+static int
+th_trace(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	th_walk_data_t thw;
+
+	(void) memset(&thw, 0, sizeof (thw));
+
+	if (mdb_getopts(argc, argv,
+	    'n', MDB_OPT_SETBITS, TRUE, &thw.thw_non_zero_only,
+	    NULL) != argc)
+		return (DCMD_USAGE);
+
+	if (!(flags & DCMD_ADDRSPEC)) {
+		/*
+		 * No address specified.  Walk all of the th_hash_t in the
+		 * system, and summarize the th_trace_t entries in each.
+		 */
+		mdb_printf("%?s %?s %?s %8s %?s\n",
+		    "IPSTACK", "OBJECT", "TRACE", "REFCNT", "THREAD");
+		thw.thw_match = B_FALSE;
+	} else {
+		thw.thw_match = B_TRUE;
+		thw.thw_matchkey = addr;
+		if (mdb_readvar(&thw.thw_lbolt,
+		    mdb_prop_postmortem ? "panic_lbolt" : "lbolt") == -1) {
+			mdb_warn("failed to read lbolt");
+			return (DCMD_ERR);
+		}
+	}
+	if (mdb_pwalk("th_hash", th_hash_summary, &thw, NULL) == -1) {
+		mdb_warn("can't walk th_hash entries");
+		return (DCMD_ERR);
+	}
+	return (DCMD_OK);
+}
+
+static void
+th_trace_help(void)
+{
+	mdb_printf("If given an address of an ill_t, ipif_t, ire_t, or nce_t, "
+	    "print the\n"
+	    "corresponding th_trace_t structure in detail.  Otherwise, if no "
+	    "address is\n"
+	    "given, then summarize all th_trace_t structures.\n\n");
+	mdb_printf("Options:\n"
+	    "\t-n\tdisplay only entries with non-zero th_refcnt\n");
+}
+
 static const mdb_dcmd_t dcmds[] = {
 	{ "illif", "?[-P v4 | v6]",
 	    "display or filter IP Lower Level InterFace structures", illif,
@@ -1060,6 +1221,8 @@
 	{ "tcphdr", ":", "display a TCP header", tcphdr },
 	{ "udphdr", ":", "display an UDP header", udphdr },
 	{ "sctphdr", ":", "display an SCTP header", sctphdr },
+	{ "th_trace", "?[-n]", "display th_trace_t structures", th_trace,
+	    th_trace_help },
 	{ NULL }
 };
 
@@ -1077,6 +1240,8 @@
 		ire_next_walk_init, ire_next_walk_step, NULL },
 	{ "ip_stacks", "walk all the ip_stack_t",
 		ip_stacks_walk_init, ip_stacks_walk_step, NULL },
+	{ "th_hash", "walk all the th_hash_t entries",
+		th_hash_walk_init, th_hash_walk_step, NULL },
 	{ NULL }
 };
 
--- a/usr/src/uts/common/disp/thread.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/disp/thread.c	Tue Sep 11 04:26:06 2007 -0700
@@ -34,7 +34,6 @@
 #include <sys/user.h>
 #include <sys/systm.h>
 #include <sys/sysinfo.h>
-#include <sys/var.h>
 #include <sys/errno.h>
 #include <sys/cmn_err.h>
 #include <sys/cred.h>
@@ -43,7 +42,6 @@
 #include <sys/project.h>
 #include <sys/proc.h>
 #include <sys/debug.h>
-#include <sys/inline.h>
 #include <sys/disp.h>
 #include <sys/class.h>
 #include <vm/seg_kmem.h>
@@ -75,8 +73,6 @@
 #include <sys/kdi.h>
 #include <sys/waitq.h>
 #include <sys/cpucaps.h>
-#include <inet/ip.h>
-#include <inet/ip_if.h>
 
 struct kmem_cache *thread_cache;	/* cache of free threads */
 struct kmem_cache *lwp_cache;		/* cache of free lwps */
@@ -561,23 +557,14 @@
 	}
 }
 
-/*
- * This is a function which is called from thread_exit
- * that can be used to debug reference count issues in IP.
- */
-void (*ip_cleanup_func)(void);
-
 void
-thread_exit()
+thread_exit(void)
 {
 	kthread_t *t = curthread;
 
 	if ((t->t_proc_flag & TP_ZTHREAD) != 0)
 		cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
 
-	if (ip_cleanup_func != NULL)
-		(*ip_cleanup_func)();
-
 	tsd_exit();		/* Clean up this thread's TSD */
 
 	kcpc_passivate();	/* clean up performance counter state */
--- a/usr/src/uts/common/inet/ip.h	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip.h	Tue Sep 11 04:26:06 2007 -0700
@@ -58,11 +58,9 @@
 #include <sys/systm.h>
 #include <sys/multidata.h>
 #include <net/radix.h>
+#include <sys/modhash.h>
 
 #ifdef DEBUG
-#define	ILL_DEBUG
-#define	IRE_DEBUG
-#define	NCE_DEBUG
 #define	CONN_DEBUG
 #endif
 
@@ -1235,21 +1233,26 @@
  * do not allow the granularity need to trace refrences to ipif/ill/ire's. This
  * mechanism should be revisited once dtrace is available.
  */
-#define	IP_STACK_DEPTH	15
+#define	TR_STACK_DEPTH	14
 typedef struct tr_buf_s {
 	int	tr_depth;
-	pc_t	tr_stack[IP_STACK_DEPTH];
+	clock_t	tr_time;
+	pc_t	tr_stack[TR_STACK_DEPTH];
 } tr_buf_t;
 
 typedef struct th_trace_s {
-	struct	th_trace_s *th_next;
-	struct	th_trace_s **th_prev;
+	int		th_refcnt;
+	uint_t		th_trace_lastref;
 	kthread_t	*th_id;
-	int	th_refcnt;
-	uint_t	th_trace_lastref;
 #define	TR_BUF_MAX	38
-	tr_buf_t th_trbuf[TR_BUF_MAX];
+	tr_buf_t	th_trbuf[TR_BUF_MAX];
 } th_trace_t;
+
+typedef struct th_hash_s {
+	list_node_t	thh_link;
+	mod_hash_t	*thh_hash;
+	ip_stack_t	*thh_ipst;
+} th_hash_t;
 #endif
 
 /* The following are ipif_state_flags */
@@ -1316,11 +1319,7 @@
 	zoneid_t
 		ipif_zoneid;		/* zone ID number */
 	timeout_id_t ipif_recovery_id;	/* Timer for DAD recovery */
-#ifdef ILL_DEBUG
-#define	IP_TR_HASH_MAX	64
-	th_trace_t *ipif_trace[IP_TR_HASH_MAX];
-	boolean_t	ipif_trace_disable;	/* True when alloc fails */
-#endif
+	boolean_t ipif_trace_disable;	/* True when alloc fails */
 } ipif_t;
 
 /*
@@ -1380,20 +1379,16 @@
 
 #define	IP_TR_HASH(tid)	((((uintptr_t)tid) >> 6) & (IP_TR_HASH_MAX - 1))
 
-#ifdef ILL_DEBUG
+#ifdef DEBUG
 #define	IPIF_TRACE_REF(ipif)	ipif_trace_ref(ipif)
 #define	ILL_TRACE_REF(ill)	ill_trace_ref(ill)
 #define	IPIF_UNTRACE_REF(ipif)	ipif_untrace_ref(ipif)
 #define	ILL_UNTRACE_REF(ill)	ill_untrace_ref(ill)
-#define	ILL_TRACE_CLEANUP(ill)	ill_trace_cleanup(ill)
-#define	IPIF_TRACE_CLEANUP(ipif)	ipif_trace_cleanup(ipif)
 #else
 #define	IPIF_TRACE_REF(ipif)
 #define	ILL_TRACE_REF(ill)
 #define	IPIF_UNTRACE_REF(ipif)
 #define	ILL_UNTRACE_REF(ill)
-#define	ILL_TRACE_CLEANUP(ill)
-#define	IPIF_TRACE_CLEANUP(ipif)
 #endif
 
 /* IPv4 compatability macros */
@@ -1462,12 +1457,13 @@
 	boolean_t	ipsq_split;	/* ipsq may need to be split */
 	int		ipsq_waitfor;	/* Values encoded below */
 	char		ipsq_name[LIFNAMSIZ+1];	/* same as phyint_groupname */
-
-#ifdef ILL_DEBUG
+	ip_stack_t	*ipsq_ipst;	/* Does not have a netstack_hold */
+
+#ifdef DEBUG
 	int		ipsq_depth;	/* debugging aid */
-	pc_t		ipsq_stack[IP_STACK_DEPTH];	/* debugging aid */
+#define	IPSQ_STACK_DEPTH	15
+	pc_t		ipsq_stack[IPSQ_STACK_DEPTH];	/* debugging aid */
 #endif
-	ip_stack_t	*ipsq_ipst;	/* Does not have a netstack_hold */
 } ipsq_t;
 
 /* ipsq_flags */
@@ -1968,10 +1964,7 @@
 	t_uscalar_t	ill_dlpi_pending; /* Last DLPI primitive issued */
 	uint_t		ill_usesrc_ifindex; /* use src addr from this ILL */
 	struct ill_s	*ill_usesrc_grp_next; /* Next ILL in the usesrc group */
-#ifdef ILL_DEBUG
-	th_trace_t	*ill_trace[IP_TR_HASH_MAX];
 	boolean_t	ill_trace_disable;	/* True when alloc fails */
-#endif
 	zoneid_t	ill_zoneid;
 	ip_stack_t	*ill_ipst;	/* Corresponds to a netstack_hold */
 } ill_t;
@@ -2319,26 +2312,21 @@
  * holding the lock. Currently ip_wput does this for caching IRE_CACHEs.
  */
 
-#ifndef IRE_DEBUG
-
-#define	IRE_REFHOLD_NOTR(ire)	IRE_REFHOLD(ire)
+#ifdef DEBUG
+#define	IRE_UNTRACE_REF(ire)	ire_untrace_ref(ire);
+#define	IRE_TRACE_REF(ire)	ire_trace_ref(ire);
+#else
 #define	IRE_UNTRACE_REF(ire)
 #define	IRE_TRACE_REF(ire)
-
-#else
+#endif
 
 #define	IRE_REFHOLD_NOTR(ire) {				\
 	atomic_add_32(&(ire)->ire_refcnt, 1);		\
 	ASSERT((ire)->ire_refcnt != 0);			\
 }
 
-#define	IRE_UNTRACE_REF(ire)	ire_untrace_ref(ire);
-#define	IRE_TRACE_REF(ire)	ire_trace_ref(ire);
-#endif
-
 #define	IRE_REFHOLD(ire) {				\
-	atomic_add_32(&(ire)->ire_refcnt, 1);		\
-	ASSERT((ire)->ire_refcnt != 0);			\
+	IRE_REFHOLD_NOTR(ire);				\
 	IRE_TRACE_REF(ire);				\
 }
 
@@ -2358,30 +2346,19 @@
  *	  To avoid bloating the code, we use the function "ire_refrele"
  *	  which essentially calls the macro.
  */
-#ifndef IRE_DEBUG
-#define	IRE_REFRELE(ire) {					\
+#define	IRE_REFRELE_NOTR(ire) {					\
 	ASSERT((ire)->ire_refcnt != 0);				\
 	membar_exit();						\
 	if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0)	\
 		ire_inactive(ire);				\
 }
-#define	IRE_REFRELE_NOTR(ire)	IRE_REFRELE(ire)
-#else
+
 #define	IRE_REFRELE(ire) {					\
-	if (ire->ire_bucket != NULL)				\
-		ire_untrace_ref(ire);				\
-	ASSERT((ire)->ire_refcnt != 0);				\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0)	\
-		ire_inactive(ire);				\
+	if (ire->ire_bucket != NULL) {				\
+		IRE_UNTRACE_REF(ire);				\
+	}							\
+	IRE_REFRELE_NOTR(ire);					\
 }
-#define	IRE_REFRELE_NOTR(ire) {				\
-	ASSERT((ire)->ire_refcnt != 0);				\
-	membar_exit();						\
-	if (atomic_add_32_nv(&(ire)->ire_refcnt, -1) == 0)	\
-		ire_inactive(ire);				\
-}
-#endif
 
 /*
  * Bump up the reference count on the hash bucket - IRB to
@@ -2517,11 +2494,8 @@
 	uint_t	ire_stq_ifindex;
 	uint_t		ire_defense_count;	/* number of ARP conflicts */
 	uint_t		ire_defense_time;	/* last time defended (secs) */
+	boolean_t	ire_trace_disable;	/* True when alloc fails */
 	ip_stack_t	*ire_ipst;	/* Does not have a netstack_hold */
-#ifdef IRE_DEBUG
-	th_trace_t	*ire_trace[IP_TR_HASH_MAX];
-	boolean_t	ire_trace_disable;	/* True when alloc fails */
-#endif
 } ire_t;
 
 /* IPv4 compatiblity macros */
@@ -3067,6 +3041,9 @@
 #endif
 
 extern int	ip_debug;
+extern uint_t	ip_thread_data;
+extern krwlock_t ip_thread_rwlock;
+extern list_t	ip_thread_list;
 
 #ifdef IP_DEBUG
 #include <sys/debug.h>
@@ -3195,14 +3172,13 @@
 extern void	ire_inactive(ire_t *);
 extern boolean_t irb_inactive(irb_t *);
 extern ire_t	*ire_unlink(irb_t *);
-#ifdef IRE_DEBUG
-extern	void	ire_trace_ref(ire_t *ire);
-extern	void	ire_untrace_ref(ire_t *ire);
-extern	void	ire_thread_exit(ire_t *ire, caddr_t);
-#endif
-#ifdef ILL_DEBUG
-extern	void	ill_trace_cleanup(ill_t *);
-extern	void	ipif_trace_cleanup(ipif_t *);
+
+#ifdef DEBUG
+extern	boolean_t th_trace_ref(const void *, ip_stack_t *);
+extern	void	th_trace_unref(const void *);
+extern	void	th_trace_cleanup(const void *, boolean_t);
+extern	void	ire_trace_ref(ire_t *);
+extern	void	ire_untrace_ref(ire_t *);
 #endif
 
 extern int	ip_srcid_insert(const in6_addr_t *, zoneid_t, ip_stack_t *);
--- a/usr/src/uts/common/inet/ip/ip.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip.c	Tue Sep 11 04:26:06 2007 -0700
@@ -154,6 +154,15 @@
 int ip_modclose_ackwait_ms = 3000;
 
 /*
+ * It would be nice to have these present only in DEBUG systems, but the
+ * current design of the global symbol checking logic requires them to be
+ * unconditionally present.
+ */
+uint_t ip_thread_data;			/* TSD key for debug support */
+krwlock_t ip_thread_rwlock;
+list_t	ip_thread_list;
+
+/*
  * Structure to represent a linked list of msgblks. Used by ip_snmp_ functions.
  */
 
@@ -5757,6 +5766,12 @@
 	ip_ire_g_fini();
 	inet_minor_destroy(ip_minor_arena);
 
+#ifdef DEBUG
+	list_destroy(&ip_thread_list);
+	rw_destroy(&ip_thread_rwlock);
+	tsd_destroy(&ip_thread_data);
+#endif
+
 	netstack_unregister(NS_IP);
 }
 
@@ -5888,6 +5903,23 @@
 }
 
 /*
+ * This function is called from the TSD destructor, and is used to debug
+ * reference count issues in IP. See block comment in <inet/ip_if.h> for
+ * details.
+ */
+static void
+ip_thread_exit(void *phash)
+{
+	th_hash_t *thh = phash;
+
+	rw_enter(&ip_thread_rwlock, RW_WRITER);
+	list_remove(&ip_thread_list, thh);
+	rw_exit(&ip_thread_rwlock);
+	mod_hash_destroy_hash(thh->thh_hash);
+	kmem_free(thh, sizeof (*thh));
+}
+
+/*
  * Called when the IP kernel module is loaded into the kernel
  */
 void
@@ -5916,9 +5948,11 @@
 	ip_ire_g_init();
 	ip_net_g_init();
 
-#ifdef ILL_DEBUG
-	/* Default cleanup function */
-	ip_cleanup_func = ip_thread_exit;
+#ifdef DEBUG
+	tsd_create(&ip_thread_data, ip_thread_exit);
+	rw_init(&ip_thread_rwlock, NULL, RW_DEFAULT, NULL);
+	list_create(&ip_thread_list, sizeof (th_hash_t),
+	    offsetof(th_hash_t, thh_link));
 #endif
 
 	/*
--- a/usr/src/uts/common/inet/ip/ip_if.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_if.c	Tue Sep 11 04:26:06 2007 -0700
@@ -45,6 +45,7 @@
 #include <sys/zone.h>
 #include <sys/sunldi.h>
 #include <sys/file.h>
+#include <sys/bitmap.h>
 
 #include <sys/kmem.h>
 #include <sys/systm.h>
@@ -255,6 +256,11 @@
 
 static void	conn_cleanup_stale_ire(conn_t *, caddr_t);
 
+#ifdef DEBUG
+static	void	ill_trace_cleanup(const ill_t *);
+static	void	ipif_trace_cleanup(const ipif_t *);
+#endif
+
 /*
  * if we go over the memory footprint limit more than once in this msec
  * interval, we'll start pruning aggressively.
@@ -937,11 +943,14 @@
 	} while (mpp++ != &ill->ill_last_mp_to_free);
 
 	ill_free_mib(ill);
+
+#ifdef DEBUG
+	ill_trace_cleanup(ill);
+#endif
+
 	/* Drop refcnt here */
 	netstack_rele(ill->ill_ipst->ips_netstack);
 	ill->ill_ipst = NULL;
-
-	ILL_TRACE_CLEANUP(ill);
 }
 
 static void
@@ -4654,8 +4663,9 @@
 	ipsq->ipsq_writer = curthread;
 	ipsq->ipsq_reentry_cnt = 1;
 	ipsq->ipsq_ipst = ill->ill_ipst;	/* No netstack_hold */
-#ifdef ILL_DEBUG
-	ipsq->ipsq_depth = getpcstack((pc_t *)ipsq->ipsq_stack, IP_STACK_DEPTH);
+#ifdef DEBUG
+	ipsq->ipsq_depth = getpcstack((pc_t *)ipsq->ipsq_stack,
+	    IPSQ_STACK_DEPTH);
 #endif
 	(void) strcpy(ipsq->ipsq_name, ill->ill_name);
 	return (B_TRUE);
@@ -5035,7 +5045,7 @@
 	ill->ill_phyint->phyint_ipsq->ipsq_writer = NULL;
 	ill->ill_phyint->phyint_ipsq->ipsq_reentry_cnt--;
 	ASSERT(ill->ill_phyint->phyint_ipsq->ipsq_reentry_cnt == 0);
-#ifdef ILL_DEBUG
+#ifdef DEBUG
 	ill->ill_phyint->phyint_ipsq->ipsq_depth = 0;
 #endif
 	ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE);
@@ -6441,9 +6451,9 @@
 	}
 }
 
-#ifdef ILL_DEBUG
+#ifdef DEBUG
 /* Reuse trace buffer from beginning (if reached the end) and record trace */
-void
+static void
 th_trace_rrecord(th_trace_t *th_trace)
 {
 	tr_buf_t *tr_buf;
@@ -6455,301 +6465,226 @@
 		lastref = 0;
 	th_trace->th_trace_lastref = lastref;
 	tr_buf = &th_trace->th_trbuf[lastref];
-	tr_buf->tr_depth = getpcstack(tr_buf->tr_stack, IP_STACK_DEPTH);
-}
-
-th_trace_t *
-th_trace_ipif_lookup(ipif_t *ipif)
-{
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
-
-	bucket_id = IP_TR_HASH(curthread);
-	ASSERT(bucket_id < IP_TR_HASH_MAX);
-
-	for (th_trace = ipif->ipif_trace[bucket_id]; th_trace != NULL;
-	    th_trace = th_trace->th_next) {
-		if (th_trace->th_id == curthread)
-			return (th_trace);
-	}
-	return (NULL);
-}
-
-void
-ipif_trace_ref(ipif_t *ipif)
-{
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
-
-	if (ipif->ipif_trace_disable)
-		return;
-
-	/*
-	 * Attempt to locate the trace buffer for the curthread.
-	 * If it does not exist, then allocate a new trace buffer
-	 * and link it in list of trace bufs for this ipif, at the head
-	 */
-	th_trace = th_trace_ipif_lookup(ipif);
-	if (th_trace == NULL) {
-		bucket_id = IP_TR_HASH(curthread);
-		th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t),
-		    KM_NOSLEEP);
-		if (th_trace == NULL) {
-			ipif->ipif_trace_disable = B_TRUE;
-			ipif_trace_cleanup(ipif);
-			return;
-		}
-		th_trace->th_id = curthread;
-		th_trace->th_next = ipif->ipif_trace[bucket_id];
-		th_trace->th_prev = &ipif->ipif_trace[bucket_id];
-		if (th_trace->th_next != NULL)
-			th_trace->th_next->th_prev = &th_trace->th_next;
-		ipif->ipif_trace[bucket_id] = th_trace;
-	}
-	ASSERT(th_trace->th_refcnt >= 0 &&
-	    th_trace->th_refcnt < TR_BUF_MAX -1);
-	th_trace->th_refcnt++;
-	th_trace_rrecord(th_trace);
-}
-
-void
-ipif_untrace_ref(ipif_t *ipif)
+	tr_buf->tr_time = lbolt;
+	tr_buf->tr_depth = getpcstack(tr_buf->tr_stack, TR_STACK_DEPTH);
+}
+
+static void
+th_trace_free(void *value)
+{
+	th_trace_t *th_trace = value;
+
+	ASSERT(th_trace->th_refcnt == 0);
+	kmem_free(th_trace, sizeof (*th_trace));
+}
+
+/*
+ * Find or create the per-thread hash table used to track object references.
+ * The ipst argument is NULL if we shouldn't allocate.
+ *
+ * Accesses per-thread data, so there's no need to lock here.
+ */
+static mod_hash_t *
+th_trace_gethash(ip_stack_t *ipst)
+{
+	th_hash_t *thh;
+
+	if ((thh = tsd_get(ip_thread_data)) == NULL && ipst != NULL) {
+		mod_hash_t *mh;
+		char name[256];
+		size_t objsize, rshift;
+		int retv;
+
+		if ((thh = kmem_alloc(sizeof (*thh), KM_NOSLEEP)) == NULL)
+			return (NULL);
+		(void) snprintf(name, sizeof (name), "th_trace_%p", curthread);
+
+		/*
+		 * We use mod_hash_create_extended here rather than the more
+		 * obvious mod_hash_create_ptrhash because the latter has a
+		 * hard-coded KM_SLEEP, and we'd prefer to fail rather than
+		 * block.
+		 */
+		objsize = MAX(MAX(sizeof (ill_t), sizeof (ipif_t)),
+		    MAX(sizeof (ire_t), sizeof (nce_t)));
+		rshift = highbit(objsize);
+		mh = mod_hash_create_extended(name, 64, mod_hash_null_keydtor,
+		    th_trace_free, mod_hash_byptr, (void *)rshift,
+		    mod_hash_ptrkey_cmp, KM_NOSLEEP);
+		if (mh == NULL) {
+			kmem_free(thh, sizeof (*thh));
+			return (NULL);
+		}
+		thh->thh_hash = mh;
+		thh->thh_ipst = ipst;
+		/*
+		 * We trace ills, ipifs, ires, and nces.  All of these are
+		 * per-IP-stack, so the lock on the thread list is as well.
+		 */
+		rw_enter(&ip_thread_rwlock, RW_WRITER);
+		list_insert_tail(&ip_thread_list, thh);
+		rw_exit(&ip_thread_rwlock);
+		retv = tsd_set(ip_thread_data, thh);
+		ASSERT(retv == 0);
+	}
+	return (thh != NULL ? thh->thh_hash : NULL);
+}
+
+boolean_t
+th_trace_ref(const void *obj, ip_stack_t *ipst)
 {
 	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
-
-	if (ipif->ipif_trace_disable)
-		return;
-	th_trace = th_trace_ipif_lookup(ipif);
-	ASSERT(th_trace != NULL);
-	ASSERT(th_trace->th_refcnt > 0);
-
-	th_trace->th_refcnt--;
-	th_trace_rrecord(th_trace);
-}
-
-th_trace_t *
-th_trace_ill_lookup(ill_t *ill)
-{
-	th_trace_t *th_trace;
-	int bucket_id;
-
-	ASSERT(MUTEX_HELD(&ill->ill_lock));
-
-	bucket_id = IP_TR_HASH(curthread);
-	ASSERT(bucket_id < IP_TR_HASH_MAX);
-
-	for (th_trace = ill->ill_trace[bucket_id]; th_trace != NULL;
-	    th_trace = th_trace->th_next) {
-		if (th_trace->th_id == curthread)
-			return (th_trace);
-	}
-	return (NULL);
-}
-
-void
-ill_trace_ref(ill_t *ill)
-{
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&ill->ill_lock));
-	if (ill->ill_trace_disable)
-		return;
-	/*
-	 * Attempt to locate the trace buffer for the curthread.
-	 * If it does not exist, then allocate a new trace buffer
-	 * and link it in list of trace bufs for this ill, at the head
-	 */
-	th_trace = th_trace_ill_lookup(ill);
-	if (th_trace == NULL) {
-		bucket_id = IP_TR_HASH(curthread);
-		th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t),
-		    KM_NOSLEEP);
-		if (th_trace == NULL) {
-			ill->ill_trace_disable = B_TRUE;
-			ill_trace_cleanup(ill);
-			return;
-		}
+	mod_hash_t *mh;
+	mod_hash_val_t val;
+
+	if ((mh = th_trace_gethash(ipst)) == NULL)
+		return (B_FALSE);
+
+	/*
+	 * Attempt to locate the trace buffer for this obj and thread.
+	 * If it does not exist, then allocate a new trace buffer and
+	 * insert into the hash.
+	 */
+	if (mod_hash_find(mh, (mod_hash_key_t)obj, &val) == MH_ERR_NOTFOUND) {
+		th_trace = kmem_zalloc(sizeof (th_trace_t), KM_NOSLEEP);
+		if (th_trace == NULL)
+			return (B_FALSE);
+
 		th_trace->th_id = curthread;
-		th_trace->th_next = ill->ill_trace[bucket_id];
-		th_trace->th_prev = &ill->ill_trace[bucket_id];
-		if (th_trace->th_next != NULL)
-			th_trace->th_next->th_prev = &th_trace->th_next;
-		ill->ill_trace[bucket_id] = th_trace;
-	}
+		if (mod_hash_insert(mh, (mod_hash_key_t)obj,
+		    (mod_hash_val_t)th_trace) != 0) {
+			kmem_free(th_trace, sizeof (th_trace_t));
+			return (B_FALSE);
+		}
+	} else {
+		th_trace = (th_trace_t *)val;
+	}
+
 	ASSERT(th_trace->th_refcnt >= 0 &&
 	    th_trace->th_refcnt < TR_BUF_MAX - 1);
 
 	th_trace->th_refcnt++;
 	th_trace_rrecord(th_trace);
+	return (B_TRUE);
+}
+
+/*
+ * For the purpose of tracing a reference release, we assume that global
+ * tracing is always on and that the same thread initiated the reference hold
+ * is releasing.
+ */
+void
+th_trace_unref(const void *obj)
+{
+	int retv;
+	mod_hash_t *mh;
+	th_trace_t *th_trace;
+	mod_hash_val_t val;
+
+	mh = th_trace_gethash(NULL);
+	retv = mod_hash_find(mh, (mod_hash_key_t)obj, &val);
+	ASSERT(retv == 0);
+	th_trace = (th_trace_t *)val;
+
+	ASSERT(th_trace->th_refcnt > 0);
+	th_trace->th_refcnt--;
+	th_trace_rrecord(th_trace);
+}
+
+/*
+ * If tracing has been disabled, then we assume that the reference counts are
+ * now useless, and we clear them out before destroying the entries.
+ */
+void
+th_trace_cleanup(const void *obj, boolean_t trace_disable)
+{
+	th_hash_t	*thh;
+	mod_hash_t	*mh;
+	mod_hash_val_t	val;
+	th_trace_t	*th_trace;
+	int		retv;
+
+	rw_enter(&ip_thread_rwlock, RW_READER);
+	for (thh = list_head(&ip_thread_list); thh != NULL;
+	    thh = list_next(&ip_thread_list, thh)) {
+		if (mod_hash_find(mh = thh->thh_hash, (mod_hash_key_t)obj,
+		    &val) == 0) {
+			th_trace = (th_trace_t *)val;
+			if (trace_disable)
+				th_trace->th_refcnt = 0;
+			retv = mod_hash_destroy(mh, (mod_hash_key_t)obj);
+			ASSERT(retv == 0);
+		}
+	}
+	rw_exit(&ip_thread_rwlock);
+}
+
+void
+ipif_trace_ref(ipif_t *ipif)
+{
+	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
+
+	if (ipif->ipif_trace_disable)
+		return;
+
+	if (!th_trace_ref(ipif, ipif->ipif_ill->ill_ipst)) {
+		ipif->ipif_trace_disable = B_TRUE;
+		ipif_trace_cleanup(ipif);
+	}
+}
+
+void
+ipif_untrace_ref(ipif_t *ipif)
+{
+	ASSERT(MUTEX_HELD(&ipif->ipif_ill->ill_lock));
+
+	if (!ipif->ipif_trace_disable)
+		th_trace_unref(ipif);
+}
+
+void
+ill_trace_ref(ill_t *ill)
+{
+	ASSERT(MUTEX_HELD(&ill->ill_lock));
+
+	if (ill->ill_trace_disable)
+		return;
+
+	if (!th_trace_ref(ill, ill->ill_ipst)) {
+		ill->ill_trace_disable = B_TRUE;
+		ill_trace_cleanup(ill);
+	}
 }
 
 void
 ill_untrace_ref(ill_t *ill)
 {
-	th_trace_t *th_trace;
-
 	ASSERT(MUTEX_HELD(&ill->ill_lock));
 
-	if (ill->ill_trace_disable)
-		return;
-	th_trace = th_trace_ill_lookup(ill);
-	ASSERT(th_trace != NULL);
-	ASSERT(th_trace->th_refcnt > 0);
-
-	th_trace->th_refcnt--;
-	th_trace_rrecord(th_trace);
-}
-
-/*
- * Verify that this thread has no refs to the ipif and free
- * the trace buffers
- */
-/* ARGSUSED */
-void
-ipif_thread_exit(ipif_t *ipif, void *dummy)
-{
-	th_trace_t *th_trace;
-
-	mutex_enter(&ipif->ipif_ill->ill_lock);
-
-	th_trace = th_trace_ipif_lookup(ipif);
-	if (th_trace == NULL) {
-		mutex_exit(&ipif->ipif_ill->ill_lock);
-		return;
-	}
-	ASSERT(th_trace->th_refcnt == 0);
-	/* unlink th_trace and free it */
-	*th_trace->th_prev = th_trace->th_next;
-	if (th_trace->th_next != NULL)
-		th_trace->th_next->th_prev = th_trace->th_prev;
-	th_trace->th_next = NULL;
-	th_trace->th_prev = NULL;
-	kmem_free(th_trace, sizeof (th_trace_t));
-
-	mutex_exit(&ipif->ipif_ill->ill_lock);
-}
-
-/*
- * Verify that this thread has no refs to the ill and free
- * the trace buffers
- */
-/* ARGSUSED */
-void
-ill_thread_exit(ill_t *ill, void *dummy)
-{
-	th_trace_t *th_trace;
-
-	mutex_enter(&ill->ill_lock);
-
-	th_trace = th_trace_ill_lookup(ill);
-	if (th_trace == NULL) {
-		mutex_exit(&ill->ill_lock);
-		return;
-	}
-	ASSERT(th_trace->th_refcnt == 0);
-	/* unlink th_trace and free it */
-	*th_trace->th_prev = th_trace->th_next;
-	if (th_trace->th_next != NULL)
-		th_trace->th_next->th_prev = th_trace->th_prev;
-	th_trace->th_next = NULL;
-	th_trace->th_prev = NULL;
-	kmem_free(th_trace, sizeof (th_trace_t));
-
-	mutex_exit(&ill->ill_lock);
-}
-#endif
-
-#ifdef ILL_DEBUG
-void
-ip_thread_exit_stack(ip_stack_t *ipst)
-{
-	ill_t	*ill;
-	ipif_t	*ipif;
-	ill_walk_context_t	ctx;
-
-	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
-	ill = ILL_START_WALK_ALL(&ctx, ipst);
-	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
-		for (ipif = ill->ill_ipif; ipif != NULL;
-		    ipif = ipif->ipif_next) {
-			ipif_thread_exit(ipif, NULL);
-		}
-		ill_thread_exit(ill, NULL);
-	}
-	rw_exit(&ipst->ips_ill_g_lock);
-
-	ire_walk(ire_thread_exit, NULL, ipst);
-	ndp_walk_common(ipst->ips_ndp4, NULL, nce_thread_exit, NULL, B_FALSE);
-	ndp_walk_common(ipst->ips_ndp6, NULL, nce_thread_exit, NULL, B_FALSE);
-}
-
-/*
- * This is a function which is called from thread_exit
- * that can be used to debug reference count issues in IP. See comment in
- * <inet/ip.h> on how it is used.
- */
-void
-ip_thread_exit(void)
-{
-	netstack_t *ns;
-
-	ns = netstack_get_current();
-	if (ns != NULL) {
-		ip_thread_exit_stack(ns->netstack_ip);
-		netstack_rele(ns);
-	}
-}
-
-/*
- * Called when ipif is unplumbed or when memory alloc fails
- */
-void
-ipif_trace_cleanup(ipif_t *ipif)
-{
-	int	i;
-	th_trace_t	*th_trace;
-	th_trace_t	*th_trace_next;
-
-	for (i = 0; i < IP_TR_HASH_MAX; i++) {
-		for (th_trace = ipif->ipif_trace[i]; th_trace != NULL;
-		    th_trace = th_trace_next) {
-			th_trace_next = th_trace->th_next;
-			kmem_free(th_trace, sizeof (th_trace_t));
-		}
-		ipif->ipif_trace[i] = NULL;
-	}
-}
-
-/*
- * Called when ill is unplumbed or when memory alloc fails
- */
-void
-ill_trace_cleanup(ill_t *ill)
-{
-	int	i;
-	th_trace_t	*th_trace;
-	th_trace_t	*th_trace_next;
-
-	for (i = 0; i < IP_TR_HASH_MAX; i++) {
-		for (th_trace = ill->ill_trace[i]; th_trace != NULL;
-		    th_trace = th_trace_next) {
-			th_trace_next = th_trace->th_next;
-			kmem_free(th_trace, sizeof (th_trace_t));
-		}
-		ill->ill_trace[i] = NULL;
-	}
-}
-
-#else
-void ip_thread_exit(void) {}
-#endif
+	if (!ill->ill_trace_disable)
+		th_trace_unref(ill);
+}
+
+/*
+ * Called when ipif is unplumbed or when memory alloc fails.  Note that on
+ * failure, ipif_trace_disable is set.
+ */
+static void
+ipif_trace_cleanup(const ipif_t *ipif)
+{
+	th_trace_cleanup(ipif, ipif->ipif_trace_disable);
+}
+
+/*
+ * Called when ill is unplumbed or when memory alloc fails.  Note that on
+ * failure, ill_trace_disable is set.
+ */
+static void
+ill_trace_cleanup(const ill_t *ill)
+{
+	th_trace_cleanup(ill, ill->ill_trace_disable);
+}
+#endif /* DEBUG */
 
 void
 ipif_refhold_locked(ipif_t *ipif)
@@ -7732,8 +7667,8 @@
 	ASSERT(ipsq->ipsq_reentry_cnt == 0);
 	ipsq->ipsq_writer = curthread;
 	ipsq->ipsq_reentry_cnt++;
-#ifdef ILL_DEBUG
-	ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IP_STACK_DEPTH);
+#ifdef DEBUG
+	ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IPSQ_STACK_DEPTH);
 #endif
 	mutex_exit(&ipsq->ipsq_lock);
 	mutex_exit(&ill->ill_lock);
@@ -7816,8 +7751,9 @@
 		mutex_exit(&ipsq->ipsq_lock);
 		mutex_exit(&ill->ill_lock);
 		RELEASE_CONN_LOCK(q);
-#ifdef ILL_DEBUG
-		ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack, IP_STACK_DEPTH);
+#ifdef DEBUG
+		ipsq->ipsq_depth = getpcstack(ipsq->ipsq_stack,
+		    IPSQ_STACK_DEPTH);
 #endif
 		return (ipsq);
 	}
@@ -7977,7 +7913,7 @@
 	ipsq->ipsq_writer = NULL;
 	ipsq->ipsq_reentry_cnt--;
 	ASSERT(ipsq->ipsq_reentry_cnt == 0);
-#ifdef ILL_DEBUG
+#ifdef DEBUG
 	ipsq->ipsq_depth = 0;
 #endif
 	mutex_exit(&ipsq->ipsq_lock);
@@ -14681,7 +14617,7 @@
 	newipsq->ipsq_writer = NULL;
 	newipsq->ipsq_reentry_cnt--;
 	ASSERT(newipsq->ipsq_reentry_cnt == 0);
-#ifdef ILL_DEBUG
+#ifdef DEBUG
 	newipsq->ipsq_depth = 0;
 #endif
 
@@ -17811,7 +17747,9 @@
 		}
 		ip_rts_ifmsg(rep_ipif_ptr);
 		ip_rts_newaddrmsg(RTM_DELETE, 0, rep_ipif_ptr);
-		IPIF_TRACE_CLEANUP(rep_ipif_ptr);
+#ifdef DEBUG
+		ipif_trace_cleanup(rep_ipif_ptr);
+#endif
 		mi_free(rep_ipif_ptr);
 	}
 
@@ -19117,7 +19055,9 @@
 	 */
 	ASSERT(ilm_walk_ipif(ipif) == 0);
 
-	IPIF_TRACE_CLEANUP(ipif);
+#ifdef DEBUG
+	ipif_trace_cleanup(ipif);
+#endif
 
 	/* Ask SCTP to take it out of it list */
 	sctp_update_ipif(ipif, SCTP_IPIF_REMOVE);
--- a/usr/src/uts/common/inet/ip/ip_ire.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_ire.c	Tue Sep 11 04:26:06 2007 -0700
@@ -34,7 +34,6 @@
 #include <sys/types.h>
 #include <sys/stream.h>
 #include <sys/stropts.h>
-#include <sys/strsun.h>
 #include <sys/ddi.h>
 #include <sys/cmn_err.h>
 #include <sys/policy.h>
@@ -73,7 +72,6 @@
 
 #include <sys/tsol/label.h>
 #include <sys/tsol/tnet.h>
-#include <sys/dlpi.h>
 
 struct kmem_cache *rt_entry_cache;
 
@@ -355,11 +353,9 @@
 static void	ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type,
     pfv_t func, void *arg, uchar_t vers, ill_t *ill);
 static void	ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt);
-extern void	ill_unlock_ills(ill_t **list, int cnt);
 static	void	ip_nce_clookup_and_delete(nce_t *nce, void *arg);
-extern void	th_trace_rrecord(th_trace_t *);
-#ifdef IRE_DEBUG
-static void	ire_trace_inactive(ire_t *);
+#ifdef DEBUG
+static void	ire_trace_cleanup(const ire_t *);
 #endif
 
 /*
@@ -1759,10 +1755,7 @@
 	}
 	ire->ire_refcnt = 1;
 	ire->ire_ipst = ipst;	/* No netstack_hold */
-
-#ifdef IRE_DEBUG
-	bzero(ire->ire_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
-#endif
+	ire->ire_trace_disable = B_FALSE;
 
 	return (B_TRUE);
 }
@@ -3792,8 +3785,8 @@
 	}
 	ire->ire_ipif = NULL;
 
-#ifdef IRE_DEBUG
-	ire_trace_inactive(ire);
+#ifdef DEBUG
+	ire_trace_cleanup(ire);
 #endif
 	mutex_destroy(&ire->ire_lock);
 	if (ire->ire_ipversion == IPV6_VERSION) {
@@ -5431,147 +5424,40 @@
 	return (NULL);
 }
 
-#ifdef IRE_DEBUG
-th_trace_t *
-th_trace_ire_lookup(ire_t *ire)
-{
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&ire->ire_lock));
-
-	bucket_id = IP_TR_HASH(curthread);
-	ASSERT(bucket_id < IP_TR_HASH_MAX);
-
-	for (th_trace = ire->ire_trace[bucket_id]; th_trace != NULL;
-	    th_trace = th_trace->th_next) {
-		if (th_trace->th_id == curthread)
-			return (th_trace);
-	}
-	return (NULL);
-}
-
+#ifdef DEBUG
 void
 ire_trace_ref(ire_t *ire)
 {
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	/*
-	 * Attempt to locate the trace buffer for the curthread.
-	 * If it does not exist, then allocate a new trace buffer
-	 * and link it in list of trace bufs for this ipif, at the head
-	 */
 	mutex_enter(&ire->ire_lock);
-	if (ire->ire_trace_disable == B_TRUE) {
+	if (ire->ire_trace_disable) {
 		mutex_exit(&ire->ire_lock);
 		return;
 	}
-	th_trace = th_trace_ire_lookup(ire);
-	if (th_trace == NULL) {
-		bucket_id = IP_TR_HASH(curthread);
-		th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t),
-		    KM_NOSLEEP);
-		if (th_trace == NULL) {
-			ire->ire_trace_disable = B_TRUE;
-			mutex_exit(&ire->ire_lock);
-			ire_trace_inactive(ire);
-			return;
-		}
-
-		th_trace->th_id = curthread;
-		th_trace->th_next = ire->ire_trace[bucket_id];
-		th_trace->th_prev = &ire->ire_trace[bucket_id];
-		if (th_trace->th_next != NULL)
-			th_trace->th_next->th_prev = &th_trace->th_next;
-		ire->ire_trace[bucket_id] = th_trace;
+
+	if (th_trace_ref(ire, ire->ire_ipst)) {
+		mutex_exit(&ire->ire_lock);
+	} else {
+		ire->ire_trace_disable = B_TRUE;
+		mutex_exit(&ire->ire_lock);
+		ire_trace_cleanup(ire);
 	}
-	ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1);
-	th_trace->th_refcnt++;
-	th_trace_rrecord(th_trace);
-	mutex_exit(&ire->ire_lock);
-}
-
-void
-ire_trace_free(th_trace_t *th_trace)
-{
-	/* unlink th_trace and free it */
-	*th_trace->th_prev = th_trace->th_next;
-	if (th_trace->th_next != NULL)
-		th_trace->th_next->th_prev = th_trace->th_prev;
-	th_trace->th_next = NULL;
-	th_trace->th_prev = NULL;
-	kmem_free(th_trace, sizeof (th_trace_t));
 }
 
 void
 ire_untrace_ref(ire_t *ire)
 {
-	th_trace_t *th_trace;
-
 	mutex_enter(&ire->ire_lock);
-
-	if (ire->ire_trace_disable == B_TRUE) {
-		mutex_exit(&ire->ire_lock);
-		return;
-	}
-
-	th_trace = th_trace_ire_lookup(ire);
-	ASSERT(th_trace != NULL && th_trace->th_refcnt > 0);
-	th_trace_rrecord(th_trace);
-	th_trace->th_refcnt--;
-
-	if (th_trace->th_refcnt == 0)
-		ire_trace_free(th_trace);
-
+	if (!ire->ire_trace_disable)
+		th_trace_unref(ire);
 	mutex_exit(&ire->ire_lock);
 }
 
 static void
-ire_trace_inactive(ire_t *ire)
+ire_trace_cleanup(const ire_t *ire)
 {
-	th_trace_t *th_trace;
-	int i;
-
-	mutex_enter(&ire->ire_lock);
-	for (i = 0; i < IP_TR_HASH_MAX; i++) {
-		while (ire->ire_trace[i] != NULL) {
-			th_trace = ire->ire_trace[i];
-
-			/* unlink th_trace and free it */
-			ire->ire_trace[i] = th_trace->th_next;
-			if (th_trace->th_next != NULL)
-				th_trace->th_next->th_prev =
-				    &ire->ire_trace[i];
-
-			th_trace->th_next = NULL;
-			th_trace->th_prev = NULL;
-			kmem_free(th_trace, sizeof (th_trace_t));
-		}
-	}
-
-	mutex_exit(&ire->ire_lock);
+	th_trace_cleanup(ire, ire->ire_trace_disable);
 }
-
-/* ARGSUSED */
-void
-ire_thread_exit(ire_t *ire, caddr_t arg)
-{
-	th_trace_t	*th_trace;
-
-	mutex_enter(&ire->ire_lock);
-	th_trace = th_trace_ire_lookup(ire);
-	if (th_trace == NULL) {
-		mutex_exit(&ire->ire_lock);
-		return;
-	}
-	ASSERT(th_trace->th_refcnt == 0);
-
-	ire_trace_free(th_trace);
-	mutex_exit(&ire->ire_lock);
-}
-
-#endif
+#endif /* DEBUG */
 
 /*
  * Generate a message chain with an arp request to resolve the in_ire.
--- a/usr/src/uts/common/inet/ip/ip_ndp.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_ndp.c	Tue Sep 11 04:26:06 2007 -0700
@@ -99,7 +99,6 @@
 static	boolean_t	nce_xmit(ill_t *ill, uint32_t operation,
     ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender,
     const in6_addr_t *target, int flag);
-extern void	th_trace_rrecord(th_trace_t *);
 static int	ndp_add_v4(ill_t *, const in_addr_t *, uint16_t,
     nce_t **, nce_t *);
 
@@ -107,15 +106,13 @@
  * We track the time of creation of the nce in the  nce_init_time field
  * of IPv4 nce_t entries. If an nce is stuck in the ND_INITIAL state for
  * more than NCE_STUCK_TIMEOUT milliseconds, trigger the nce-stuck dtrace
- * probe to assist in debugging. This probe will be fired from
- * nce_thread_exit() for debug kernels, and from nce_report1() when
- * 'ndd -get /dev/ip ip_ndp_cache_report' is invoked on both debug and
- * non-debug kernels.
+ * probe to assist in debugging. This probe is fired from nce_report1()
+ * when 'ndd -get /dev/ip ip_ndp_cache_report' is invoked.
  */
 #define	NCE_STUCK_TIMEOUT	120000
 
-#ifdef NCE_DEBUG
-void	nce_trace_inactive(nce_t *);
+#ifdef DEBUG
+static void	nce_trace_cleanup(const nce_t *);
 #endif
 
 #define	NCE_HASH_PTR_V4(ipst, addr)					\
@@ -241,9 +238,8 @@
 		ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
 	}
 
-#ifdef NCE_DEBUG
-	bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
-#endif
+	nce->nce_trace_disable = B_FALSE;
+
 	/*
 	 * Atomically ensure that the ill is not CONDEMNED, before
 	 * adding the NCE.
@@ -497,8 +493,8 @@
 		}
 	} while (mpp++ != &nce->nce_last_mp_to_free);
 
-#ifdef NCE_DEBUG
-	nce_trace_inactive(nce);
+#ifdef DEBUG
+	nce_trace_cleanup(nce);
 #endif
 
 	ill = nce->nce_ill;
@@ -3517,140 +3513,34 @@
 		ncc->ncc_host++;
 }
 
-#ifdef NCE_DEBUG
-th_trace_t *
-th_trace_nce_lookup(nce_t *nce)
-{
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	ASSERT(MUTEX_HELD(&nce->nce_lock));
-
-	bucket_id = IP_TR_HASH(curthread);
-	ASSERT(bucket_id < IP_TR_HASH_MAX);
-
-	for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL;
-	    th_trace = th_trace->th_next) {
-		if (th_trace->th_id == curthread)
-			return (th_trace);
-	}
-	return (NULL);
-}
-
+#ifdef DEBUG
 void
 nce_trace_ref(nce_t *nce)
 {
-	int bucket_id;
-	th_trace_t *th_trace;
-
-	/*
-	 * Attempt to locate the trace buffer for the curthread.
-	 * If it does not exist, then allocate a new trace buffer
-	 * and link it in list of trace bufs for this ipif, at the head
-	 */
 	ASSERT(MUTEX_HELD(&nce->nce_lock));
 
-	if (nce->nce_trace_disable == B_TRUE)
+	if (nce->nce_trace_disable)
 		return;
 
-	th_trace = th_trace_nce_lookup(nce);
-	if (th_trace == NULL) {
-		bucket_id = IP_TR_HASH(curthread);
-		th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t),
-		    KM_NOSLEEP);
-		if (th_trace == NULL) {
-			nce->nce_trace_disable = B_TRUE;
-			nce_trace_inactive(nce);
-			return;
-		}
-		th_trace->th_id = curthread;
-		th_trace->th_next = nce->nce_trace[bucket_id];
-		th_trace->th_prev = &nce->nce_trace[bucket_id];
-		if (th_trace->th_next != NULL)
-			th_trace->th_next->th_prev = &th_trace->th_next;
-		nce->nce_trace[bucket_id] = th_trace;
+	if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) {
+		nce->nce_trace_disable = B_TRUE;
+		nce_trace_cleanup(nce);
 	}
-	ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1);
-	th_trace->th_refcnt++;
-	th_trace_rrecord(th_trace);
 }
 
 void
 nce_untrace_ref(nce_t *nce)
 {
-	th_trace_t *th_trace;
-
 	ASSERT(MUTEX_HELD(&nce->nce_lock));
 
-	if (nce->nce_trace_disable == B_TRUE)
-		return;
-
-	th_trace = th_trace_nce_lookup(nce);
-	ASSERT(th_trace != NULL && th_trace->th_refcnt > 0);
-
-	th_trace_rrecord(th_trace);
-	th_trace->th_refcnt--;
+	if (!nce->nce_trace_disable)
+		th_trace_unref(nce);
 }
 
-void
-nce_trace_inactive(nce_t *nce)
+static void
+nce_trace_cleanup(const nce_t *nce)
 {
-	th_trace_t *th_trace;
-	int i;
-
-	ASSERT(MUTEX_HELD(&nce->nce_lock));
-
-	for (i = 0; i < IP_TR_HASH_MAX; i++) {
-		while (nce->nce_trace[i] != NULL) {
-			th_trace = nce->nce_trace[i];
-
-			/* unlink th_trace and free it */
-			nce->nce_trace[i] = th_trace->th_next;
-			if (th_trace->th_next != NULL)
-				th_trace->th_next->th_prev =
-				    &nce->nce_trace[i];
-
-			th_trace->th_next = NULL;
-			th_trace->th_prev = NULL;
-			kmem_free(th_trace, sizeof (th_trace_t));
-		}
-	}
-
-}
-
-/* ARGSUSED */
-int
-nce_thread_exit(nce_t *nce, caddr_t arg)
-{
-	th_trace_t	*th_trace;
-	uint64_t	now;
-
-	mutex_enter(&nce->nce_lock);
-	if (nce->nce_state == ND_INITIAL) {
-
-		now = TICK_TO_MSEC(lbolt64);
-		if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) {
-			DTRACE_PROBE1(nce__stuck, nce_t *, nce);
-		}
-	}
-	th_trace = th_trace_nce_lookup(nce);
-
-	if (th_trace == NULL) {
-		mutex_exit(&nce->nce_lock);
-		return (0);
-	}
-
-	ASSERT(th_trace->th_refcnt == 0);
-
-	/* unlink th_trace and free it */
-	*th_trace->th_prev = th_trace->th_next;
-	if (th_trace->th_next != NULL)
-		th_trace->th_next->th_prev = th_trace->th_prev;
-	th_trace->th_next = NULL;
-	th_trace->th_prev = NULL;
-	kmem_free(th_trace, sizeof (th_trace_t));
-	mutex_exit(&nce->nce_lock);
-	return (0);
+	th_trace_cleanup(nce, nce->nce_trace_disable);
 }
 #endif
 
@@ -3767,9 +3657,8 @@
 	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
 	ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
 
-#ifdef NCE_DEBUG
-	bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
-#endif
+	nce->nce_trace_disable = B_FALSE;
+
 	if (src_nce != NULL) {
 		/*
 		 * src_nce has been provided by the caller. The only
--- a/usr/src/uts/common/inet/ip/ipclassifier.c	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c	Tue Sep 11 04:26:06 2007 -0700
@@ -2425,7 +2425,7 @@
 		last = 0;
 
 	ctb = &connp->conn_trace_buf[last];
-	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
+	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
 	connp->conn_trace_last = last;
 	return (1);
 }
@@ -2443,7 +2443,7 @@
 		last = 0;
 
 	ctb = &connp->conn_trace_buf[last];
-	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
+	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
 	connp->conn_trace_last = last;
 	return (1);
 }
--- a/usr/src/uts/common/inet/ip_if.h	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip_if.h	Tue Sep 11 04:26:06 2007 -0700
@@ -471,9 +471,6 @@
 extern	void	conn_delete_ire(conn_t *, caddr_t);
 
 /*
- * This is a function which is called from thread_exit
- * that can be used to debug reference count issues in IP.
- *
  * Notes on reference tracing on ill, ipif, ire, nce data structures:
  *
  * The current model of references on an ipif or ill is purely based on threads
@@ -487,9 +484,11 @@
  *
  * As a debugging aid, the refhold and refrele functions call into tracing
  * functions that record the stack trace of the caller and the references
- * acquired or released by the calling thread, hashed by the thread id. On
- * thread exit, ipif_thread_exit and ill_thread_exit verify that there are no
- * outstanding references to the ipif or ill from the exiting thread.
+ * acquired or released by the calling thread, hashed by the structure address
+ * in thread-specific-data (TSD).  On thread exit, ip_thread_exit destroys the
+ * hash, and the destructor for the hash entries (th_trace_free) verifies that
+ * there are no outstanding references to the ipif or ill from the exiting
+ * thread.
  *
  * In the case of ires and nces, the model is slightly different. Typically each
  * ire pointing to an nce contributes to the nce_refcnt. Similarly a conn_t
@@ -500,17 +499,14 @@
  * ire_thread_exit, nce_thread_exit does the verification that are no
  * outstanding references on the ire / nce from the exiting thread.
  *
- * The reference verification is driven from thread_exit() which calls into IP
- * via a function pointer ip_cleanup_func into the verification function
- * ip_thread_exit. This debugging aid may be helpful in tracing missing
- * refrele's on a debug kernel. On a non-debug kernel, these missing refrele's
- * are noticeable only when an interface is being unplumbed, and the unplumb
- * hangs, long after the missing refrele. On a debug kernel, the traces
- * (th_trace_t) which contain the stack backtraces can be examined on a crash
- * dump to locate the missing refrele.
+ * The reference verification is driven from the TSD destructor which calls
+ * into IP's verification function ip_thread_exit. This debugging aid may be
+ * helpful in tracing missing refrele's on a debug kernel. On a non-debug
+ * kernel, these missing refrele's are noticeable only when an interface is
+ * being unplumbed, and the unplumb hangs, long after the missing refrele. On a
+ * debug kernel, the traces (th_trace_t) which contain the stack backtraces can
+ * be examined on a crash dump to locate the missing refrele.
  */
-extern void (*ip_cleanup_func)(void);
-extern void ip_thread_exit(void);
 
 #endif /* _KERNEL */
 
--- a/usr/src/uts/common/inet/ip_ndp.h	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ip_ndp.h	Tue Sep 11 04:26:06 2007 -0700
@@ -78,10 +78,7 @@
 	uint_t		nce_defense_count;	/* number of NDP conflicts */
 	uint_t		nce_defense_time;	/* last time defended (secs) */
 	uint64_t	nce_init_time;  /* time when it was set to ND_INITIAL */
-#ifdef NCE_DEBUG
-	th_trace_t	*nce_trace[IP_TR_HASH_MAX];
 	boolean_t	nce_trace_disable;	/* True when alloc fails */
-#endif
 } nce_t;
 
 /*
@@ -169,7 +166,7 @@
 #define	ND_MAX_Q		4
 
 
-#ifdef NCE_DEBUG
+#ifdef DEBUG
 #define	NCE_TRACE_REF(nce)		nce_trace_ref(nce)
 #define	NCE_UNTRACE_REF(nce)		nce_untrace_ref(nce)
 #else
@@ -357,11 +354,9 @@
 extern	int	ndp_lookup_then_add_v4(ill_t *,
     const in_addr_t *, uint16_t, nce_t **, nce_t *);
 
-#ifdef NCE_DEBUG
-extern	void	nce_trace_inactive(nce_t *);
+#ifdef DEBUG
 extern	void	nce_trace_ref(nce_t *);
 extern	void	nce_untrace_ref(nce_t *);
-extern	int	nce_thread_exit(nce_t *, caddr_t);
 #endif
 
 #endif	/* _KERNEL */
--- a/usr/src/uts/common/inet/ipclassifier.h	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/common/inet/ipclassifier.h	Tue Sep 11 04:26:06 2007 -0700
@@ -125,8 +125,8 @@
 typedef struct
 {
 	int	ctb_depth;
-#define	IP_STACK_DEPTH	15
-	pc_t	ctb_stack[IP_STACK_DEPTH];
+#define	CONN_STACK_DEPTH	15
+	pc_t	ctb_stack[CONN_STACK_DEPTH];
 } conn_trace_t;
 
 struct conn_s {
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64	Tue Sep 11 04:26:06 2007 -0700
@@ -126,6 +126,9 @@
 ip_squeue_profile
 ip_squeue_worker_wait
 ip_squeues_per_cpu
+ip_thread_data
+ip_thread_list
+ip_thread_rwlock
 ip_wput_frag_mdt_min
 ipcl_bind_fanout_size
 ipcl_conn_cache
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64	Tue Sep 11 04:26:06 2007 -0700
@@ -126,6 +126,9 @@
 ip_squeue_profile
 ip_squeue_worker_wait
 ip_squeues_per_cpu
+ip_thread_data
+ip_thread_list
+ip_thread_rwlock
 ip_wput_frag_mdt_min
 ipcl_bind_fanout_size
 ipcl_conn_cache
--- a/usr/src/uts/sparc/ip/Makefile	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/sparc/ip/Makefile	Tue Sep 11 04:26:06 2007 -0700
@@ -102,8 +102,6 @@
 clean.lint:	$(CLEAN_LINT_DEPS)
 
 install:	$(INSTALL_DEPS) $(SISCHECK_DEPS)
-	pwd;
-	echo "abc me";
 
 $(ROOTLINK):	$(ROOT_STRMOD_DIR) $(ROOTMODULE)
 	-$(RM) $@; ln $(ROOTMODULE) $@
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64	Tue Sep 11 04:26:06 2007 -0700
@@ -126,6 +126,9 @@
 ip_squeue_profile
 ip_squeue_worker_wait
 ip_squeues_per_cpu
+ip_thread_data
+ip_thread_list
+ip_thread_rwlock
 ip_wput_frag_mdt_min
 ipcl_bind_fanout_size
 ipcl_conn_cache
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64	Mon Sep 10 15:47:44 2007 -0700
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64	Tue Sep 11 04:26:06 2007 -0700
@@ -126,6 +126,9 @@
 ip_squeue_profile
 ip_squeue_worker_wait
 ip_squeues_per_cpu
+ip_thread_data
+ip_thread_list
+ip_thread_rwlock
 ip_wput_frag_mdt_min
 ipcl_bind_fanout_size
 ipcl_conn_cache