changeset 12881:fb36eaeb6ee0

6910078 SCTP should have similar memory handling mechanism as TCP
author Kacheong Poon <Kacheong.Poon@Sun.COM>
date Mon, 19 Jul 2010 17:27:45 -0700
parents 76234fce8dc5
children 5249b6635922
files usr/src/cmd/mdb/common/modules/sctp/sctp.c usr/src/uts/common/Makefile.files usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/sctp/sctp.c usr/src/uts/common/inet/sctp/sctp_asconf.c usr/src/uts/common/inet/sctp/sctp_bind.c usr/src/uts/common/inet/sctp/sctp_common.c usr/src/uts/common/inet/sctp/sctp_conn.c usr/src/uts/common/inet/sctp/sctp_cookie.c usr/src/uts/common/inet/sctp/sctp_error.c usr/src/uts/common/inet/sctp/sctp_heartbeat.c usr/src/uts/common/inet/sctp/sctp_impl.h usr/src/uts/common/inet/sctp/sctp_input.c usr/src/uts/common/inet/sctp/sctp_misc.c usr/src/uts/common/inet/sctp/sctp_output.c usr/src/uts/common/inet/sctp/sctp_snmp.c usr/src/uts/common/inet/sctp/sctp_stack.h usr/src/uts/common/inet/sctp/sctp_timer.c usr/src/uts/common/inet/sctp/sctp_tunables.c usr/src/uts/common/inet/tcp/tcp.c usr/src/uts/common/inet/tcp/tcp_misc.c usr/src/uts/common/inet/tcp/tcp_stats.c usr/src/uts/common/inet/tcp/tcp_timers.c usr/src/uts/common/inet/tcp/tcp_tunables.c usr/src/uts/common/inet/tcp_impl.h usr/src/uts/common/inet/tcp_stack.h usr/src/uts/common/inet/tcp_stats.h usr/src/uts/common/inet/tunables.h usr/src/uts/common/inet/udp/udp.c usr/src/uts/common/inet/udp/udp_stats.c usr/src/uts/common/inet/udp_impl.h usr/src/uts/intel/ip/ip.global-objs.debug64 usr/src/uts/intel/ip/ip.global-objs.obj64 usr/src/uts/sparc/ip/ip.global-objs.debug64 usr/src/uts/sparc/ip/ip.global-objs.obj64
diffstat 35 files changed, 2214 insertions(+), 975 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c	Mon Jul 19 17:27:45 2010 -0700
@@ -1499,6 +1499,68 @@
 	    wsp->walk_cbdata));
 }
 
+/*
+ * Initialization function for the per CPU SCTP stats counter walker of a given
+ * SCTP stack.
+ */
+int
+sctps_sc_walk_init(mdb_walk_state_t *wsp)
+{
+	sctp_stack_t sctps;
+
+	if (wsp->walk_addr == NULL)
+		return (WALK_ERR);
+
+	if (mdb_vread(&sctps, sizeof (sctps), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read sctp_stack_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	if (sctps.sctps_sc_cnt == 0)
+		return (WALK_DONE);
+
+	/*
+	 * Store the sctp_stack_t pointer in walk_data.  The stepping function
+	 * uses it to determine whether the end of the counter array is reached.
+	 */
+	wsp->walk_data = (void *)wsp->walk_addr;
+	wsp->walk_addr = (uintptr_t)sctps.sctps_sc;
+	return (WALK_NEXT);
+}
+
+/*
+ * Stepping function for the per CPU SCTP stats counter walker.
+ */
+int
+sctps_sc_walk_step(mdb_walk_state_t *wsp)
+{
+	int status;
+	sctp_stack_t sctps;
+	sctp_stats_cpu_t *stats;
+	char *next, *end;
+
+	if (mdb_vread(&sctps, sizeof (sctps), (uintptr_t)wsp->walk_data) ==
+	    -1) {
+		mdb_warn("failed to read sctp_stack_t at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	if (mdb_vread(&stats, sizeof (stats), wsp->walk_addr) == -1) {
+		mdb_warn("failed ot read sctp_stats_cpu_t at %p",
+		    wsp->walk_addr);
+		return (WALK_ERR);
+	}
+	status = wsp->walk_callback((uintptr_t)stats, &stats, wsp->walk_cbdata);
+	if (status != WALK_NEXT)
+		return (status);
+
+	next = (char *)wsp->walk_addr + sizeof (sctp_stats_cpu_t *);
+	end = (char *)sctps.sctps_sc + sctps.sctps_sc_cnt *
+	    sizeof (sctp_stats_cpu_t *);
+	if (next >= end)
+		return (WALK_DONE);
+	wsp->walk_addr = (uintptr_t)next;
+	return (WALK_NEXT);
+}
+
 static void
 sctp_help(void)
 {
@@ -1522,6 +1584,7 @@
 	mdb_printf("\t-d\t Local and Peer addresses\n");
 	mdb_printf("\t-P\t Peer addresses\n");
 }
+
 static const mdb_dcmd_t dcmds[] = {
 	{ "sctp", ":[-afhoimrSFHpRCcedP]",
 	    "display sctp control structure", sctp, sctp_help },
@@ -1591,8 +1654,8 @@
 		sctp_stack_ill_walk_init, sctp_stack_ill_walk_step, NULL },
 	{ "sctp_stack_walk_ipif", "walk the sctp_g_ipif list for one stack",
 		sctp_stack_ipif_walk_init, sctp_stack_ipif_walk_step, NULL },
-	{ "sctp_stacks", "walk all the sctp_stack_t",
-		sctp_stacks_walk_init, sctp_stacks_walk_step, NULL },
+	{ "sctps_sc", "walk all the per CPU stats counters of a sctp_stack_t",
+		sctps_sc_walk_init, sctps_sc_walk_step, NULL },
 	{ NULL }
 };
 
--- a/usr/src/uts/common/Makefile.files	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/Makefile.files	Mon Jul 19 17:27:45 2010 -0700
@@ -528,14 +528,15 @@
 IP_TCP_OBJS =	tcp.o tcp_fusion.o tcp_opt_data.o tcp_sack.o tcp_stats.o \
 		tcp_misc.o tcp_timers.o tcp_time_wait.o tcp_tpi.o tcp_output.o \
 		tcp_input.o tcp_socket.o tcp_bind.o tcp_cluster.o tcp_tunables.o
-IP_UDP_OBJS =	udp.o udp_opt_data.o udp_tunables.o
+IP_UDP_OBJS =	udp.o udp_opt_data.o udp_tunables.o udp_stats.o
 IP_SCTP_OBJS =	sctp.o sctp_opt_data.o sctp_output.o \
 		sctp_init.o sctp_input.o sctp_cookie.o \
 		sctp_conn.o sctp_error.o sctp_snmp.o \
 		sctp_tunables.o sctp_shutdown.o sctp_common.o \
 		sctp_timer.o sctp_heartbeat.o sctp_hash.o \
 		sctp_bind.o sctp_notify.o sctp_asconf.o \
-		sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o
+		sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o \
+		sctp_misc.o
 IP_ILB_OBJS =	ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o
 
 IP_OBJS +=	igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \
--- a/usr/src/uts/common/inet/ip/ip.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/ip/ip.c	Mon Jul 19 17:27:45 2010 -0700
@@ -740,6 +740,8 @@
 static void	ipobs_init(ip_stack_t *);
 static void	ipobs_fini(ip_stack_t *);
 
+static int	ip_tp_cpu_update(cpu_setup_t, int, void *);
+
 ipaddr_t	ip_g_all_ones = IP_HOST_MASK;
 
 static long ip_rput_pullups;
@@ -4274,6 +4276,11 @@
 void
 ip_ddi_destroy(void)
 {
+	/* This needs to be called before destroying any transports. */
+	mutex_enter(&cpu_lock);
+	unregister_cpu_setup_func(ip_tp_cpu_update, NULL);
+	mutex_exit(&cpu_lock);
+
 	tnet_fini();
 
 	icmp_ddi_g_destroy();
@@ -4531,6 +4538,11 @@
 	rts_ddi_g_init();
 	icmp_ddi_g_init();
 	ilb_ddi_g_init();
+
+	/* This needs to be called after all transports are initialized. */
+	mutex_enter(&cpu_lock);
+	register_cpu_setup_func(ip_tp_cpu_update, NULL);
+	mutex_exit(&cpu_lock);
 }
 
 /*
@@ -14152,7 +14164,7 @@
 		 * Drop the packet here if the sctp checksum failed.
 		 */
 		if (iraflags & IRAF_SCTP_CSUM_ERR) {
-			BUMP_MIB(&sctps->sctps_mib, sctpChecksumError);
+			SCTPS_BUMP_MIB(sctps, sctpChecksumError);
 			freemsg(mp);
 			return;
 		}
@@ -15088,3 +15100,45 @@
 	    *v4srcp));
 	return (B_FALSE);
 }
+
+/*
+ * Transport protocol call back function for CPU state change.
+ */
+/* ARGSUSED */
+static int
+ip_tp_cpu_update(cpu_setup_t what, int id, void *arg)
+{
+	processorid_t cpu_seqid;
+	netstack_handle_t nh;
+	netstack_t *ns;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+	cpu_seqid = cpu[id]->cpu_seqid;
+
+	switch (what) {
+	case CPU_CONFIG:
+	case CPU_ON:
+	case CPU_INIT:
+	case CPU_CPUPART_IN:
+		netstack_next_init(&nh);
+		while ((ns = netstack_next(&nh)) != NULL) {
+			tcp_stack_cpu_add(ns->netstack_tcp, cpu_seqid);
+			sctp_stack_cpu_add(ns->netstack_sctp, cpu_seqid);
+			udp_stack_cpu_add(ns->netstack_udp, cpu_seqid);
+			netstack_rele(ns);
+		}
+		netstack_next_fini(&nh);
+		break;
+	case CPU_UNCONFIG:
+	case CPU_OFF:
+	case CPU_CPUPART_OUT:
+		/*
+		 * Nothing to do.  We don't remove the per CPU stats from
+		 * the IP stack even when the CPU goes offline.
+		 */
+		break;
+	default:
+		break;
+	}
+	return (0);
+}
--- a/usr/src/uts/common/inet/sctp/sctp.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp.c	Mon Jul 19 17:27:45 2010 -0700
@@ -109,13 +109,13 @@
 
 /* /etc/system variables */
 /* The minimum number of threads for each taskq. */
-int		sctp_recvq_tq_thr_min = 4;
+int sctp_recvq_tq_thr_min = 4;
 /* The maximum number of threads for each taskq. */
-int		sctp_recvq_tq_thr_max = 16;
-/* The minimum number of tasks for each taskq. */
-int		sctp_recvq_tq_task_min = 5;
-/* The maxiimum number of tasks for each taskq. */
-int		sctp_recvq_tq_task_max = 50;
+int sctp_recvq_tq_thr_max = 48;
+/* The minimum number of tasks for each taskq. */
+int sctp_recvq_tq_task_min = 8;
+/* Default value of sctp_recvq_tq_list_max_sz. */
+int sctp_recvq_tq_list_max = 16;
 
 /*
  * SCTP tunables related declarations. Definitions are in sctp_tunables.c
@@ -225,6 +225,9 @@
 	 */
 	SCTP_LINK(sctp, sctps);
 
+	/* If the listener has a limit, inherit the counter info. */
+	sctp->sctp_listen_cnt = psctp->sctp_listen_cnt;
+
 	return (sctp);
 }
 
@@ -374,7 +377,7 @@
 			sctp->sctp_running = B_FALSE;
 			while (sctp->sctp_state >= SCTPS_ESTABLISHED &&
 			    sctp->sctp_client_errno == 0) {
-				cv_broadcast(&sctp->sctp_cv);
+				cv_signal(&sctp->sctp_cv);
 				ret = cv_timedwait_sig(&sctp->sctp_cv,
 				    &sctp->sctp_lock, stoptime);
 				if (ret < 0) {
@@ -457,6 +460,13 @@
 	mblk_t	*mp;
 	conn_t	*connp = sctp->sctp_connp;
 
+	/* The counter is incremented only for established associations. */
+	if (sctp->sctp_state >= SCTPS_ESTABLISHED)
+		SCTPS_ASSOC_DEC(sctp->sctp_sctps);
+
+	if (sctp->sctp_listen_cnt != NULL)
+		SCTP_DECR_LISTEN_CNT(sctp);
+
 	/* Sanity check, don't do the same thing twice.  */
 	if (connp->conn_state_flags & CONN_CLOSING) {
 		ASSERT(sctp->sctp_state == SCTPS_IDLE);
@@ -717,19 +727,17 @@
 	bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits));
 
 	/* It is time to update the global statistics. */
-	UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, sctp->sctp_opkts);
-	UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, sctp->sctp_obchunks);
-	UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, sctp->sctp_odchunks);
-	UPDATE_MIB(&sctps->sctps_mib,
-	    sctpOutUnorderChunks, sctp->sctp_oudchunks);
-	UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, sctp->sctp_rxtchunks);
-	UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, sctp->sctp_ipkts);
-	UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, sctp->sctp_ibchunks);
-	UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, sctp->sctp_idchunks);
-	UPDATE_MIB(&sctps->sctps_mib,
-	    sctpInUnorderChunks, sctp->sctp_iudchunks);
-	UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs);
-	UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs);
+	SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts);
+	SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, sctp->sctp_odchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, sctp->sctp_rxtchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts);
+	SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks);
+	SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs);
+	SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs);
 	sctp->sctp_opkts = 0;
 	sctp->sctp_obchunks = 0;
 	sctp->sctp_odchunks = 0;
@@ -766,64 +774,6 @@
 	kmem_cache_free(sctp_conn_cache, connp);
 }
 
-/* Diagnostic routine used to return a string associated with the sctp state. */
-char *
-sctp_display(sctp_t *sctp, char *sup_buf)
-{
-	char	*buf;
-	char	buf1[30];
-	static char	priv_buf[INET6_ADDRSTRLEN * 2 + 80];
-	char	*cp;
-	conn_t	*connp;
-
-	if (sctp == NULL)
-		return ("NULL_SCTP");
-
-	connp = sctp->sctp_connp;
-	buf = (sup_buf != NULL) ? sup_buf : priv_buf;
-
-	switch (sctp->sctp_state) {
-	case SCTPS_IDLE:
-		cp = "SCTP_IDLE";
-		break;
-	case SCTPS_BOUND:
-		cp = "SCTP_BOUND";
-		break;
-	case SCTPS_LISTEN:
-		cp = "SCTP_LISTEN";
-		break;
-	case SCTPS_COOKIE_WAIT:
-		cp = "SCTP_COOKIE_WAIT";
-		break;
-	case SCTPS_COOKIE_ECHOED:
-		cp = "SCTP_COOKIE_ECHOED";
-		break;
-	case SCTPS_ESTABLISHED:
-		cp = "SCTP_ESTABLISHED";
-		break;
-	case SCTPS_SHUTDOWN_PENDING:
-		cp = "SCTP_SHUTDOWN_PENDING";
-		break;
-	case SCTPS_SHUTDOWN_SENT:
-		cp = "SCTPS_SHUTDOWN_SENT";
-		break;
-	case SCTPS_SHUTDOWN_RECEIVED:
-		cp = "SCTPS_SHUTDOWN_RECEIVED";
-		break;
-	case SCTPS_SHUTDOWN_ACK_SENT:
-		cp = "SCTPS_SHUTDOWN_ACK_SENT";
-		break;
-	default:
-		(void) mi_sprintf(buf1, "SCTPUnkState(%d)", sctp->sctp_state);
-		cp = buf1;
-		break;
-	}
-	(void) mi_sprintf(buf, "[%u, %u] %s",
-	    ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp);
-
-	return (buf);
-}
-
 /*
  * Initialize protocol control block. If a parent exists, inherit
  * all values set through setsockopt().
@@ -1208,7 +1158,7 @@
 				if (!sctp_icmp_verf(sctp, sctph, mp)) {
 					break;
 				}
-				BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+				SCTPS_BUMP_MIB(sctps, sctpAborted);
 				sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0,
 				    NULL);
 				sctp_clean_death(sctp, ECONNREFUSED);
@@ -1315,7 +1265,7 @@
 			}
 			if (sctp->sctp_state == SCTPS_COOKIE_WAIT ||
 			    sctp->sctp_state == SCTPS_COOKIE_ECHOED) {
-				BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+				SCTPS_BUMP_MIB(sctps, sctpAborted);
 				sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0,
 				    NULL);
 				sctp_clean_death(sctp, ECONNREFUSED);
@@ -1344,7 +1294,7 @@
 				break;
 			}
 			if (sctp->sctp_state == SCTPS_COOKIE_WAIT) {
-				BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+				SCTPS_BUMP_MIB(sctps, sctpAborted);
 				sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0,
 				    NULL);
 				sctp_clean_death(sctp, ECONNREFUSED);
@@ -1386,13 +1336,18 @@
 		sctps = psctp->sctp_sctps;
 		/* Increase here to have common decrease at end */
 		netstack_hold(sctps->sctps_netstack);
+		ASSERT(sctps->sctps_recvq_tq_list_cur_sz > 0);
 	} else {
 		netstack_t *ns;
 
 		ns = netstack_find_by_cred(credp);
-		ASSERT(ns != NULL);
 		sctps = ns->netstack_sctp;
-		ASSERT(sctps != NULL);
+		/*
+		 * Check if the receive queue taskq for this sctp_stack_t has
+		 * been set up.
+		 */
+		if (sctps->sctps_recvq_tq_list_cur_sz == 0)
+			sctp_rq_tq_init(sctps);
 
 		/*
 		 * For exclusive stacks we set the zoneid to zero
@@ -1570,6 +1525,7 @@
 {
 	sctp_stack_t	*sctps;
 	size_t		arrsz;
+	int		i;
 
 	sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP);
 	sctps->sctps_netstack = ns;
@@ -1589,9 +1545,6 @@
 	    KM_SLEEP);
 	bcopy(sctp_propinfo_tbl, sctps->sctps_propinfo_tbl, arrsz);
 
-	/* Initialize the recvq taskq. */
-	sctp_rq_tq_init(sctps);
-
 	/* saddr init */
 	sctp_saddr_init(sctps);
 
@@ -1599,10 +1552,29 @@
 	list_create(&sctps->sctps_g_list, sizeof (sctp_t),
 	    offsetof(sctp_t, sctp_list));
 
-	/* Initialize sctp kernel stats. */
+	/* Initialize SCTP kstats. */
 	sctps->sctps_mibkp = sctp_kstat_init(stackid);
-	sctps->sctps_kstat =
-	    sctp_kstat2_init(stackid, &sctps->sctps_statistics);
+	sctps->sctps_kstat = sctp_kstat2_init(stackid);
+
+	mutex_init(&sctps->sctps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
+	sctps->sctps_reclaim = B_FALSE;
+	sctps->sctps_reclaim_tid = 0;
+	sctps->sctps_reclaim_period = sctps->sctps_rto_maxg;
+
+	/* Allocate the per netstack stats */
+	mutex_enter(&cpu_lock);
+	sctps->sctps_sc_cnt = MAX(ncpus, boot_ncpus);
+	mutex_exit(&cpu_lock);
+	sctps->sctps_sc = kmem_zalloc(max_ncpus  * sizeof (sctp_stats_cpu_t *),
+	    KM_SLEEP);
+	for (i = 0; i < sctps->sctps_sc_cnt; i++) {
+		sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t),
+		    KM_SLEEP);
+	}
+
+	mutex_init(&sctps->sctps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL);
+	list_create(&sctps->sctps_listener_conf, sizeof (sctp_listener_t),
+	    offsetof(sctp_listener_t, sl_link));
 
 	return (sctps);
 }
@@ -1635,6 +1607,20 @@
 sctp_stack_fini(netstackid_t stackid, void *arg)
 {
 	sctp_stack_t *sctps = (sctp_stack_t *)arg;
+	int i;
+
+	/*
+	 * Setting sctps_reclaim to B_FALSE tells sctp_reclaim_timer() not to restart
+	 * the timer.
+	 */
+	mutex_enter(&sctps->sctps_reclaim_lock);
+	sctps->sctps_reclaim = B_FALSE;
+	mutex_exit(&sctps->sctps_reclaim_lock);
+	if (sctps->sctps_reclaim_tid != 0)
+		(void) untimeout(sctps->sctps_reclaim_tid);
+	mutex_destroy(&sctps->sctps_reclaim_lock);
+
+	sctp_listener_conf_cleanup(sctps);
 
 	kmem_free(sctps->sctps_propinfo_tbl,
 	    sctp_propinfo_count * sizeof (mod_prop_info_t));
@@ -1653,12 +1639,14 @@
 	sctp_hash_destroy(sctps);
 
 	/* Destroy SCTP kernel stats. */
-	sctp_kstat2_fini(stackid, sctps->sctps_kstat);
-	sctps->sctps_kstat = NULL;
-	bzero(&sctps->sctps_statistics, sizeof (sctps->sctps_statistics));
+	for (i = 0; i < sctps->sctps_sc_cnt; i++)
+		kmem_free(sctps->sctps_sc[i], sizeof (sctp_stats_cpu_t));
+	kmem_free(sctps->sctps_sc, max_ncpus * sizeof (sctp_stats_cpu_t *));
 
 	sctp_kstat_fini(stackid, sctps->sctps_mibkp);
 	sctps->sctps_mibkp = NULL;
+	sctp_kstat2_fini(stackid, sctps->sctps_kstat);
+	sctps->sctps_kstat = NULL;
 
 	mutex_destroy(&sctps->sctps_g_lock);
 	mutex_destroy(&sctps->sctps_epriv_port_lock);
@@ -1666,26 +1654,30 @@
 	kmem_free(sctps, sizeof (*sctps));
 }
 
-void
-sctp_display_all(sctp_stack_t *sctps)
-{
-	sctp_t *sctp_walker;
-
-	mutex_enter(&sctps->sctps_g_lock);
-	for (sctp_walker = list_head(&sctps->sctps_g_list);
-	    sctp_walker != NULL;
-	    sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list,
-	    sctp_walker)) {
-		(void) sctp_display(sctp_walker, NULL);
-	}
-	mutex_exit(&sctps->sctps_g_lock);
-}
-
 static void
 sctp_rq_tq_init(sctp_stack_t *sctps)
 {
-	sctps->sctps_recvq_tq_list_max_sz = 16;
+	char tq_name[TASKQ_NAMELEN];
+	int thrs;
+	int max_tasks;
+
+	thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min,
+	    MAX(ncpus, boot_ncpus)));
+	/*
+	 * Make sure that the maximum number of tasks is at least thrice as
+	 * large as the number of threads.
+	 */
+	max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3;
+
+	/*
+	 * This helps differentiate the default taskqs in different IP stacks.
+	 */
+	(void) snprintf(tq_name, sizeof (tq_name), "sctp_def_rq_taskq_%d",
+	    sctps->sctps_netstack->netstack_stackid);
+
+	sctps->sctps_recvq_tq_list_max_sz = sctp_recvq_tq_list_max;
 	sctps->sctps_recvq_tq_list_cur_sz = 1;
+
 	/*
 	 * Initialize the recvq_tq_list and create the first recvq taskq.
 	 * What to do if it fails?
@@ -1693,10 +1685,8 @@
 	sctps->sctps_recvq_tq_list =
 	    kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *),
 	    KM_SLEEP);
-	sctps->sctps_recvq_tq_list[0] = taskq_create("sctp_def_recvq_taskq",
-	    MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)),
-	    minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max,
-	    TASKQ_PREPOPULATE);
+	sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs,
+	    minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE);
 	mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL);
 }
 
@@ -1705,6 +1695,9 @@
 {
 	int i;
 
+	if (sctps->sctps_recvq_tq_list_cur_sz == 0)
+		return;
+
 	for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) {
 		ASSERT(sctps->sctps_recvq_tq_list[i] != NULL);
 		taskq_destroy(sctps->sctps_recvq_tq_list[i]);
@@ -1720,6 +1713,16 @@
 {
 	taskq_t *tq;
 	char tq_name[TASKQ_NAMELEN];
+	int thrs;
+	int max_tasks;
+
+	thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min,
+	    MAX(ncpus, boot_ncpus)));
+	/*
+	 * Make sure that the maximum number of tasks is at least thrice as
+	 * large as the number of threads.
+	 */
+	max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3;
 
 	mutex_enter(&sctps->sctps_rq_tq_lock);
 	if (sctps->sctps_recvq_tq_list_cur_sz + 1 >
@@ -1729,12 +1732,11 @@
 		return;
 	}
 
-	(void) snprintf(tq_name, sizeof (tq_name), "sctp_recvq_taskq_%u",
+	(void) snprintf(tq_name, sizeof (tq_name), "sctp_rq_taskq_%d_%u",
+	    sctps->sctps_netstack->netstack_stackid,
 	    sctps->sctps_recvq_tq_list_cur_sz);
-	tq = taskq_create(tq_name,
-	    MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)),
-	    minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max,
-	    TASKQ_PREPOPULATE);
+	tq = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min,
+	    max_tasks, TASKQ_PREPOPULATE);
 	if (tq == NULL) {
 		mutex_exit(&sctps->sctps_rq_tq_lock);
 		cmn_err(CE_NOTE, "SCTP recvq taskq creation failed");
@@ -2072,7 +2074,7 @@
 {
 	sctp_conn_cache = kmem_cache_create("sctp_conn_cache",
 	    sizeof (sctp_t) + sizeof (conn_t), 0, sctp_conn_cache_constructor,
-	    sctp_conn_cache_destructor, NULL, NULL, NULL, 0);
+	    sctp_conn_cache_destructor, sctp_conn_reclaim, NULL, NULL, 0);
 }
 
 static void
--- a/usr/src/uts/common/inet/sctp/sctp_asconf.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_asconf.c	Mon Jul 19 17:27:45 2010 -0700
@@ -870,7 +870,7 @@
 	/* Retransmission */
 	if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) {
 		/* time to give up */
-		BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+		SCTPS_BUMP_MIB(sctps, sctpAborted);
 		sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL);
 		sctp_clean_death(sctp, ETIMEDOUT);
 		return;
--- a/usr/src/uts/common/inet/sctp/sctp_bind.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_bind.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -50,6 +49,12 @@
 #include "sctp_addr.h"
 
 /*
+ * Minimum number of associations which can be created per listener.  Used
+ * when the listener association count is in effect.
+ */
+static uint32_t sctp_min_assoc_listener = 2;
+
+/*
  * Returns 0 on success, EACCES on permission failure.
  */
 static int
@@ -160,9 +165,54 @@
 	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
 	sctp->sctp_last_secret_update = ddi_get_lbolt64();
 	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);
+
+	/*
+	 * If there is an association limit, allocate and initialize
+	 * the counter struct.  Note that since listen can be called
+	 * multiple times, the struct may have been already allocated.
+	 */
+	if (!list_is_empty(&sctps->sctps_listener_conf) &&
+	    sctp->sctp_listen_cnt == NULL) {
+		sctp_listen_cnt_t *slc;
+		uint32_t ratio;
+
+		ratio = sctp_find_listener_conf(sctps,
+		    ntohs(connp->conn_lport));
+		if (ratio != 0) {
+			uint32_t mem_ratio, tot_buf;
+
+			slc = kmem_alloc(sizeof (sctp_listen_cnt_t), KM_SLEEP);
+			/*
+			 * Calculate the connection limit based on
+			 * the configured ratio and maxusers.  Maxusers
+			 * are calculated based on memory size,
+			 * ~ 1 user per MB.  Note that the conn_rcvbuf
+			 * and conn_sndbuf may change after a
+			 * connection is accepted.  So what we have
+			 * is only an approximation.
+			 */
+			if ((tot_buf = connp->conn_rcvbuf +
+			    connp->conn_sndbuf) < MB) {
+				mem_ratio = MB / tot_buf;
+				slc->slc_max = maxusers / ratio * mem_ratio;
+			} else {
+				mem_ratio = tot_buf / MB;
+				slc->slc_max = maxusers / ratio / mem_ratio;
+			}
+			/* At least we should allow some associations! */
+			if (slc->slc_max < sctp_min_assoc_listener)
+				slc->slc_max = sctp_min_assoc_listener;
+			slc->slc_cnt = 1;
+			slc->slc_drop = 0;
+			sctp->sctp_listen_cnt = slc;
+		}
+	}
+
+
 	tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(
 	    ntohs(connp->conn_lport))];
 	sctp_listen_hash_insert(tf, sctp);
+
 	WAKE_SCTP(sctp);
 	return (0);
 }
--- a/usr/src/uts/common/inet/sctp/sctp_common.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_common.c	Mon Jul 19 17:27:45 2010 -0700
@@ -618,10 +618,19 @@
 void
 sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
 {
-	int64_t now = ddi_get_lbolt64();
+	int64_t now = LBOLT_FASTPATH64;
 
+	/*
+	 * If we are under memory pressure, we abort association waiting
+	 * in zero window probing state for too long.  We do this by not
+	 * resetting sctp_strikes.  So if sctp_zero_win_probe continues
+	 * while under memory pressure, this association will eventually
+	 * time out.
+	 */
+	if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) {
+		sctp->sctp_strikes = 0;
+	}
 	fp->strikes = 0;
-	sctp->sctp_strikes = 0;
 	fp->lastactive = now;
 	fp->hb_expiry = now + SET_HB_INTVL(fp);
 	fp->hb_pending = B_FALSE;
@@ -646,18 +655,22 @@
 	}
 }
 
-int
+/*
+ * Return B_TRUE if there is still an active peer address with zero strikes;
+ * otherwise return B_FALSE.
+ */
+boolean_t
 sctp_is_a_faddr_clean(sctp_t *sctp)
 {
 	sctp_faddr_t *fp;
 
 	for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
 		if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) {
-			return (1);
+			return (B_TRUE);
 		}
 	}
 
-	return (0);
+	return (B_FALSE);
 }
 
 /*
@@ -723,7 +736,7 @@
 
 	/* All faddrs are down; kill the association */
 	dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
-	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+	SCTPS_BUMP_MIB(sctps, sctpAborted);
 	sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
 	    SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
 	sctp_clean_death(sctp, sctp->sctp_client_errno ?
--- a/usr/src/uts/common/inet/sctp/sctp_conn.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_conn.c	Mon Jul 19 17:27:45 2010 -0700
@@ -63,7 +63,7 @@
 	uint_t			sctp_options;
 	conn_t			*aconnp;
 	conn_t			*lconnp;
-	sctp_stack_t	*sctps = listener->sctp_sctps;
+	sctp_stack_t		*sctps = listener->sctp_sctps;
 
 	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
 	ASSERT(OK_32PTR(sctph));
@@ -127,15 +127,8 @@
 	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
 	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
 
-	/*
-	 * No need to check for multicast destination since ip will only pass
-	 * up multicasts to those that have expressed interest
-	 * TODO: what about rejecting broadcasts?
-	 * Also check that source is not a multicast or broadcast address.
-	 */
-	/* XXXSCTP */
-	acceptor->sctp_state = SCTPS_ESTABLISHED;
-	acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt();
+	SCTP_ASSOC_EST(sctps, acceptor);
+
 	/*
 	 * listener->sctp_rwnd should be the default window size or a
 	 * window size changed via SO_RCVBUF option.
@@ -163,6 +156,8 @@
 	pid_t		cpid;
 	in6_addr_t	faddr, laddr;
 	ip_xmit_attr_t	*ixa;
+	sctp_listen_cnt_t *slc = sctp->sctp_listen_cnt;
+	boolean_t	slc_set = B_FALSE;
 
 	/*
 	 * No need to check for duplicate as this is the listener
@@ -173,19 +168,48 @@
 	 */
 	ASSERT(OK_32PTR(mp->b_rptr));
 
+	connp = sctp->sctp_connp;
+	sctps = sctp->sctp_sctps;
+
+	/*
+	 * Enforce the limit set on the number of connections per listener.
+	 * Note that slc_cnt starts with 1.  So need to add 1 to slc_max
+	 * for comparison.
+	 */
+	if (slc != NULL) {
+		int64_t now;
+
+		if (atomic_add_32_nv(&slc->slc_cnt, 1) > slc->slc_max + 1) {
+			now = ddi_get_lbolt64();
+			atomic_add_32(&slc->slc_cnt, -1);
+			SCTP_KSTAT(sctps, sctp_listen_cnt_drop);
+			slc->slc_drop++;
+			if (now - slc->slc_report_time >
+			    MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL)) {
+				zcmn_err(connp->conn_zoneid, CE_WARN,
+				    "SCTP listener (port %d) association max "
+				    "(%u) reached: %u attempts dropped total\n",
+				    ntohs(connp->conn_lport),
+				    slc->slc_max, slc->slc_drop);
+				slc->slc_report_time = now;
+			}
+			return (NULL);
+		}
+		slc_set = B_TRUE;
+	}
+
 	if ((eager = sctp_create_eager(sctp)) == NULL) {
+		if (slc_set)
+			atomic_add_32(&slc->slc_cnt, -1);
 		return (NULL);
 	}
-
-	connp = sctp->sctp_connp;
-	sctps = sctp->sctp_sctps;
 	econnp = eager->sctp_connp;
 
 	if (connp->conn_policy != NULL) {
 		/* Inherit the policy from the listener; use actions from ira */
 		if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
 			sctp_close_eager(eager);
-			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
+			SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 			return (NULL);
 		}
 	}
@@ -217,7 +241,7 @@
 	if (ipsec_conn_cache_policy(econnp,
 	    (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
 		sctp_close_eager(eager);
-		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
+		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 		return (NULL);
 	}
 
@@ -261,7 +285,7 @@
 	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
 	if (err != 0) {
 		sctp_close_eager(eager);
-		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
+		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 		return (NULL);
 	}
 
@@ -301,7 +325,7 @@
 			if (flist != NULL)
 				kmem_free(flist, fsize);
 			sctp_close_eager(eager);
-			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
+			SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 			SCTP_KSTAT(sctps, sctp_cl_connect);
 			return (NULL);
 		}
@@ -319,7 +343,7 @@
 	    (sock_lower_handle_t)eager, NULL, cr, cpid,
 	    &eager->sctp_upcalls)) == NULL) {
 		sctp_close_eager(eager);
-		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
+		SCTPS_BUMP_MIB(sctps, sctpListenDrop);
 		return (NULL);
 	}
 	ASSERT(SCTP_IS_DETACHED(eager));
--- a/usr/src/uts/common/inet/sctp/sctp_cookie.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -749,7 +749,7 @@
 
 	/* timestamp */
 	now = (int64_t *)(cookieph + 1);
-	nowt = ddi_get_lbolt64();
+	nowt = LBOLT_FASTPATH64;
 	bcopy(&nowt, now, sizeof (*now));
 
 	/* cookie lifetime -- need configuration */
@@ -951,7 +951,7 @@
 	cph = NULL;
 	if (validate_init_params(sctp, iackch, iack, iackmp, &cph, &errmp,
 	    &pad, &sctp_options, ira) == 0) { /* result in 'pad' ignored */
-		BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+		SCTPS_BUMP_MIB(sctps, sctpAborted);
 		sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL);
 		sctp_clean_death(sctp, ECONNABORTED);
 		return;
@@ -1292,7 +1292,7 @@
 	 * So it is lbolt64 - (ts + *lt).  If it is positive, it means
 	 * that the Cookie has expired.
 	 */
-	diff = ddi_get_lbolt64() - (ts + *lt);
+	diff = LBOLT_FASTPATH64 - (ts + *lt);
 	if (diff > 0 && (init->sic_inittag != sctp->sctp_fvtag ||
 	    iack->sic_inittag != sctp->sctp_lvtag)) {
 		uint32_t staleness;
@@ -1354,11 +1354,8 @@
 			sctp->sctp_frwnd = ntohl(init->sic_a_rwnd);
 			sctp->sctp_fcsn = sctp->sctp_lastacked;
 
-			if (sctp->sctp_state < SCTPS_ESTABLISHED) {
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
-			}
+			if (sctp->sctp_state < SCTPS_ESTABLISHED)
+				SCTP_ASSOC_EST(sctps, sctp);
 
 			dprint(1, ("sctp peer %x:%x:%x:%x (%d) restarted\n",
 			    SCTP_PRINTADDR(sctp->sctp_current->faddr),
@@ -1384,9 +1381,7 @@
 			if (sctp->sctp_state < SCTPS_ESTABLISHED) {
 				if (!sctp_initialize_params(sctp, init, iack))
 					return (-1);	/* Drop? */
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
+				SCTP_ASSOC_EST(sctps, sctp);
 			}
 
 			dprint(1, ("init collision with %x:%x:%x:%x (%d)\n",
@@ -1416,9 +1411,7 @@
 			if (sctp->sctp_state < SCTPS_ESTABLISHED) {
 				if (!sctp_initialize_params(sctp, init, iack))
 					return (-1);	/* Drop? */
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
+				SCTP_ASSOC_EST(sctps, sctp);
 			}
 			return (0);
 		} else {
--- a/usr/src/uts/common/inet/sctp/sctp_error.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_error.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -150,7 +149,7 @@
 		freemsg(mp);
 		return;
 	}
-	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+	SCTPS_BUMP_MIB(sctps, sctpAborted);
 	BUMP_LOCAL(sctp->sctp_opkts);
 	BUMP_LOCAL(sctp->sctp_obchunks);
 
@@ -282,7 +281,7 @@
 		ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len;
 	}
 
-	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+	SCTPS_BUMP_MIB(sctps, sctpAborted);
 	BUMP_LOCAL(sctp->sctp_obchunks);
 
 	if (is_system_labeled() && ixa->ixa_tsl != NULL) {
@@ -435,7 +434,7 @@
 	ixas.ixa_ipst = ipst;
 	ixas.ixa_ifindex = 0;
 
-	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+	SCTPS_BUMP_MIB(sctps, sctpAborted);
 
 	if (is_system_labeled()) {
 		ASSERT(ira->ira_tsl != NULL);
--- a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -190,7 +189,7 @@
 	fp->hb_pending = B_TRUE;
 
 	BUMP_LOCAL(sctp->sctp_obchunks);
-	BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatProbe);
+	SCTPS_BUMP_MIB(sctps, sctpTimHeartBeatProbe);
 
 	sctp_set_iplen(sctp, hbmp, fp->ixa);
 	(void) conn_ip_output(hbmp, fp->ixa);
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_impl.h	Mon Jul 19 17:27:45 2010 -0700
@@ -31,6 +31,9 @@
 #include <sys/list.h>
 #include <sys/strsun.h>
 #include <sys/zone.h>
+#include <sys/cpuvar.h>
+#include <sys/clock_impl.h>
+
 #include <netinet/ip6.h>
 #include <inet/optcom.h>
 #include <inet/tunables.h>
@@ -349,6 +352,80 @@
 	    ((sctps)->sctps_conn_hash_size - 1))
 
 /*
+ * Linked list struct to store SCTP listener association limit configuration
+ * per IP stack.  The list is stored at sctps_listener_conf in sctp_stack_t.
+ *
+ * sl_port: the listener port of this limit configuration
+ * sl_ratio: the maximum amount of memory consumed by all concurrent SCTP
+ *           connections created by a listener does not exceed 1/sl_ratio
+ *           of the total system memory.  Note that this is only an
+ *           approximation.
+ * sl_link: linked list struct
+ */
+typedef struct sctp_listener_s {
+	in_port_t	sl_port;
+	uint32_t	sl_ratio;
+	list_node_t	sl_link;
+} sctp_listener_t;
+
+/*
+ * If there is a limit set on the number of associations allowed for each
+ * listener, the following struct is used to store that counter.  It keeps
+ * the number of SCTP association created by a listener.  Note that this needs
+ * to be separated from the listener since the listener can go away before
+ * all the associations are gone.
+ *
+ * When the struct is allocated, slc_cnt is set to 1.  When a new association
+ * is created by the listener, slc_cnt is incremented by 1.  When an
+ * association created by the listener goes away, slc_cnt is decremented by
+ * 1.  When the listener itself goes away, slc_cnt is decremented by 1.
+ * The last association (or the listener) which decrements slc_cnt to zero
+ * frees the struct.
+ *
+ * slc_max is the maximum number of concurrent associations created from a
+ * listener.  It is calculated when the sctp_listen_cnt_t is allocated.
+ *
+ * slc_report_time stores the time when cmn_err() is called to report that the
+ * max has been exceeded.  Report is done at most once every
+ * SCTP_SLC_REPORT_INTERVAL mins for a listener.
+ *
+ * slc_drop stores the number of connection attempts dropped because the
+ * limit has been reached.
+ */
+typedef struct sctp_listen_cnt_s {
+	uint32_t	slc_max;
+	uint32_t	slc_cnt;
+	int64_t		slc_report_time;
+	uint32_t	slc_drop;
+} sctp_listen_cnt_t;
+
+#define	SCTP_SLC_REPORT_INTERVAL	(30 * MINUTES)
+
+#define	SCTP_DECR_LISTEN_CNT(sctp)					\
+{									\
+	ASSERT((sctp)->sctp_listen_cnt->slc_cnt > 0);			\
+	if (atomic_add_32_nv(&(sctp)->sctp_listen_cnt->slc_cnt, -1) == 0) \
+		kmem_free((sctp)->sctp_listen_cnt, sizeof (sctp_listen_cnt_t));\
+	(sctp)->sctp_listen_cnt = NULL;					\
+}
+
+/* Increment and decrement the number of associations in sctp_stack_t. */
+#define	SCTPS_ASSOC_INC(sctps)						\
+	atomic_inc_64(							\
+	    (uint64_t *)&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_assoc_cnt)
+
+#define	SCTPS_ASSOC_DEC(sctps)						\
+	atomic_dec_64(							\
+	    (uint64_t *)&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_assoc_cnt)
+
+#define	SCTP_ASSOC_EST(sctps, sctp)					\
+{									\
+	(sctp)->sctp_state = SCTPS_ESTABLISHED;				\
+	(sctp)->sctp_assoc_start_time = (uint32_t)LBOLT_FASTPATH64;	\
+	SCTPS_ASSOC_INC(sctps);						\
+}
+
+/*
  * Bind hash array size and hash function.  The size must be a power
  * of 2 and lport must be in host byte order.
  */
@@ -873,6 +950,9 @@
 	 * user request for stats on this endpoint.
 	 */
 	int	sctp_prev_maxrto;
+
+	/* For association counting. */
+	sctp_listen_cnt_t	*sctp_listen_cnt;
 } sctp_t;
 
 #define	SCTP_TXQ_LEN(sctp)	((sctp)->sctp_unsent + (sctp)->sctp_unacked)
@@ -925,6 +1005,7 @@
 extern void	sctp_conn_init(conn_t *);
 extern sctp_t	*sctp_conn_match(in6_addr_t **, uint32_t, in6_addr_t *,
 		    uint32_t, zoneid_t, iaflags_t, sctp_stack_t *);
+extern void	sctp_conn_reclaim(void *);
 extern sctp_t	*sctp_conn_request(sctp_t *, mblk_t *, uint_t, uint_t,
 		    sctp_init_chunk_t *, ip_recv_attr_t *);
 extern uint32_t	sctp_cumack(sctp_t *, uint32_t, mblk_t **);
@@ -943,6 +1024,7 @@
 extern void	sctp_faddr_init(void);
 extern void	sctp_fast_rexmit(sctp_t *);
 extern void	sctp_fill_sack(sctp_t *, unsigned char *, int);
+extern uint32_t sctp_find_listener_conf(sctp_stack_t *, in_port_t);
 extern void	sctp_free_faddr_timers(sctp_t *);
 extern void	sctp_free_ftsn_set(sctp_ftsn_set_t *);
 extern void	sctp_free_msg(mblk_t *);
@@ -978,17 +1060,18 @@
 extern void	sctp_intf_event(sctp_t *, in6_addr_t, int, int);
 extern void	sctp_input_data(sctp_t *, mblk_t *, ip_recv_attr_t *);
 extern void	sctp_instream_cleanup(sctp_t *, boolean_t);
-extern int	sctp_is_a_faddr_clean(sctp_t *);
+extern boolean_t sctp_is_a_faddr_clean(sctp_t *);
 
 extern void	*sctp_kstat_init(netstackid_t);
 extern void	sctp_kstat_fini(netstackid_t, kstat_t *);
-extern void	*sctp_kstat2_init(netstackid_t, sctp_kstat_t *);
+extern void	*sctp_kstat2_init(netstackid_t);
 extern void	sctp_kstat2_fini(netstackid_t, kstat_t *);
 
 extern ssize_t	sctp_link_abort(mblk_t *, uint16_t, char *, size_t, int,
 		    boolean_t);
 extern void	sctp_listen_hash_insert(sctp_tf_t *, sctp_t *);
 extern void	sctp_listen_hash_remove(sctp_t *);
+extern void	sctp_listener_conf_cleanup(sctp_stack_t *);
 extern sctp_t	*sctp_lookup(sctp_t *, in6_addr_t *, sctp_tf_t *, uint32_t *,
 		    int);
 extern sctp_faddr_t *sctp_lookup_faddr(sctp_t *, in6_addr_t *);
@@ -1058,6 +1141,7 @@
 extern void	sctp_set_iplen(sctp_t *, mblk_t *, ip_xmit_attr_t *);
 extern void	sctp_set_ulp_prop(sctp_t *);
 extern void	sctp_ss_rexmit(sctp_t *);
+extern void	sctp_stack_cpu_add(sctp_stack_t *, processorid_t);
 extern size_t	sctp_supaddr_param_len(sctp_t *);
 extern size_t	sctp_supaddr_param(sctp_t *, uchar_t *);
 
--- a/usr/src/uts/common/inet/sctp/sctp_input.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_input.c	Mon Jul 19 17:27:45 2010 -0700
@@ -1349,7 +1349,7 @@
 
 	/* We can no longer deliver anything up, but still need to handle it. */
 	if (SCTP_IS_DETACHED(sctp)) {
-		BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
+		SCTPS_BUMP_MIB(sctps, sctpInClosed);
 		can_deliver = B_FALSE;
 	}
 
@@ -1937,9 +1937,9 @@
 	    (void *)sctp->sctp_lastdata,
 	    SCTP_PRINTADDR(sctp->sctp_lastdata->faddr)));
 
-	sctp->sctp_active = ddi_get_lbolt64();
-
-	BUMP_MIB(&sctps->sctps_mib, sctpOutAck);
+	sctp->sctp_active = LBOLT_FASTPATH64;
+
+	SCTPS_BUMP_MIB(sctps, sctpOutAck);
 
 	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->ixa);
 	(void) conn_ip_output(smp, sctp->sctp_lastdata->ixa);
@@ -2124,7 +2124,7 @@
 cum_ack_done:
 	*first_unacked = mp;
 	if (cumack_forward > 0) {
-		BUMP_MIB(&sctps->sctps_mib, sctpInAck);
+		SCTPS_BUMP_MIB(sctps, sctpInAck);
 		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
 			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
 		}
@@ -2143,7 +2143,7 @@
 		sctp->sctp_xmit_unacked = mp;
 	} else {
 		/* dup ack */
-		BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
+		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
 	}
 	sctp->sctp_lastack_rxd = tsn;
 	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
@@ -2298,7 +2298,7 @@
 	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);
 
 	if (SCTP_IS_DETACHED(sctp)) {
-		BUMP_MIB(&sctps->sctps_mib, sctpInClosed);
+		SCTPS_BUMP_MIB(sctps, sctpInClosed);
 		can_deliver = B_FALSE;
 	}
 	/*
@@ -2543,7 +2543,7 @@
 		 */
 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
-			BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
+			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
 			*trysend = -1;
 			return (acked);
 		} else if (SEQ_LT(gapend, gapstart) ||
@@ -2742,7 +2742,7 @@
 		return (0);
 
 	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
-		BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
+		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
 		/* Send an ABORT */
 		return (-1);
 	}
@@ -2768,7 +2768,7 @@
 			mp = sctp->sctp_xmit_head->b_cont;
 		else
 			mp = NULL;
-		BUMP_MIB(&sctps->sctps_mib, sctpInDupAck);
+		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
 		/*
 		 * If we were doing a zero win probe and the win
 		 * has now opened to at least MSS, re-transmit the
@@ -2880,8 +2880,7 @@
 				    sctp->sctp_xmit_head, mp1,
 				    &trysend, &fast_recovery, gapstart);
 				if (trysend < 0) {
-					BUMP_MIB(&sctps->sctps_mib,
-					    sctpInAckUnsent);
+					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
 					return (-1);
 				}
 				break;
@@ -2898,7 +2897,7 @@
 		 */
 		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
 		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
-			BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent);
+			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
 			return (-1);
 		} else if (SEQ_LT(gapend, gapstart) ||
 		    SEQ_LEQ(gapstart, cumtsn)) {
@@ -3409,8 +3408,8 @@
 
 	sctps = ipst->ips_netstack->netstack_sctp;
 
-	BUMP_MIB(&sctps->sctps_mib, sctpOutOfBlue);
-	BUMP_MIB(&sctps->sctps_mib, sctpInSCTPPkts);
+	SCTPS_BUMP_MIB(sctps, sctpOutOfBlue);
+	SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts);
 
 	if (mp->b_cont != NULL) {
 		/*
@@ -3578,7 +3577,7 @@
 {
 	sctp_stack_t	*sctps = sctp->sctp_sctps;
 
-	BUMP_MIB(&sctps->sctps_mib, sctpAborted);
+	SCTPS_BUMP_MIB(sctps, sctpAborted);
 	BUMP_LOCAL(sctp->sctp_ibchunks);
 
 	sctp_assoc_event(sctp, SCTP_COMM_LOST,
@@ -3753,7 +3752,7 @@
 	gotdata = 0;
 	trysend = 0;
 
-	now = ddi_get_lbolt64();
+	now = LBOLT_FASTPATH64;
 	/* Process the chunks */
 	do {
 		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
@@ -3861,8 +3860,7 @@
 				BUMP_LOCAL(sctp->sctp_ibchunks);
 				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
 					sctp_shutdown_complete(sctp);
-					BUMP_MIB(&sctps->sctps_mib,
-					    sctpShutdowns);
+					SCTPS_BUMP_MIB(sctps, sctpShutdowns);
 					sctp_assoc_event(sctp,
 					    SCTP_SHUTDOWN_COMP, 0, NULL);
 					sctp_clean_death(sctp, 0);
@@ -3897,7 +3895,7 @@
 						sctp_adaptation_event(sctp);
 					}
 				} else {
-					BUMP_MIB(&sctps->sctps_mib,
+					SCTPS_BUMP_MIB(sctps,
 					    sctpInInvalidCookie);
 				}
 				break;
@@ -3953,7 +3951,7 @@
 				if (sctp_process_cookie(sctp, ch, mp, &iack,
 				    sctph, &recv_adaptation, &peer_src,
 				    ira) == -1) {
-					BUMP_MIB(&sctps->sctps_mib,
+					SCTPS_BUMP_MIB(sctps,
 					    sctpInInvalidCookie);
 					goto done;
 				}
@@ -3997,7 +3995,7 @@
 				 * properly reprocessed on the
 				 * eager's queue.
 				 */
-				BUMP_MIB(&sctps->sctps_mib, sctpPassiveEstab);
+				SCTPS_BUMP_MIB(sctps, sctpPassiveEstab);
 				if (mlen > ntohs(ch->sch_len)) {
 					eager->sctp_cookie_mp = dupb(mp);
 					/*
@@ -4075,7 +4073,7 @@
 
 				if (sctp_process_cookie(sctp, ch, mp, &iack,
 				    sctph, &recv_adaptation, NULL, ira) == -1) {
-					BUMP_MIB(&sctps->sctps_mib,
+					SCTPS_BUMP_MIB(sctps,
 					    sctpInInvalidCookie);
 					break;
 				}
@@ -4087,10 +4085,8 @@
 					sctp_set_ulp_prop(sctp);
 
 				}
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
-				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
+				SCTP_ASSOC_EST(sctps, sctp);
+				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
 				if (sctp->sctp_cookie_mp) {
 					freemsg(sctp->sctp_cookie_mp);
 					sctp->sctp_cookie_mp = NULL;
@@ -4129,10 +4125,8 @@
 				}
 				if (sctp->sctp_unacked == 0)
 					sctp_stop_faddr_timers(sctp);
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
-				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
+				SCTP_ASSOC_EST(sctps, sctp);
+				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
 				BUMP_LOCAL(sctp->sctp_ibchunks);
 				if (sctp->sctp_cookie_mp) {
 					freemsg(sctp->sctp_cookie_mp);
@@ -4157,7 +4151,7 @@
 
 				if (sctp_process_cookie(sctp, ch, mp, &iack,
 				    sctph, &recv_adaptation, NULL, ira) == -1) {
-					BUMP_MIB(&sctps->sctps_mib,
+					SCTPS_BUMP_MIB(sctps,
 					    sctpInInvalidCookie);
 					break;
 				}
@@ -4171,10 +4165,8 @@
 				}
 				if (sctp->sctp_unacked == 0)
 					sctp_stop_faddr_timers(sctp);
-				sctp->sctp_state = SCTPS_ESTABLISHED;
-				sctp->sctp_assoc_start_time =
-				    (uint32_t)ddi_get_lbolt();
-				BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab);
+				SCTP_ASSOC_EST(sctps, sctp);
+				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
 				if (sctp->sctp_cookie_mp) {
 					freemsg(sctp->sctp_cookie_mp);
 					sctp->sctp_cookie_mp = NULL;
@@ -4206,7 +4198,7 @@
 					p = (sctp_parm_hdr_t *)(ch + 1);
 					if (p->sph_type ==
 					    htons(SCTP_ERR_STALE_COOKIE)) {
-						BUMP_MIB(&sctps->sctps_mib,
+						SCTPS_BUMP_MIB(sctps,
 						    sctpAborted);
 						sctp_error_event(sctp,
 						    ch, B_FALSE);
@@ -4241,7 +4233,7 @@
 				goto done;
 			case CHUNK_SHUTDOWN_COMPLETE:
 				BUMP_LOCAL(sctp->sctp_ibchunks);
-				BUMP_MIB(&sctps->sctps_mib, sctpShutdowns);
+				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
 				    NULL);
 
@@ -4252,7 +4244,7 @@
 			case CHUNK_SHUTDOWN_ACK:
 				sctp_shutdown_complete(sctp);
 				BUMP_LOCAL(sctp->sctp_ibchunks);
-				BUMP_MIB(&sctps->sctps_mib, sctpShutdowns);
+				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
 				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
 				    NULL);
 				sctp_clean_death(sctp, 0);
@@ -4435,7 +4427,7 @@
 	if (sctp->sctp_state >= SCTPS_ESTABLISHED &&
 	    ((old <= new >> 1) || (old < sctp->sctp_mss))) {
 		sctp->sctp_force_sack = 1;
-		BUMP_MIB(&sctps->sctps_mib, sctpOutWinUpdate);
+		SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate);
 		(void) sctp_sack(sctp, NULL);
 	}
 	WAKE_SCTP(sctp);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/sctp/sctp_misc.c	Mon Jul 19 17:27:45 2010 -0700
@@ -0,0 +1,277 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <inet/common.h>
+#include "sctp_impl.h"
+
+/* Control whether SCTP can enter defensive mode when under memory pressure. */
+static boolean_t sctp_do_reclaim = B_TRUE;
+
+static void	sctp_reclaim_timer(void *);
+
+/* Diagnostic routine used to return a string associated with the sctp state. */
+char *
+sctp_display(sctp_t *sctp, char *sup_buf)
+{
+	char	*buf;
+	char	buf1[30];
+	static char	priv_buf[INET6_ADDRSTRLEN * 2 + 80];
+	char	*cp;
+	conn_t	*connp;
+
+	if (sctp == NULL)
+		return ("NULL_SCTP");
+
+	connp = sctp->sctp_connp;
+	buf = (sup_buf != NULL) ? sup_buf : priv_buf;
+
+	switch (sctp->sctp_state) {
+	case SCTPS_IDLE:
+		cp = "SCTP_IDLE";
+		break;
+	case SCTPS_BOUND:
+		cp = "SCTP_BOUND";
+		break;
+	case SCTPS_LISTEN:
+		cp = "SCTP_LISTEN";
+		break;
+	case SCTPS_COOKIE_WAIT:
+		cp = "SCTP_COOKIE_WAIT";
+		break;
+	case SCTPS_COOKIE_ECHOED:
+		cp = "SCTP_COOKIE_ECHOED";
+		break;
+	case SCTPS_ESTABLISHED:
+		cp = "SCTP_ESTABLISHED";
+		break;
+	case SCTPS_SHUTDOWN_PENDING:
+		cp = "SCTP_SHUTDOWN_PENDING";
+		break;
+	case SCTPS_SHUTDOWN_SENT:
+		cp = "SCTPS_SHUTDOWN_SENT";
+		break;
+	case SCTPS_SHUTDOWN_RECEIVED:
+		cp = "SCTPS_SHUTDOWN_RECEIVED";
+		break;
+	case SCTPS_SHUTDOWN_ACK_SENT:
+		cp = "SCTPS_SHUTDOWN_ACK_SENT";
+		break;
+	default:
+		(void) mi_sprintf(buf1, "SCTPUnkState(%d)", sctp->sctp_state);
+		cp = buf1;
+		break;
+	}
+	(void) mi_sprintf(buf, "[%u, %u] %s",
+	    ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp);
+
+	return (buf);
+}
+
+void
+sctp_display_all(sctp_stack_t *sctps)
+{
+	sctp_t *sctp_walker;
+
+	mutex_enter(&sctps->sctps_g_lock);
+	for (sctp_walker = list_head(&sctps->sctps_g_list);
+	    sctp_walker != NULL;
+	    sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list,
+	    sctp_walker)) {
+		(void) sctp_display(sctp_walker, NULL);
+	}
+	mutex_exit(&sctps->sctps_g_lock);
+}
+
+/*
+ * Given a sctp_stack_t and a port (in host byte order), find a listener
+ * configuration for that port and return the ratio.
+ */
+uint32_t
+sctp_find_listener_conf(sctp_stack_t *sctps, in_port_t port)
+{
+	sctp_listener_t	*sl;
+	uint32_t ratio = 0;
+
+	mutex_enter(&sctps->sctps_listener_conf_lock);
+	for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL;
+	    sl = list_next(&sctps->sctps_listener_conf, sl)) {
+		if (sl->sl_port == port) {
+			ratio = sl->sl_ratio;
+			break;
+		}
+	}
+	mutex_exit(&sctps->sctps_listener_conf_lock);
+	return (ratio);
+}
+
+/*
+ * To remove all listener limit configuration in a sctp_stack_t.
+ */
+void
+sctp_listener_conf_cleanup(sctp_stack_t *sctps)
+{
+	sctp_listener_t	*sl;
+
+	mutex_enter(&sctps->sctps_listener_conf_lock);
+	while ((sl = list_head(&sctps->sctps_listener_conf)) != NULL) {
+		list_remove(&sctps->sctps_listener_conf, sl);
+		kmem_free(sl, sizeof (sctp_listener_t));
+	}
+	mutex_destroy(&sctps->sctps_listener_conf_lock);
+	list_destroy(&sctps->sctps_listener_conf);
+}
+
+
+/*
+ * Timeout function to reset the SCTP stack variable sctps_reclaim to false.
+ */
+static void
+sctp_reclaim_timer(void *arg)
+{
+	sctp_stack_t *sctps = (sctp_stack_t *)arg;
+	int64_t tot_assoc = 0;
+	int i;
+	extern pgcnt_t lotsfree, needfree;
+
+	for (i = 0; i < sctps->sctps_sc_cnt; i++)
+		tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt;
+
+	/*
+	 * This happens only when a stack is going away.  sctps_reclaim_tid
+	 * should not be reset to 0 when returning in this case.
+	 */
+	mutex_enter(&sctps->sctps_reclaim_lock);
+	if (!sctps->sctps_reclaim) {
+		mutex_exit(&sctps->sctps_reclaim_lock);
+		return;
+	}
+
+	if ((freemem >= lotsfree + needfree) || tot_assoc < maxusers) {
+		sctps->sctps_reclaim = B_FALSE;
+		sctps->sctps_reclaim_tid = 0;
+	} else {
+		/* Stay in defensive mode and restart the timer */
+		sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer,
+		    sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period));
+	}
+	mutex_exit(&sctps->sctps_reclaim_lock);
+}
+
+/*
+ * Kmem reclaim call back function.  When the system is under memory
+ * pressure, we set the SCTP stack variable sctps_reclaim to true.  This
+ * variable is reset to false after sctps_reclaim_period msecs.  During this
+ * period, SCTP will be more aggressive in aborting connections not making
+ * progress, meaning retransmitting for shorter time (sctp_pa_early_abort/
+ * sctp_pp_early_abort number of strikes).
+ */
+/* ARGSUSED */
+void
+sctp_conn_reclaim(void *arg)
+{
+	netstack_handle_t nh;
+	netstack_t *ns;
+	sctp_stack_t *sctps;
+	extern pgcnt_t lotsfree, needfree;
+
+	if (!sctp_do_reclaim)
+		return;
+
+	/*
+	 * The reclaim function may be called even when the system is not
+	 * really under memory pressure.
+	 */
+	if (freemem >= lotsfree + needfree)
+		return;
+
+	netstack_next_init(&nh);
+	while ((ns = netstack_next(&nh)) != NULL) {
+		int i;
+		int64_t tot_assoc = 0;
+
+		/*
+		 * During boot time, the first netstack_t is created and
+		 * initialized before SCTP has registered with the netstack
+		 * framework.  If this reclaim function is called before SCTP
+		 * has finished its initialization, netstack_next() will
+		 * return the first netstack_t (since its netstack_flags is
+		 * not NSF_UNINIT).  And its netstack_sctp will be NULL.  We
+		 * need to catch it.
+		 *
+		 * All subsequent netstack_t creation will not have this
+		 * problem since the initialization is not finished until SCTP
+		 * has finished its own sctp_stack_t initialization.  Hence
+		 * netstack_next() will not return one with NULL netstack_sctp.
+		 */
+		if ((sctps = ns->netstack_sctp) == NULL) {
+			netstack_rele(ns);
+			continue;
+		}
+
+		/*
+		 * Even if the system is under memory pressure, the reason may
+		 * not be because of SCTP activity.  Check the number of
+		 * associations in each stack.  If the number exceeds the
+		 * threshold (maxusers), turn on defensive mode.
+		 */
+		for (i = 0; i < sctps->sctps_sc_cnt; i++)
+			tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt;
+		if (tot_assoc < maxusers) {
+			netstack_rele(ns);
+			continue;
+		}
+
+		mutex_enter(&sctps->sctps_reclaim_lock);
+		if (!sctps->sctps_reclaim) {
+			sctps->sctps_reclaim = B_TRUE;
+			sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer,
+			    sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period));
+			SCTP_KSTAT(sctps, sctp_reclaim_cnt);
+		}
+		mutex_exit(&sctps->sctps_reclaim_lock);
+		netstack_rele(ns);
+	}
+	netstack_next_fini(&nh);
+}
+
+/*
+ * When a CPU is added, we need to allocate the per CPU stats struct.
+ */
+void
+sctp_stack_cpu_add(sctp_stack_t *sctps, processorid_t cpu_seqid)
+{
+	int i;
+
+	if (cpu_seqid < sctps->sctps_sc_cnt)
+		return;
+	for (i = sctps->sctps_sc_cnt; i <= cpu_seqid; i++) {
+		ASSERT(sctps->sctps_sc[i] == NULL);
+		sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t),
+		    KM_SLEEP);
+	}
+	membar_producer();
+	sctps->sctps_sc_cnt = cpu_seqid + 1;
+}
--- a/usr/src/uts/common/inet/sctp/sctp_output.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_output.c	Mon Jul 19 17:27:45 2010 -0700
@@ -759,7 +759,7 @@
 						return (start_mp);
 					}
 				}
-				BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans);
+				SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans);
 				BUMP_LOCAL(sctp->sctp_rxtchunks);
 				SCTP_CHUNK_CLEAR_REXMIT(mp);
 				if (start_mp == NULL) {
@@ -997,7 +997,7 @@
 	int32_t			pad = 0;
 	int32_t			pathmax;
 	int			extra;
-	int64_t			now = ddi_get_lbolt64();
+	int64_t			now = LBOLT_FASTPATH64;
 	sctp_faddr_t		*fp;
 	sctp_faddr_t		*lfp;
 	sctp_data_hdr_t		*sdc;
@@ -1772,7 +1772,7 @@
 		sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
 		ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
 		sctp->sctp_zero_win_probe = B_TRUE;
-		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
+		SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
 	}
 	return;
 out:
@@ -1818,7 +1818,7 @@
 		if (oldfp != fp && oldfp->suna != 0)
 			SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto);
 		SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto);
-		BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe);
+		SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
 		return;
 	}
 
--- a/usr/src/uts/common/inet/sctp/sctp_snmp.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_snmp.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -44,8 +43,11 @@
 #include "sctp_impl.h"
 #include "sctp_addr.h"
 
-static int sctp_snmp_state(sctp_t *sctp);
-
+static void sctp_clr_kstats2(sctp_kstat_t *);
+static void sctp_add_kstats2(sctp_kstat_counter_t *, sctp_kstat_t *);
+static int sctp_snmp_state(sctp_t *);
+static void sctp_sum_mib(sctp_stack_t *, mib2_sctp_t *);
+static void sctp_add_mib(mib2_sctp_t *, mib2_sctp_t *);
 
 static int
 sctp_kstat_update(kstat_t *kp, int rw)
@@ -56,6 +58,7 @@
 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
 	netstack_t	*ns;
 	sctp_stack_t	*sctps;
+	mib2_sctp_t	sctp_mib;
 
 	if (kp == NULL|| kp->ks_data == NULL)
 		return (EIO);
@@ -71,13 +74,21 @@
 		netstack_rele(ns);
 		return (-1);
 	}
-	myzoneid = netstackid_to_zoneid(stackid);
+
+	/*
+	 * For all exclusive netstacks, the zone ID is always GLOBAL_ZONEID.
+	 */
+	if (stackid != GLOBAL_NETSTACKID)
+		myzoneid = GLOBAL_ZONEID;
+	else
+		myzoneid = curproc->p_zone->zone_id;
+
+	bzero(&sctp_mib, sizeof (sctp_mib));
 
 	/*
 	 * Get the number of current associations and gather their
 	 * individual set of statistics.
 	 */
-	SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0);
 	sctp_prev = NULL;
 	mutex_enter(&sctps->sctps_g_lock);
 	sctp = list_head(&sctps->sctps_g_list);
@@ -98,17 +109,21 @@
 		if (sctp->sctp_state == SCTPS_ESTABLISHED ||
 		    sctp->sctp_state == SCTPS_SHUTDOWN_PENDING ||
 		    sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) {
-			BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab);
+			/*
+			 * Just bump the local sctp_mib.  The number of
+			 * existing associations is not kept in kernel.
+			 */
+			BUMP_MIB(&sctp_mib, sctpCurrEstab);
 		}
 
 		if (sctp->sctp_opkts) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts,
+			SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts,
 			    sctp->sctp_opkts);
 			sctp->sctp_opkts = 0;
 		}
 
 		if (sctp->sctp_obchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks,
 			    sctp->sctp_obchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_obchunks,
 			    sctp->sctp_obchunks);
@@ -116,7 +131,7 @@
 		}
 
 		if (sctp->sctp_odchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks,
 			    sctp->sctp_odchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_odchunks,
 			    sctp->sctp_odchunks);
@@ -124,7 +139,7 @@
 		}
 
 		if (sctp->sctp_oudchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks,
 			    sctp->sctp_oudchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_oudchunks,
 			    sctp->sctp_oudchunks);
@@ -132,7 +147,7 @@
 		}
 
 		if (sctp->sctp_rxtchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpRetransChunks,
 			    sctp->sctp_rxtchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_rxtchunks,
 			    sctp->sctp_rxtchunks);
@@ -140,13 +155,13 @@
 		}
 
 		if (sctp->sctp_ipkts) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts,
+			SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts,
 			    sctp->sctp_ipkts);
 			sctp->sctp_ipkts = 0;
 		}
 
 		if (sctp->sctp_ibchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks,
 			    sctp->sctp_ibchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_ibchunks,
 			    sctp->sctp_ibchunks);
@@ -154,7 +169,7 @@
 		}
 
 		if (sctp->sctp_idchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks,
 			    sctp->sctp_idchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_idchunks,
 			    sctp->sctp_idchunks);
@@ -162,7 +177,7 @@
 		}
 
 		if (sctp->sctp_iudchunks) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks,
+			SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks,
 			    sctp->sctp_iudchunks);
 			UPDATE_LOCAL(sctp->sctp_cum_iudchunks,
 			    sctp->sctp_iudchunks);
@@ -170,13 +185,13 @@
 		}
 
 		if (sctp->sctp_fragdmsgs) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs,
+			SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs,
 			    sctp->sctp_fragdmsgs);
 			sctp->sctp_fragdmsgs = 0;
 		}
 
 		if (sctp->sctp_reassmsgs) {
-			UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs,
+			SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs,
 			    sctp->sctp_reassmsgs);
 			sctp->sctp_reassmsgs = 0;
 		}
@@ -190,6 +205,8 @@
 	if (sctp_prev != NULL)
 		SCTP_REFRELE(sctp_prev);
 
+	sctp_sum_mib(sctps, &sctp_mib);
+
 	/* Copy data from the SCTP MIB */
 	sctpkp = (sctp_named_kstat_t *)kp->ks_data;
 
@@ -200,52 +217,41 @@
 	sctpkp->sctpValCookieLife.value.ui32 = sctps->sctps_cookie_life;
 	sctpkp->sctpMaxInitRetr.value.ui32 = sctps->sctps_max_init_retr;
 
-	sctpkp->sctpCurrEstab.value.i32 = sctps->sctps_mib.sctpCurrEstab;
-	sctpkp->sctpActiveEstab.value.i32 = sctps->sctps_mib.sctpActiveEstab;
-	sctpkp->sctpPassiveEstab.value.i32 = sctps->sctps_mib.sctpPassiveEstab;
-	sctpkp->sctpAborted.value.i32 = sctps->sctps_mib.sctpAborted;
-	sctpkp->sctpShutdowns.value.i32 = sctps->sctps_mib.sctpShutdowns;
-	sctpkp->sctpOutOfBlue.value.i32 = sctps->sctps_mib.sctpOutOfBlue;
-	sctpkp->sctpChecksumError.value.i32 =
-	    sctps->sctps_mib.sctpChecksumError;
-	sctpkp->sctpOutCtrlChunks.value.i64 =
-	    sctps->sctps_mib.sctpOutCtrlChunks;
-	sctpkp->sctpOutOrderChunks.value.i64 =
-	    sctps->sctps_mib.sctpOutOrderChunks;
-	sctpkp->sctpOutUnorderChunks.value.i64 =
-	    sctps->sctps_mib.sctpOutUnorderChunks;
-	sctpkp->sctpRetransChunks.value.i64 =
-	    sctps->sctps_mib.sctpRetransChunks;
-	sctpkp->sctpOutAck.value.i32 = sctps->sctps_mib.sctpOutAck;
-	sctpkp->sctpOutAckDelayed.value.i32 =
-	    sctps->sctps_mib.sctpOutAckDelayed;
-	sctpkp->sctpOutWinUpdate.value.i32 = sctps->sctps_mib.sctpOutWinUpdate;
-	sctpkp->sctpOutFastRetrans.value.i32 =
-	    sctps->sctps_mib.sctpOutFastRetrans;
-	sctpkp->sctpOutWinProbe.value.i32 = sctps->sctps_mib.sctpOutWinProbe;
-	sctpkp->sctpInCtrlChunks.value.i64 = sctps->sctps_mib.sctpInCtrlChunks;
-	sctpkp->sctpInOrderChunks.value.i64 =
-	    sctps->sctps_mib.sctpInOrderChunks;
-	sctpkp->sctpInUnorderChunks.value.i64 =
-	    sctps->sctps_mib.sctpInUnorderChunks;
-	sctpkp->sctpInAck.value.i32 = sctps->sctps_mib.sctpInAck;
-	sctpkp->sctpInDupAck.value.i32 = sctps->sctps_mib.sctpInDupAck;
-	sctpkp->sctpInAckUnsent.value.i32 = sctps->sctps_mib.sctpInAckUnsent;
-	sctpkp->sctpFragUsrMsgs.value.i64 = sctps->sctps_mib.sctpFragUsrMsgs;
-	sctpkp->sctpReasmUsrMsgs.value.i64 = sctps->sctps_mib.sctpReasmUsrMsgs;
-	sctpkp->sctpOutSCTPPkts.value.i64 = sctps->sctps_mib.sctpOutSCTPPkts;
-	sctpkp->sctpInSCTPPkts.value.i64 = sctps->sctps_mib.sctpInSCTPPkts;
-	sctpkp->sctpInInvalidCookie.value.i32 =
-	    sctps->sctps_mib.sctpInInvalidCookie;
-	sctpkp->sctpTimRetrans.value.i32 = sctps->sctps_mib.sctpTimRetrans;
-	sctpkp->sctpTimRetransDrop.value.i32 =
-	    sctps->sctps_mib.sctpTimRetransDrop;
+	/* Copy data from the local sctp_mib to the provided kstat. */
+	sctpkp->sctpCurrEstab.value.i32 = sctp_mib.sctpCurrEstab;
+	sctpkp->sctpActiveEstab.value.i32 = sctp_mib.sctpActiveEstab;
+	sctpkp->sctpPassiveEstab.value.i32 = sctp_mib.sctpPassiveEstab;
+	sctpkp->sctpAborted.value.i32 = sctp_mib.sctpAborted;
+	sctpkp->sctpShutdowns.value.i32 = sctp_mib.sctpShutdowns;
+	sctpkp->sctpOutOfBlue.value.i32 = sctp_mib.sctpOutOfBlue;
+	sctpkp->sctpChecksumError.value.i32 = sctp_mib.sctpChecksumError;
+	sctpkp->sctpOutCtrlChunks.value.i64 = sctp_mib.sctpOutCtrlChunks;
+	sctpkp->sctpOutOrderChunks.value.i64 = sctp_mib.sctpOutOrderChunks;
+	sctpkp->sctpOutUnorderChunks.value.i64 = sctp_mib.sctpOutUnorderChunks;
+	sctpkp->sctpRetransChunks.value.i64 = sctp_mib.sctpRetransChunks;
+	sctpkp->sctpOutAck.value.i32 = sctp_mib.sctpOutAck;
+	sctpkp->sctpOutAckDelayed.value.i32 = sctp_mib.sctpOutAckDelayed;
+	sctpkp->sctpOutWinUpdate.value.i32 = sctp_mib.sctpOutWinUpdate;
+	sctpkp->sctpOutFastRetrans.value.i32 = sctp_mib.sctpOutFastRetrans;
+	sctpkp->sctpOutWinProbe.value.i32 = sctp_mib.sctpOutWinProbe;
+	sctpkp->sctpInCtrlChunks.value.i64 = sctp_mib.sctpInCtrlChunks;
+	sctpkp->sctpInOrderChunks.value.i64 = sctp_mib.sctpInOrderChunks;
+	sctpkp->sctpInUnorderChunks.value.i64 = sctp_mib.sctpInUnorderChunks;
+	sctpkp->sctpInAck.value.i32 = sctp_mib.sctpInAck;
+	sctpkp->sctpInDupAck.value.i32 = sctp_mib.sctpInDupAck;
+	sctpkp->sctpInAckUnsent.value.i32 = sctp_mib.sctpInAckUnsent;
+	sctpkp->sctpFragUsrMsgs.value.i64 = sctp_mib.sctpFragUsrMsgs;
+	sctpkp->sctpReasmUsrMsgs.value.i64 = sctp_mib.sctpReasmUsrMsgs;
+	sctpkp->sctpOutSCTPPkts.value.i64 = sctp_mib.sctpOutSCTPPkts;
+	sctpkp->sctpInSCTPPkts.value.i64 = sctp_mib.sctpInSCTPPkts;
+	sctpkp->sctpInInvalidCookie.value.i32 = sctp_mib.sctpInInvalidCookie;
+	sctpkp->sctpTimRetrans.value.i32 = sctp_mib.sctpTimRetrans;
+	sctpkp->sctpTimRetransDrop.value.i32 = sctp_mib.sctpTimRetransDrop;
 	sctpkp->sctpTimHeartBeatProbe.value.i32 =
-	    sctps->sctps_mib.sctpTimHeartBeatProbe;
-	sctpkp->sctpTimHeartBeatDrop.value.i32 =
-	    sctps->sctps_mib.sctpTimHeartBeatDrop;
-	sctpkp->sctpListenDrop.value.i32 = sctps->sctps_mib.sctpListenDrop;
-	sctpkp->sctpInClosed.value.i32 = sctps->sctps_mib.sctpInClosed;
+	    sctp_mib.sctpTimHeartBeatProbe;
+	sctpkp->sctpTimHeartBeatDrop.value.i32 = sctp_mib.sctpTimHeartBeatDrop;
+	sctpkp->sctpListenDrop.value.i32 = sctp_mib.sctpListenDrop;
+	sctpkp->sctpInClosed.value.i32 = sctp_mib.sctpInClosed;
 
 	netstack_rele(ns);
 	return (0);
@@ -302,7 +308,7 @@
 	ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctp", "mib2",
 	    KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_named_kstat_t), 0, stackid);
 
-	if (ksp == NULL || ksp->ks_data == NULL)
+	if (ksp == NULL)
 		return (NULL);
 
 	/* These won't change. */
@@ -318,6 +324,123 @@
 }
 
 /*
+ * To set all sctp_kstat_t counters to 0.
+ */
+static void
+sctp_clr_kstats2(sctp_kstat_t *stats)
+{
+	stats->sctp_add_faddr.value.ui64 = 0;
+	stats->sctp_add_timer.value.ui64 = 0;
+	stats->sctp_conn_create.value.ui64 = 0;
+	stats->sctp_find_next_tq.value.ui64 = 0;
+	stats->sctp_fr_add_hdr.value.ui64 = 0;
+	stats->sctp_fr_not_found.value.ui64 = 0;
+	stats->sctp_output_failed.value.ui64 = 0;
+	stats->sctp_rexmit_failed.value.ui64 = 0;
+	stats->sctp_send_init_failed.value.ui64 = 0;
+	stats->sctp_send_cookie_failed.value.ui64 = 0;
+	stats->sctp_send_cookie_ack_failed.value.ui64 = 0;
+	stats->sctp_send_err_failed.value.ui64 = 0;
+	stats->sctp_send_sack_failed.value.ui64 = 0;
+	stats->sctp_send_shutdown_failed.value.ui64 = 0;
+	stats->sctp_send_shutdown_ack_failed.value.ui64 = 0;
+	stats->sctp_send_shutdown_comp_failed.value.ui64 = 0;
+	stats->sctp_send_user_abort_failed.value.ui64 = 0;
+	stats->sctp_send_asconf_failed.value.ui64 = 0;
+	stats->sctp_send_asconf_ack_failed.value.ui64 = 0;
+	stats->sctp_send_ftsn_failed.value.ui64 = 0;
+	stats->sctp_send_hb_failed.value.ui64 = 0;
+	stats->sctp_return_hb_failed.value.ui64 = 0;
+	stats->sctp_ss_rexmit_failed.value.ui64 = 0;
+	stats->sctp_cl_connect.value.ui64 = 0;
+	stats->sctp_cl_assoc_change.value.ui64 = 0;
+	stats->sctp_cl_check_addrs.value.ui64 = 0;
+	stats->sctp_reclaim_cnt.value.ui64 = 0;
+	stats->sctp_listen_cnt_drop.value.ui64 = 0;
+}
+
+/*
+ * To add counters from the per CPU sctp_kstat_counter_t to the stack
+ * sctp_kstat_t.
+ */
+static void
+sctp_add_kstats2(sctp_kstat_counter_t *from, sctp_kstat_t *to)
+{
+	to->sctp_add_faddr.value.ui64 += from->sctp_add_faddr;
+	to->sctp_add_timer.value.ui64 += from->sctp_add_timer;
+	to->sctp_conn_create.value.ui64 += from->sctp_conn_create;
+	to->sctp_find_next_tq.value.ui64 += from->sctp_find_next_tq;
+	to->sctp_fr_add_hdr.value.ui64 += from->sctp_fr_add_hdr;
+	to->sctp_fr_not_found.value.ui64 += from->sctp_fr_not_found;
+	to->sctp_output_failed.value.ui64 += from->sctp_output_failed;
+	to->sctp_rexmit_failed.value.ui64 += from->sctp_rexmit_failed;
+	to->sctp_send_init_failed.value.ui64 += from->sctp_send_init_failed;
+	to->sctp_send_cookie_failed.value.ui64 += from->sctp_send_cookie_failed;
+	to->sctp_send_cookie_ack_failed.value.ui64 +=
+	    from->sctp_send_cookie_ack_failed;
+	to->sctp_send_err_failed.value.ui64 += from->sctp_send_err_failed;
+	to->sctp_send_sack_failed.value.ui64 += from->sctp_send_sack_failed;
+	to->sctp_send_shutdown_failed.value.ui64 +=
+	    from->sctp_send_shutdown_failed;
+	to->sctp_send_shutdown_ack_failed.value.ui64 +=
+	    from->sctp_send_shutdown_ack_failed;
+	to->sctp_send_shutdown_comp_failed.value.ui64 +=
+	    from->sctp_send_shutdown_comp_failed;
+	to->sctp_send_user_abort_failed.value.ui64 +=
+	    from->sctp_send_user_abort_failed;
+	to->sctp_send_asconf_failed.value.ui64 += from->sctp_send_asconf_failed;
+	to->sctp_send_asconf_ack_failed.value.ui64 +=
+	    from->sctp_send_asconf_ack_failed;
+	to->sctp_send_ftsn_failed.value.ui64 += from->sctp_send_ftsn_failed;
+	to->sctp_send_hb_failed.value.ui64 += from->sctp_send_hb_failed;
+	to->sctp_return_hb_failed.value.ui64 += from->sctp_return_hb_failed;
+	to->sctp_ss_rexmit_failed.value.ui64 += from->sctp_ss_rexmit_failed;
+	to->sctp_cl_connect.value.ui64 += from->sctp_cl_connect;
+	to->sctp_cl_assoc_change.value.ui64 += from->sctp_cl_assoc_change;
+	to->sctp_cl_check_addrs.value.ui64 += from->sctp_cl_check_addrs;
+}
+
+/*
+ * Sum up all per CPU sctp_kstat_counter_t kstat counters.
+ */
+static int
+sctp_kstat2_update(kstat_t *kp, int rw)
+{
+	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
+	netstack_t	*ns;
+	sctp_stack_t	*sctps;
+	sctp_kstat_t	*stats;
+	int		i;
+	int		cnt;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	ns = netstack_find_by_stackid(stackid);
+	if (ns == NULL)
+		return (-1);
+	sctps = ns->netstack_sctp;
+	if (sctps == NULL) {
+		netstack_rele(ns);
+		return (-1);
+	}
+
+	stats = (sctp_kstat_t *)kp->ks_data;
+	sctp_clr_kstats2(stats);
+
+	/*
+	 * sctps_sc_cnt may change in the middle of the loop.  It is better
+	 * to get its value first.
+	 */
+	cnt = sctps->sctps_sc_cnt;
+	for (i = 0; i < cnt; i++)
+		sctp_add_kstats2(&sctps->sctps_sc[i]->sctp_sc_stats, stats);
+
+	netstack_rele(ns);
+	return (0);
+}
+
+/*
  * The following kstats are for debugging purposes.  They keep
  * track of problems which should not happen normally.  But in
  * those cases which they do happen, these kstats would be handy
@@ -325,7 +448,7 @@
  * to be consumed by customers.
  */
 void *
-sctp_kstat2_init(netstackid_t stackid, sctp_kstat_t *sctps_statisticsp)
+sctp_kstat2_init(netstackid_t stackid)
 {
 	kstat_t *ksp;
 
@@ -356,18 +479,19 @@
 		{ "sctp_cl_connect",			KSTAT_DATA_UINT64 },
 		{ "sctp_cl_assoc_change",		KSTAT_DATA_UINT64 },
 		{ "sctp_cl_check_addrs",		KSTAT_DATA_UINT64 },
+		{ "sctp_reclaim_drop",			KSTAT_DATA_UINT64 },
+		{ "sctp_listen_cnt_drop",		KSTAT_DATA_UINT64 },
 	};
 
 	ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctpstat", "net",
-	    KSTAT_TYPE_NAMED, NUM_OF_FIELDS(template), KSTAT_FLAG_VIRTUAL,
-	    stackid);
+	    KSTAT_TYPE_NAMED, NUM_OF_FIELDS(template), 0, stackid);
 
 	if (ksp == NULL)
 		return (NULL);
 
-	bcopy(&template, sctps_statisticsp, sizeof (template));
-	ksp->ks_data = (void *)sctps_statisticsp;
+	bcopy(&template, ksp->ks_data, sizeof (template));
 	ksp->ks_private = (void *)(uintptr_t)stackid;
+	ksp->ks_update = sctp_kstat2_update;
 
 	kstat_install(ksp);
 	return (ksp);
@@ -427,6 +551,7 @@
 	conn_t			*connp;
 	boolean_t		needattr;
 	int			idx;
+	mib2_sctp_t		sctp_mib;
 
 	/*
 	 * Make copies of the original message.
@@ -456,19 +581,13 @@
 	mp_rem_data = mp_rem_ctl->b_cont;
 	mp_attr_data = mp_attr_ctl->b_cont;
 
+	bzero(&sctp_mib, sizeof (sctp_mib));
+
 	/* hostname address parameters are not supported in Solaris */
 	sce.sctpAssocRemHostName.o_length = 0;
 	sce.sctpAssocRemHostName.o_bytes[0] = 0;
 
 	/* build table of connections -- need count in fixed part */
-	SET_MIB(sctps->sctps_mib.sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ);
-	SET_MIB(sctps->sctps_mib.sctpRtoMin, sctps->sctps_rto_ming);
-	SET_MIB(sctps->sctps_mib.sctpRtoMax, sctps->sctps_rto_maxg);
-	SET_MIB(sctps->sctps_mib.sctpRtoInitial, sctps->sctps_rto_initialg);
-	SET_MIB(sctps->sctps_mib.sctpMaxAssocs, -1);
-	SET_MIB(sctps->sctps_mib.sctpValCookieLife, sctps->sctps_cookie_life);
-	SET_MIB(sctps->sctps_mib.sctpMaxInitRetr, sctps->sctps_max_init_retr);
-	SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0);
 
 	idx = 0;
 	mutex_enter(&sctps->sctps_g_lock);
@@ -490,54 +609,51 @@
 		if (sctp->sctp_state == SCTPS_ESTABLISHED ||
 		    sctp->sctp_state == SCTPS_SHUTDOWN_PENDING ||
 		    sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) {
-			BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab);
+			/*
+			 * Just bump the local sctp_mib.  The number of
+			 * existing associations is not kept in kernel.
+			 */
+			BUMP_MIB(&sctp_mib, sctpCurrEstab);
 		}
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpOutSCTPPkts, sctp->sctp_opkts);
+		SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts);
 		sctp->sctp_opkts = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpOutCtrlChunks, sctp->sctp_obchunks);
+		SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_obchunks,
 		    sctp->sctp_obchunks);
 		sctp->sctp_obchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpOutOrderChunks, sctp->sctp_odchunks);
+		SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks,
+		    sctp->sctp_odchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_odchunks,
 		    sctp->sctp_odchunks);
 		sctp->sctp_odchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks,
+		SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks,
 		    sctp->sctp_oudchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_oudchunks,
 		    sctp->sctp_oudchunks);
 		sctp->sctp_oudchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpRetransChunks, sctp->sctp_rxtchunks);
+		SCTPS_UPDATE_MIB(sctps, sctpRetransChunks,
+		    sctp->sctp_rxtchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_rxtchunks,
 		    sctp->sctp_rxtchunks);
 		sctp->sctp_rxtchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpInSCTPPkts, sctp->sctp_ipkts);
+		SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts);
 		sctp->sctp_ipkts = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpInCtrlChunks, sctp->sctp_ibchunks);
+		SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_ibchunks,
 		    sctp->sctp_ibchunks);
 		sctp->sctp_ibchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpInOrderChunks, sctp->sctp_idchunks);
+		SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_idchunks,
 		    sctp->sctp_idchunks);
 		sctp->sctp_idchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks,
+		SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks,
 		    sctp->sctp_iudchunks);
 		UPDATE_LOCAL(sctp->sctp_cum_iudchunks,
 		    sctp->sctp_iudchunks);
 		sctp->sctp_iudchunks = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpFragUsrMsgs, sctp->sctp_fragdmsgs);
+		SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs);
 		sctp->sctp_fragdmsgs = 0;
-		UPDATE_MIB(&sctps->sctps_mib,
-		    sctpReasmUsrMsgs, sctp->sctp_reassmsgs);
+		SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs);
 		sctp->sctp_reassmsgs = 0;
 
 		sce.sctpAssocId = ntohl(sctp->sctp_lvtag);
@@ -700,15 +816,12 @@
 	if (sctp_prev != NULL)
 		SCTP_REFRELE(sctp_prev);
 
-	/* fixed length structure for IPv4 and IPv6 counters */
-	SET_MIB(sctps->sctps_mib.sctpEntrySize, sizeof (sce));
-	SET_MIB(sctps->sctps_mib.sctpLocalEntrySize, sizeof (scle));
-	SET_MIB(sctps->sctps_mib.sctpRemoteEntrySize, sizeof (scre));
+	sctp_sum_mib(sctps, &sctp_mib);
+
 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
 	optp->level = MIB2_SCTP;
 	optp->name = 0;
-	(void) snmp_append_data(mpdata, (char *)&sctps->sctps_mib,
-	    sizeof (sctps->sctps_mib));
+	(void) snmp_append_data(mpdata, (char *)&sctp_mib, sizeof (sctp_mib));
 	optp->len = msgdsize(mpdata);
 	qreply(q, mpctl);
 
@@ -781,3 +894,76 @@
 		return (0);
 	}
 }
+
+/*
+ * To sum up all MIB2 stats for a sctp_stack_t from all per CPU stats.  The
+ * caller should initialize the target mib2_sctp_t properly as this function
+ * just adds up all the per CPU stats.
+ */
+static void
+sctp_sum_mib(sctp_stack_t *sctps, mib2_sctp_t *sctp_mib)
+{
+	int i;
+	int cnt;
+
+	/* Static components of mib2_sctp_t. */
+	SET_MIB(sctp_mib->sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ);
+	SET_MIB(sctp_mib->sctpRtoMin, sctps->sctps_rto_ming);
+	SET_MIB(sctp_mib->sctpRtoMax, sctps->sctps_rto_maxg);
+	SET_MIB(sctp_mib->sctpRtoInitial, sctps->sctps_rto_initialg);
+	SET_MIB(sctp_mib->sctpMaxAssocs, -1);
+	SET_MIB(sctp_mib->sctpValCookieLife, sctps->sctps_cookie_life);
+	SET_MIB(sctp_mib->sctpMaxInitRetr, sctps->sctps_max_init_retr);
+
+	/* fixed length structure for IPv4 and IPv6 counters */
+	SET_MIB(sctp_mib->sctpEntrySize, sizeof (mib2_sctpConnEntry_t));
+	SET_MIB(sctp_mib->sctpLocalEntrySize,
+	    sizeof (mib2_sctpConnLocalEntry_t));
+	SET_MIB(sctp_mib->sctpRemoteEntrySize,
+	    sizeof (mib2_sctpConnRemoteEntry_t));
+
+	/*
+	 * sctps_sc_cnt may change in the middle of the loop.  It is better
+	 * to get its value first.
+	 */
+	cnt = sctps->sctps_sc_cnt;
+	for (i = 0; i < cnt; i++)
+		sctp_add_mib(&sctps->sctps_sc[i]->sctp_sc_mib, sctp_mib);
+}
+
+static void
+sctp_add_mib(mib2_sctp_t *from, mib2_sctp_t *to)
+{
+	to->sctpActiveEstab += from->sctpActiveEstab;
+	to->sctpPassiveEstab += from->sctpPassiveEstab;
+	to->sctpAborted += from->sctpAborted;
+	to->sctpShutdowns += from->sctpShutdowns;
+	to->sctpOutOfBlue += from->sctpOutOfBlue;
+	to->sctpChecksumError += from->sctpChecksumError;
+	to->sctpOutCtrlChunks += from->sctpOutCtrlChunks;
+	to->sctpOutOrderChunks += from->sctpOutOrderChunks;
+	to->sctpOutUnorderChunks += from->sctpOutUnorderChunks;
+	to->sctpRetransChunks += from->sctpRetransChunks;
+	to->sctpOutAck += from->sctpOutAck;
+	to->sctpOutAckDelayed += from->sctpOutAckDelayed;
+	to->sctpOutWinUpdate += from->sctpOutWinUpdate;
+	to->sctpOutFastRetrans += from->sctpOutFastRetrans;
+	to->sctpOutWinProbe += from->sctpOutWinProbe;
+	to->sctpInCtrlChunks += from->sctpInCtrlChunks;
+	to->sctpInOrderChunks += from->sctpInOrderChunks;
+	to->sctpInUnorderChunks += from->sctpInUnorderChunks;
+	to->sctpInAck += from->sctpInAck;
+	to->sctpInDupAck += from->sctpInDupAck;
+	to->sctpInAckUnsent += from->sctpInAckUnsent;
+	to->sctpFragUsrMsgs += from->sctpFragUsrMsgs;
+	to->sctpReasmUsrMsgs += from->sctpReasmUsrMsgs;
+	to->sctpOutSCTPPkts += from->sctpOutSCTPPkts;
+	to->sctpInSCTPPkts += from->sctpInSCTPPkts;
+	to->sctpInInvalidCookie += from->sctpInInvalidCookie;
+	to->sctpTimRetrans += from->sctpTimRetrans;
+	to->sctpTimRetransDrop += from->sctpTimRetransDrop;
+	to->sctpTimHeartBeatProbe += from->sctpTimHeartBeatProbe;
+	to->sctpTimHeartBeatDrop += from->sctpTimHeartBeatDrop;
+	to->sctpListenDrop += from->sctpListenDrop;
+	to->sctpInClosed += from->sctpInClosed;
+}
--- a/usr/src/uts/common/inet/sctp/sctp_stack.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_stack.h	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_INET_SCTP_SCTP_STACK_H
@@ -62,9 +61,60 @@
 	kstat_named_t	sctp_cl_connect;
 	kstat_named_t	sctp_cl_assoc_change;
 	kstat_named_t	sctp_cl_check_addrs;
+	kstat_named_t	sctp_reclaim_cnt;
+	kstat_named_t	sctp_listen_cnt_drop;
 } sctp_kstat_t;
 
-#define	SCTP_KSTAT(sctps, x)	((sctps)->sctps_statistics.x.value.ui64++)
+/*
+ * This struct contains only the counter part of sctp_kstat_t.  It is used
+ * in sctp_stats_cpu_t instead of sctp_kstat_t to save memory space.
+ */
+typedef struct sctp_kstat_counter_s {
+	uint64_t	sctp_add_faddr;
+	uint64_t	sctp_add_timer;
+	uint64_t	sctp_conn_create;
+	uint64_t	sctp_find_next_tq;
+	uint64_t	sctp_fr_add_hdr;
+	uint64_t	sctp_fr_not_found;
+	uint64_t	sctp_output_failed;
+	uint64_t	sctp_rexmit_failed;
+	uint64_t	sctp_send_init_failed;
+	uint64_t	sctp_send_cookie_failed;
+	uint64_t	sctp_send_cookie_ack_failed;
+	uint64_t	sctp_send_err_failed;
+	uint64_t	sctp_send_sack_failed;
+	uint64_t	sctp_send_shutdown_failed;
+	uint64_t	sctp_send_shutdown_ack_failed;
+	uint64_t	sctp_send_shutdown_comp_failed;
+	uint64_t	sctp_send_user_abort_failed;
+	uint64_t	sctp_send_asconf_failed;
+	uint64_t	sctp_send_asconf_ack_failed;
+	uint64_t	sctp_send_ftsn_failed;
+	uint64_t	sctp_send_hb_failed;
+	uint64_t	sctp_return_hb_failed;
+	uint64_t	sctp_ss_rexmit_failed;
+	uint64_t	sctp_cl_connect;
+	uint64_t	sctp_cl_assoc_change;
+	uint64_t	sctp_cl_check_addrs;
+	uint64_t	sctp_reclaim_cnt;
+	uint64_t	sctp_listen_cnt_drop;
+} sctp_kstat_counter_t;
+
+/* Per CPU SCTP statistics counters. */
+typedef struct {
+	int64_t			sctp_sc_assoc_cnt;
+	mib2_sctp_t		sctp_sc_mib;
+	sctp_kstat_counter_t	sctp_sc_stats;
+} sctp_stats_cpu_t;
+
+#define	SCTP_KSTAT(sctps, x)		\
+	((sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_stats.x++)
+
+#define	SCTPS_BUMP_MIB(sctps, x)	\
+	BUMP_MIB(&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_mib, x)
+
+#define	SCTPS_UPDATE_MIB(sctps, x, y)	\
+	UPDATE_MIB(&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_mib, x, y)
 
 /*
  * SCTP stack instances
@@ -72,8 +122,6 @@
 struct sctp_stack {
 	netstack_t	*sctps_netstack;	/* Common netstack */
 
-	mib2_sctp_t		sctps_mib;
-
 	/* Protected by sctps_g_lock */
 	struct list	sctps_g_list;	/* SCTP instance data chain */
 	kmutex_t	sctps_g_lock;
@@ -94,7 +142,10 @@
 	/* holds sctp tunables */
 	struct mod_prop_info_s	*sctps_propinfo_tbl;
 
-/* This lock protects the SCTP recvq_tq_list array and recvq_tq_list_cur_sz. */
+	/*
+	 * This lock protects the SCTP recvq_tq_list array and
+	 * recvq_tq_list_cur_sz.
+	 */
 	kmutex_t		sctps_rq_tq_lock;
 	int			sctps_recvq_tq_list_max_sz;
 	taskq_t			**sctps_recvq_tq_list;
@@ -113,11 +164,31 @@
 	uint32_t		sctps_g_ipifs_count;
 	krwlock_t		sctps_g_ipifs_lock;
 
-	/* kstat exporting sctp_mib data */
+	/* kstat exporting mib2_sctp_t and sctp_kstat_t data */
 	kstat_t			*sctps_mibkp;
 	kstat_t			*sctps_kstat;
-	sctp_kstat_t		sctps_statistics;
+
+	/* Variables for handling kmem reclaim call back. */
+	kmutex_t	sctps_reclaim_lock;
+	boolean_t	sctps_reclaim;
+	timeout_id_t	sctps_reclaim_tid;
+	uint32_t	sctps_reclaim_period;
+
+	/* Listener association limit configuration. */
+	kmutex_t	sctps_listener_conf_lock;
+	list_t		sctps_listener_conf;
+
+	/*
+	 * Per CPU stats
+	 *
+	 * sctps_sc: array of pointer to per CPU stats.  The i-th element in
+	 *   the array represents the stats of the CPU with cpu_seqid.
+	 * sctps_sc_cnt: number of CPU stats in the sctps_sc array.
+	 */
+	sctp_stats_cpu_t	**sctps_sc;
+	int			sctps_sc_cnt;
 };
+
 typedef struct sctp_stack sctp_stack_t;
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/inet/sctp/sctp_timer.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_timer.c	Mon Jul 19 17:27:45 2010 -0700
@@ -57,6 +57,16 @@
 	clock_t			sctp_tb_time_left;
 } sctp_tb_t;
 
+/*
+ * Early abort threshold when the system is under pressure, sctps_reclaim
+ * is on.
+ *
+ * sctp_pa_early_abort: number of strikes per association before abort
+ * sctp_pp_early_abort: number of strikes per peer address before abort
+ */
+uint32_t sctp_pa_early_abort = 5;
+uint32_t sctp_pp_early_abort = 3;
+
 static void sctp_timer_fire(sctp_tb_t *);
 
 /*
@@ -371,7 +381,7 @@
 
 	sctp->sctp_ack_timer_running = 0;
 	sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max;
-	BUMP_MIB(&sctps->sctps_mib, sctpOutAckDelayed);
+	SCTPS_BUMP_MIB(sctps, sctpOutAckDelayed);
 	(void) sctp_sack(sctp, NULL);
 }
 
@@ -386,21 +396,21 @@
 	int64_t		earliest_expiry;
 	int		cnt;
 	sctp_stack_t	*sctps = sctp->sctp_sctps;
+	int		pp_max_retr;
 
 	if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) {
 		/*
-		 * If there is a peer address with no strikes,
-		 * don't give up yet. If enough other peer
-		 * address are down, we could otherwise fail
-		 * the association prematurely.  This is a
-		 * byproduct of our aggressive probe approach
-		 * when a heartbeat fails to connect. We may
-		 * wish to revisit this...
+		 * If there is a peer address with no strikes, don't give up
+		 * yet unless we are under memory pressure.  If enough other
+		 * peer addresses are down, we could otherwise fail the
+		 * association prematurely.  This is a byproduct of our
+		 * aggressive probe approach when a heartbeat fails to
+		 * connect. We may wish to revisit this...
 		 */
-		if (!sctp_is_a_faddr_clean(sctp)) {
+		if (sctps->sctps_reclaim || !sctp_is_a_faddr_clean(sctp)) {
 			/* time to give up */
-			BUMP_MIB(&sctps->sctps_mib, sctpAborted);
-			BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatDrop);
+			SCTPS_BUMP_MIB(sctps, sctpAborted);
+			SCTPS_BUMP_MIB(sctps, sctpTimHeartBeatDrop);
 			sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL);
 			sctp_clean_death(sctp, sctp->sctp_client_errno ?
 			    sctp->sctp_client_errno : ETIMEDOUT);
@@ -424,6 +434,11 @@
 	 * be OK.
 	 */
 	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
+		if (sctps->sctps_reclaim)
+			pp_max_retr = MIN(sctp_pp_early_abort, fp->max_retr);
+		else
+			pp_max_retr = fp->max_retr;
+
 		/*
 		 * If the peer is unreachable because there is no available
 		 * source address, call sctp_get_dest() to see if it is
@@ -438,7 +453,7 @@
 			sctp_get_dest(sctp, fp);
 			if (fp->state == SCTP_FADDRS_UNREACH) {
 				if (fp->hb_enabled &&
-				    ++fp->strikes > fp->max_retr &&
+				    ++fp->strikes > pp_max_retr &&
 				    sctp_faddr_dead(sctp, fp,
 				    SCTP_FADDRS_DOWN) == -1) {
 					/* Assoc is dead */
@@ -489,7 +504,7 @@
 					 */
 					fp->rtt_updates = 0;
 					fp->strikes++;
-					if (fp->strikes > fp->max_retr) {
+					if (fp->strikes > pp_max_retr) {
 						if (sctp_faddr_dead(sctp, fp,
 						    SCTP_FADDRS_DOWN) == -1) {
 							/* Assoc is dead */
@@ -570,6 +585,7 @@
 	mblk_t 		*mp;
 	uint32_t	rto_max = sctp->sctp_rto_max;
 	sctp_stack_t	*sctps = sctp->sctp_sctps;
+	int		pp_max_retr, pa_max_retr;
 
 	ASSERT(fp != NULL);
 
@@ -578,22 +594,31 @@
 
 	fp->timer_running = 0;
 
+	if (!sctps->sctps_reclaim) {
+		pp_max_retr = fp->max_retr;
+		pa_max_retr = sctp->sctp_pa_max_rxt;
+	} else {
+		/* App may have set a very aggressive retransmission limit. */
+		pp_max_retr = MIN(sctp_pp_early_abort, fp->max_retr);
+		pa_max_retr = MIN(sctp_pa_early_abort, sctp->sctp_pa_max_rxt);
+	}
+
 	/* Check is we've reached the max for retries */
 	if (sctp->sctp_state < SCTPS_ESTABLISHED) {
 		if (fp->strikes >= sctp->sctp_max_init_rxt) {
 			/* time to give up */
-			BUMP_MIB(&sctps->sctps_mib, sctpAborted);
-			BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop);
+			SCTPS_BUMP_MIB(sctps, sctpAborted);
+			SCTPS_BUMP_MIB(sctps, sctpTimRetransDrop);
 			sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL);
 			sctp_clean_death(sctp, sctp->sctp_client_errno ?
 			    sctp->sctp_client_errno : ETIMEDOUT);
 			return;
 		}
 	} else if (sctp->sctp_state >= SCTPS_ESTABLISHED) {
-		if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) {
+		if (sctp->sctp_strikes >= pa_max_retr) {
 			/* time to give up */
-			BUMP_MIB(&sctps->sctps_mib, sctpAborted);
-			BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop);
+			SCTPS_BUMP_MIB(sctps, sctpAborted);
+			SCTPS_BUMP_MIB(sctps, sctpTimRetransDrop);
 			sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL);
 			sctp_clean_death(sctp, sctp->sctp_client_errno ?
 			    sctp->sctp_client_errno : ETIMEDOUT);
@@ -601,7 +626,7 @@
 		}
 	}
 
-	if (fp->strikes >= fp->max_retr) {
+	if (fp->strikes >= pp_max_retr) {
 		if (sctp_faddr_dead(sctp, fp, SCTP_FADDRS_DOWN) == -1) {
 			return;
 		}
@@ -624,7 +649,7 @@
 			return;
 		}
 
-		BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans);
+		SCTPS_BUMP_MIB(sctps, sctpTimRetrans);
 
 		sctp_rexmit(sctp, fp);
 		/*
@@ -643,7 +668,7 @@
 		 */
 		mp = sctp_init_mp(sctp, fp);
 		if (mp != NULL) {
-			BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans);
+			SCTPS_BUMP_MIB(sctps, sctpTimRetrans);
 			(void) conn_ip_output(mp, fp->ixa);
 			BUMP_LOCAL(sctp->sctp_opkts);
 		}
@@ -660,13 +685,13 @@
 			break;
 		(void) conn_ip_output(mp, fp->ixa);
 		BUMP_LOCAL(sctp->sctp_opkts);
-		BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans);
+		SCTPS_BUMP_MIB(sctps, sctpTimRetrans);
 		rto_max = sctp->sctp_rto_max_init;
 		break;
 	case SCTPS_SHUTDOWN_SENT:
 		BUMP_LOCAL(sctp->sctp_T2expire);
 		sctp_send_shutdown(sctp, 1);
-		BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans);
+		SCTPS_BUMP_MIB(sctps, sctpTimRetrans);
 		break;
 	case SCTPS_SHUTDOWN_ACK_SENT:
 		/* We shouldn't have any more outstanding data */
@@ -676,7 +701,7 @@
 		BUMP_LOCAL(sctp->sctp_T2expire);
 		(void) sctp_shutdown_received(sctp, NULL, B_FALSE, B_TRUE,
 		    NULL);
-		BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans);
+		SCTPS_BUMP_MIB(sctps, sctpTimRetrans);
 		break;
 	default:
 		ASSERT(0);
--- a/usr/src/uts/common/inet/sctp/sctp_tunables.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/sctp/sctp_tunables.c	Mon Jul 19 17:27:45 2010 -0700
@@ -18,9 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <inet/ip.h>
@@ -38,6 +38,132 @@
 #define	SCTP_MSS_MAX	SCTP_MSS_MAX_IPV4
 
 /*
+ * Returns the current list of listener limit configurations.
+ */
+/* ARGSUSED */
+static int
+sctp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
+    void *val, uint_t psize, uint_t flags)
+{
+	sctp_stack_t	*sctps = (sctp_stack_t *)cbarg;
+	sctp_listener_t	*sl;
+	char		*pval = val;
+	size_t		nbytes = 0, tbytes = 0;
+	uint_t		size;
+	int		err = 0;
+
+	bzero(pval, psize);
+	size = psize;
+
+	if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
+		return (0);
+
+	mutex_enter(&sctps->sctps_listener_conf_lock);
+	for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL;
+	    sl = list_next(&sctps->sctps_listener_conf, sl)) {
+		if (psize == size)
+			nbytes = snprintf(pval, size, "%d:%d",  sl->sl_port,
+			    sl->sl_ratio);
+		else
+			nbytes = snprintf(pval, size, ",%d:%d",  sl->sl_port,
+			    sl->sl_ratio);
+		size -= nbytes;
+		pval += nbytes;
+		tbytes += nbytes;
+		if (tbytes >= psize) {
+			/* Buffer overflow, stop copying information */
+			err = ENOBUFS;
+			break;
+		}
+	}
+
+	mutex_exit(&sctps->sctps_listener_conf_lock);
+	return (err);
+}
+
+/*
+ * add a new listener limit configuration.
+ */
+/* ARGSUSED */
+static int
+sctp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
+    const char *ifname, const void* pval, uint_t flags)
+{
+	sctp_listener_t	*new_sl;
+	sctp_listener_t	*sl;
+	long		lport;
+	long		ratio;
+	char		*colon;
+	sctp_stack_t	*sctps = (sctp_stack_t *)cbarg;
+
+	if (flags & MOD_PROP_DEFAULT)
+		return (ENOTSUP);
+
+	if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
+	    lport > USHRT_MAX || *colon != ':') {
+		return (EINVAL);
+	}
+	if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
+		return (EINVAL);
+
+	mutex_enter(&sctps->sctps_listener_conf_lock);
+	for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL;
+	    sl = list_next(&sctps->sctps_listener_conf, sl)) {
+		/* There is an existing entry, so update its ratio value. */
+		if (sl->sl_port == lport) {
+			sl->sl_ratio = ratio;
+			mutex_exit(&sctps->sctps_listener_conf_lock);
+			return (0);
+		}
+	}
+
+	if ((new_sl = kmem_alloc(sizeof (sctp_listener_t), KM_NOSLEEP)) ==
+	    NULL) {
+		mutex_exit(&sctps->sctps_listener_conf_lock);
+		return (ENOMEM);
+	}
+
+	new_sl->sl_port = lport;
+	new_sl->sl_ratio = ratio;
+	list_insert_tail(&sctps->sctps_listener_conf, new_sl);
+	mutex_exit(&sctps->sctps_listener_conf_lock);
+	return (0);
+}
+
+/*
+ * remove a listener limit configuration.
+ */
+/* ARGSUSED */
+static int
+sctp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
+    const char *ifname, const void* pval, uint_t flags)
+{
+	sctp_listener_t	*sl;
+	long		lport;
+	sctp_stack_t	*sctps = (sctp_stack_t *)cbarg;
+
+	if (flags & MOD_PROP_DEFAULT)
+		return (ENOTSUP);
+
+	if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
+	    lport > USHRT_MAX) {
+		return (EINVAL);
+	}
+	mutex_enter(&sctps->sctps_listener_conf_lock);
+	for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL;
+	    sl = list_next(&sctps->sctps_listener_conf, sl)) {
+		if (sl->sl_port == lport) {
+			list_remove(&sctps->sctps_listener_conf, sl);
+			mutex_exit(&sctps->sctps_listener_conf_lock);
+			kmem_free(sl, sizeof (sctp_listener_t));
+			return (0);
+		}
+	}
+	mutex_exit(&sctps->sctps_listener_conf_lock);
+	return (ESRCH);
+}
+
+/*
  * All of these are alterable, within the min/max values given, at run time.
  *
  * Note: All those tunables which do not start with "sctp_" are Committed and
@@ -210,6 +336,15 @@
 	    mod_set_extra_privports, mod_get_extra_privports,
 	    {1, ULP_MAX_PORT, 0}, {0} },
 
+	{ "sctp_listener_limit_conf", MOD_PROTO_SCTP,
+	    NULL, sctp_listener_conf_get, {0}, {0} },
+
+	{ "sctp_listener_limit_conf_add", MOD_PROTO_SCTP,
+	    sctp_listener_conf_add, NULL, {0}, {0} },
+
+	{ "sctp_listener_limit_conf_del", MOD_PROTO_SCTP,
+	    sctp_listener_conf_del, NULL, {0}, {0} },
+
 	{ "?", MOD_PROTO_SCTP, NULL, mod_get_allprop, {0}, {0} },
 
 	{ NULL, 0, NULL, NULL, {0}, {0} }
--- a/usr/src/uts/common/inet/tcp/tcp.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Mon Jul 19 17:27:45 2010 -0700
@@ -250,7 +250,10 @@
 		((uint_t)(accid) & (TCP_ACCEPTOR_FANOUT_SIZE - 1))
 #endif	/* _ILP32 */
 
-/* Minimum number of connections per listener. */
+/*
+ * Minimum number of connections which can be created per listener.  Used
+ * when the listener connection count is in effect.
+ */
 static uint32_t tcp_min_conn_listener = 2;
 
 uint32_t tcp_early_abort = 30;
@@ -400,8 +403,6 @@
 extern mod_prop_info_t tcp_propinfo_tbl[];
 extern int tcp_propinfo_count;
 
-#define	MB	(1024 * 1024)
-
 #define	IS_VMLOANED_MBLK(mp) \
 	(((mp)->b_datap->db_struioflag & STRUIO_ZC) != 0)
 
@@ -3700,10 +3701,6 @@
 	 * set of tcp_stack_t's.
 	 */
 	netstack_register(NS_TCP, tcp_stack_init, NULL, tcp_stack_fini);
-
-	mutex_enter(&cpu_lock);
-	register_cpu_setup_func(tcp_cpu_update, NULL);
-	mutex_exit(&cpu_lock);
 }
 
 
@@ -3804,7 +3801,9 @@
 	 * are not freed until the stack is going away.  So there is no need
 	 * to grab a lock to access the per CPU tcps_sc[x] pointer.
 	 */
+	mutex_enter(&cpu_lock);
 	tcps->tcps_sc_cnt = MAX(ncpus, boot_ncpus);
+	mutex_exit(&cpu_lock);
 	tcps->tcps_sc = kmem_zalloc(max_ncpus  * sizeof (tcp_stats_cpu_t *),
 	    KM_SLEEP);
 	for (i = 0; i < tcps->tcps_sc_cnt; i++) {
@@ -3825,10 +3824,6 @@
 void
 tcp_ddi_g_destroy(void)
 {
-	mutex_enter(&cpu_lock);
-	unregister_cpu_setup_func(tcp_cpu_update, NULL);
-	mutex_exit(&cpu_lock);
-
 	tcp_g_kstat_fini(tcp_g_kstat);
 	tcp_g_kstat = NULL;
 	bzero(&tcp_g_statistics, sizeof (tcp_g_statistics));
--- a/usr/src/uts/common/inet/tcp/tcp_misc.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp/tcp_misc.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -649,52 +648,22 @@
 }
 
 /*
- * Call back function for CPU state change.
+ * When a CPU is added, we need to allocate the per CPU stats struct.
  */
-/* ARGSUSED */
-int
-tcp_cpu_update(cpu_setup_t what, int id, void *arg)
+void
+tcp_stack_cpu_add(tcp_stack_t *tcps, processorid_t cpu_seqid)
 {
-	cpu_t *cp;
-	netstack_handle_t nh;
-	netstack_t *ns;
-	tcp_stack_t *tcps;
 	int i;
 
-	ASSERT(MUTEX_HELD(&cpu_lock));
-	cp = cpu[id];
-
-	switch (what) {
-	case CPU_CONFIG:
-	case CPU_ON:
-	case CPU_INIT:
-	case CPU_CPUPART_IN:
-		netstack_next_init(&nh);
-		while ((ns = netstack_next(&nh)) != NULL) {
-			tcps = ns->netstack_tcp;
-			if (cp->cpu_seqid >= tcps->tcps_sc_cnt) {
-				for (i = tcps->tcps_sc_cnt; i <= cp->cpu_seqid;
-				    i++) {
-					ASSERT(tcps->tcps_sc[i] == NULL);
-					tcps->tcps_sc[i] = kmem_zalloc(
-					    sizeof (tcp_stats_cpu_t), KM_SLEEP);
-				}
-				membar_producer();
-				tcps->tcps_sc_cnt = cp->cpu_seqid + 1;
-			}
-			netstack_rele(ns);
-		}
-		netstack_next_fini(&nh);
-		break;
-	case CPU_UNCONFIG:
-	case CPU_OFF:
-	case CPU_CPUPART_OUT:
-		/* Nothing to do */
-		break;
-	default:
-		break;
+	if (cpu_seqid < tcps->tcps_sc_cnt)
+		return;
+	for (i = tcps->tcps_sc_cnt; i <= cpu_seqid; i++) {
+		ASSERT(tcps->tcps_sc[i] == NULL);
+		tcps->tcps_sc[i] = kmem_zalloc(sizeof (tcp_stats_cpu_t),
+		    KM_SLEEP);
 	}
-	return (0);
+	membar_producer();
+	tcps->tcps_sc_cnt = cpu_seqid + 1;
 }
 
 /*
--- a/usr/src/uts/common/inet/tcp/tcp_stats.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp/tcp_stats.c	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -41,8 +40,8 @@
 static int	tcp_kstat2_update(kstat_t *kp, int rw);
 static void	tcp_sum_mib(tcp_stack_t *, mib2_tcp_t *);
 
-static void	tcp_cp_mib(mib2_tcp_t *, mib2_tcp_t *);
-static void	tcp_cp_stats(tcp_stat_t *, tcp_stat_t *);
+static void	tcp_add_mib(mib2_tcp_t *, mib2_tcp_t *);
+static void	tcp_add_stats(tcp_stat_counter_t *, tcp_stat_t *);
 static void	tcp_clr_stats(tcp_stat_t *);
 
 tcp_g_stat_t	tcp_g_statistics;
@@ -773,18 +772,18 @@
 	 */
 	cnt = tcps->tcps_sc_cnt;
 	for (i = 0; i < cnt; i++)
-		tcp_cp_stats(&tcps->tcps_sc[i]->tcp_sc_stats, stats);
+		tcp_add_stats(&tcps->tcps_sc[i]->tcp_sc_stats, stats);
 
 	netstack_rele(ns);
 	return (0);
 }
 
 /*
- * To copy stats from one mib2_tcp_t to another.  Static fields are not copied.
+ * To add stats from one mib2_tcp_t to another.  Static fields are not added.
  * The caller should set them up properly.
  */
 void
-tcp_cp_mib(mib2_tcp_t *from, mib2_tcp_t *to)
+tcp_add_mib(mib2_tcp_t *from, mib2_tcp_t *to)
 {
 	to->tcpActiveOpens += from->tcpActiveOpens;
 	to->tcpPassiveOpens += from->tcpPassiveOpens;
@@ -855,7 +854,7 @@
 	 */
 	cnt = tcps->tcps_sc_cnt;
 	for (i = 0; i < cnt; i++)
-		tcp_cp_mib(&tcps->tcps_sc[i]->tcp_sc_mib, tcp_mib);
+		tcp_add_mib(&tcps->tcps_sc[i]->tcp_sc_mib, tcp_mib);
 
 	/* Fixed length structure for IPv4 and IPv6 counters */
 	SET_MIB(tcp_mib->tcpConnTableSize, sizeof (mib2_tcpConnEntry_t));
@@ -865,7 +864,7 @@
 /*
  * To set all tcp_stat_t counters to 0.
  */
-void
+static void
 tcp_clr_stats(tcp_stat_t *stats)
 {
 	stats->tcp_time_wait_syn_success.value.ui64 = 0;
@@ -921,106 +920,107 @@
 }
 
 /*
- * To copy counters from one tcp_stat_t to another.
+ * To add counters from the per CPU tcp_stat_counter_t to the stack
+ * tcp_stat_t.
  */
-void
-tcp_cp_stats(tcp_stat_t *from, tcp_stat_t *to)
+static void
+tcp_add_stats(tcp_stat_counter_t *from, tcp_stat_t *to)
 {
 	to->tcp_time_wait_syn_success.value.ui64 +=
-	    from->tcp_time_wait_syn_success.value.ui64;
+	    from->tcp_time_wait_syn_success;
 	to->tcp_clean_death_nondetached.value.ui64 +=
-	    from->tcp_clean_death_nondetached.value.ui64;
+	    from->tcp_clean_death_nondetached;
 	to->tcp_eager_blowoff_q.value.ui64 +=
-	    from->tcp_eager_blowoff_q.value.ui64;
+	    from->tcp_eager_blowoff_q;
 	to->tcp_eager_blowoff_q0.value.ui64 +=
-	    from->tcp_eager_blowoff_q0.value.ui64;
+	    from->tcp_eager_blowoff_q0;
 	to->tcp_no_listener.value.ui64 +=
-	    from->tcp_no_listener.value.ui64;
+	    from->tcp_no_listener;
 	to->tcp_listendrop.value.ui64 +=
-	    from->tcp_listendrop.value.ui64;
+	    from->tcp_listendrop;
 	to->tcp_listendropq0.value.ui64 +=
-	    from->tcp_listendropq0.value.ui64;
+	    from->tcp_listendropq0;
 	to->tcp_wsrv_called.value.ui64 +=
-	    from->tcp_wsrv_called.value.ui64;
+	    from->tcp_wsrv_called;
 	to->tcp_flwctl_on.value.ui64 +=
-	    from->tcp_flwctl_on.value.ui64;
+	    from->tcp_flwctl_on;
 	to->tcp_timer_fire_early.value.ui64 +=
-	    from->tcp_timer_fire_early.value.ui64;
+	    from->tcp_timer_fire_early;
 	to->tcp_timer_fire_miss.value.ui64 +=
-	    from->tcp_timer_fire_miss.value.ui64;
+	    from->tcp_timer_fire_miss;
 	to->tcp_zcopy_on.value.ui64 +=
-	    from->tcp_zcopy_on.value.ui64;
+	    from->tcp_zcopy_on;
 	to->tcp_zcopy_off.value.ui64 +=
-	    from->tcp_zcopy_off.value.ui64;
+	    from->tcp_zcopy_off;
 	to->tcp_zcopy_backoff.value.ui64 +=
-	    from->tcp_zcopy_backoff.value.ui64;
+	    from->tcp_zcopy_backoff;
 	to->tcp_fusion_flowctl.value.ui64 +=
-	    from->tcp_fusion_flowctl.value.ui64;
+	    from->tcp_fusion_flowctl;
 	to->tcp_fusion_backenabled.value.ui64 +=
-	    from->tcp_fusion_backenabled.value.ui64;
+	    from->tcp_fusion_backenabled;
 	to->tcp_fusion_urg.value.ui64 +=
-	    from->tcp_fusion_urg.value.ui64;
+	    from->tcp_fusion_urg;
 	to->tcp_fusion_putnext.value.ui64 +=
-	    from->tcp_fusion_putnext.value.ui64;
+	    from->tcp_fusion_putnext;
 	to->tcp_fusion_unfusable.value.ui64 +=
-	    from->tcp_fusion_unfusable.value.ui64;
+	    from->tcp_fusion_unfusable;
 	to->tcp_fusion_aborted.value.ui64 +=
-	    from->tcp_fusion_aborted.value.ui64;
+	    from->tcp_fusion_aborted;
 	to->tcp_fusion_unqualified.value.ui64 +=
-	    from->tcp_fusion_unqualified.value.ui64;
+	    from->tcp_fusion_unqualified;
 	to->tcp_fusion_rrw_busy.value.ui64 +=
-	    from->tcp_fusion_rrw_busy.value.ui64;
+	    from->tcp_fusion_rrw_busy;
 	to->tcp_fusion_rrw_msgcnt.value.ui64 +=
-	    from->tcp_fusion_rrw_msgcnt.value.ui64;
+	    from->tcp_fusion_rrw_msgcnt;
 	to->tcp_fusion_rrw_plugged.value.ui64 +=
-	    from->tcp_fusion_rrw_plugged.value.ui64;
+	    from->tcp_fusion_rrw_plugged;
 	to->tcp_in_ack_unsent_drop.value.ui64 +=
-	    from->tcp_in_ack_unsent_drop.value.ui64;
+	    from->tcp_in_ack_unsent_drop;
 	to->tcp_sock_fallback.value.ui64 +=
-	    from->tcp_sock_fallback.value.ui64;
+	    from->tcp_sock_fallback;
 	to->tcp_lso_enabled.value.ui64 +=
-	    from->tcp_lso_enabled.value.ui64;
+	    from->tcp_lso_enabled;
 	to->tcp_lso_disabled.value.ui64 +=
-	    from->tcp_lso_disabled.value.ui64;
+	    from->tcp_lso_disabled;
 	to->tcp_lso_times.value.ui64 +=
-	    from->tcp_lso_times.value.ui64;
+	    from->tcp_lso_times;
 	to->tcp_lso_pkt_out.value.ui64 +=
-	    from->tcp_lso_pkt_out.value.ui64;
+	    from->tcp_lso_pkt_out;
 	to->tcp_listen_cnt_drop.value.ui64 +=
-	    from->tcp_listen_cnt_drop.value.ui64;
+	    from->tcp_listen_cnt_drop;
 	to->tcp_listen_mem_drop.value.ui64 +=
-	    from->tcp_listen_mem_drop.value.ui64;
+	    from->tcp_listen_mem_drop;
 	to->tcp_zwin_mem_drop.value.ui64 +=
-	    from->tcp_zwin_mem_drop.value.ui64;
+	    from->tcp_zwin_mem_drop;
 	to->tcp_zwin_ack_syn.value.ui64 +=
-	    from->tcp_zwin_ack_syn.value.ui64;
+	    from->tcp_zwin_ack_syn;
 	to->tcp_rst_unsent.value.ui64 +=
-	    from->tcp_rst_unsent.value.ui64;
+	    from->tcp_rst_unsent;
 	to->tcp_reclaim_cnt.value.ui64 +=
-	    from->tcp_reclaim_cnt.value.ui64;
+	    from->tcp_reclaim_cnt;
 	to->tcp_reass_timeout.value.ui64 +=
-	    from->tcp_reass_timeout.value.ui64;
+	    from->tcp_reass_timeout;
 
 #ifdef TCP_DEBUG_COUNTER
 	to->tcp_time_wait.value.ui64 +=
-	    from->tcp_time_wait.value.ui64;
+	    from->tcp_time_wait;
 	to->tcp_rput_time_wait.value.ui64 +=
-	    from->tcp_rput_time_wait.value.ui64;
+	    from->tcp_rput_time_wait;
 	to->tcp_detach_time_wait.value.ui64 +=
-	    from->tcp_detach_time_wait.value.ui64;
+	    from->tcp_detach_time_wait;
 	to->tcp_timeout_calls.value.ui64 +=
-	    from->tcp_timeout_calls.value.ui64;
+	    from->tcp_timeout_calls;
 	to->tcp_timeout_cached_alloc.value.ui64 +=
-	    from->tcp_timeout_cached_alloc.value.ui64;
+	    from->tcp_timeout_cached_alloc;
 	to->tcp_timeout_cancel_reqs.value.ui64 +=
-	    from->tcp_timeout_cancel_reqs.value.ui64;
+	    from->tcp_timeout_cancel_reqs;
 	to->tcp_timeout_canceled.value.ui64 +=
-	    from->tcp_timeout_canceled.value.ui64;
+	    from->tcp_timeout_canceled;
 	to->tcp_timermp_freed.value.ui64 +=
-	    from->tcp_timermp_freed.value.ui64;
+	    from->tcp_timermp_freed;
 	to->tcp_push_timer_cnt.value.ui64 +=
-	    from->tcp_push_timer_cnt.value.ui64;
+	    from->tcp_push_timer_cnt;
 	to->tcp_ack_timer_cnt.value.ui64 +=
-	    from->tcp_ack_timer_cnt.value.ui64;
+	    from->tcp_ack_timer_cnt;
 #endif
 }
--- a/usr/src/uts/common/inet/tcp/tcp_timers.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp/tcp_timers.c	Mon Jul 19 17:27:45 2010 -0700
@@ -695,14 +695,18 @@
 		first_threshold =  tcp->tcp_first_ctimer_threshold;
 		second_threshold = tcp->tcp_second_ctimer_threshold;
 
-		/* Retransmit forever unless this is a passive open... */
+		/*
+		 * If an app has set the second_threshold to 0, it means that
+		 * we need to retransmit forever, unless this is a passive
+		 * open.  We need to set second_threshold back to a normal
+		 * value such that later comparison with it still makes
+		 * sense.  But we set dont_timeout to B_TRUE so that we will
+		 * never time out.
+		 */
 		if (second_threshold == 0) {
-			if (!tcp->tcp_active_open) {
-				second_threshold =
-				    tcps->tcps_ip_abort_linterval;
-			} else {
+			second_threshold = tcps->tcps_ip_abort_linterval;
+			if (tcp->tcp_active_open)
 				dont_timeout = B_TRUE;
-			}
 		}
 		break;
 	case TCPS_ESTABLISHED:
@@ -712,8 +716,10 @@
 		 * forever.  But if the end point is closed, the normal
 		 * timeout applies.
 		 */
-		if (second_threshold == 0)
+		if (second_threshold == 0) {
+			second_threshold = tcps->tcps_ip_abort_linterval;
 			dont_timeout = B_TRUE;
+		}
 		/* FALLTHRU */
 	case TCPS_FIN_WAIT_1:
 	case TCPS_CLOSING:
@@ -892,8 +898,7 @@
 		dont_timeout = B_FALSE;
 	}
 
-	if (!dont_timeout && second_threshold == 0)
-		second_threshold = tcps->tcps_ip_abort_interval;
+	ASSERT(second_threshold != 0);
 
 	if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) {
 		/*
@@ -903,8 +908,14 @@
 			tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
 			    tcp->tcp_xmit_head, B_TRUE);
 
-		if (dont_timeout)
+		if (dont_timeout) {
+			/*
+			 * Reset tcp_ms_we_have_waited to avoid overflow since
+			 * we are going to retransmit forever.
+			 */
+			tcp->tcp_ms_we_have_waited = second_threshold;
 			goto timer_rexmit;
+		}
 
 		/*
 		 * For zero window probe, we need to send indefinitely,
--- a/usr/src/uts/common/inet/tcp/tcp_tunables.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp/tcp_tunables.c	Mon Jul 19 17:27:45 2010 -0700
@@ -98,7 +98,7 @@
 			break;
 		}
 	}
-ret:
+
 	mutex_exit(&tcps->tcps_listener_conf_lock);
 	return (err);
 }
--- a/usr/src/uts/common/inet/tcp_impl.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp_impl.h	Mon Jul 19 17:27:45 2010 -0700
@@ -308,8 +308,8 @@
  * connection (or the listener) which decrements tlc_cnt to zero frees the
  * struct.
  *
- * tlc_max is the threshold value tcps_conn_listen_port.  It is set when the
- * tcp_listen_cnt_t is allocated.
+ * tlc_max is the maximum number of concurrent TCP connections created from a
+ * listener.  It is calculated when the tcp_listen_cnt_t is allocated.
  *
  * tlc_report_time stores the time when cmn_err() is called to report that the
  * max has been exceeded.  Report is done at most once every
@@ -694,10 +694,10 @@
 /*
  * Misc functions in tcp_misc.c.
  */
-extern int	tcp_cpu_update(cpu_setup_t, int, void *);
+extern uint32_t	tcp_find_listener_conf(tcp_stack_t *, in_port_t);
 extern void	tcp_ioctl_abort_conn(queue_t *, mblk_t *);
-extern uint32_t	tcp_find_listener_conf(tcp_stack_t *, in_port_t);
 extern void	tcp_listener_conf_cleanup(tcp_stack_t *);
+extern void	tcp_stack_cpu_add(tcp_stack_t *, processorid_t);
 
 #endif	/* _KERNEL */
 
--- a/usr/src/uts/common/inet/tcp_stack.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp_stack.h	Mon Jul 19 17:27:45 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_INET_TCP_STACK_H
@@ -77,8 +76,8 @@
 	 * MIB-2 stuff for SNMP
 	 * Note: tcpInErrs {tcp 15} is accumulated in ip.c
 	 */
-	kstat_t		*tcps_mibkp;	/* kstat exporting tcp_mib data */
-	kstat_t		*tcps_kstat;
+	kstat_t		*tcps_mibkp;	/* kstat exporting mib2_tcp_t data */
+	kstat_t		*tcps_kstat;	/* kstat exporting tcp_stat_t data */
 
 	uint32_t	tcps_iss_incr_extra;
 				/* Incremented for each connection */
--- a/usr/src/uts/common/inet/tcp_stats.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tcp_stats.h	Mon Jul 19 17:27:45 2010 -0700
@@ -18,9 +18,9 @@
  *
  * CDDL HEADER END
  */
+
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_INET_TCP_STATS_H
@@ -91,8 +91,8 @@
 	kstat_named_t	tcp_no_listener;
 	kstat_named_t	tcp_listendrop;
 	kstat_named_t	tcp_listendropq0;
-	kstat_named_t   tcp_wsrv_called;
-	kstat_named_t   tcp_flwctl_on;
+	kstat_named_t	tcp_wsrv_called;
+	kstat_named_t	tcp_flwctl_on;
 	kstat_named_t	tcp_timer_fire_early;
 	kstat_named_t	tcp_timer_fire_miss;
 	kstat_named_t	tcp_zcopy_on;
@@ -135,6 +135,62 @@
 #endif
 } tcp_stat_t;
 
+/*
+ * This struct contains only the counter part of tcp_stat_t.  It is used
+ * in tcp_stats_cpu_t instead of tcp_stat_t to save memory space.
+ */
+typedef struct tcp_stat_counter_s {
+	uint64_t	tcp_time_wait_syn_success;
+	uint64_t	tcp_clean_death_nondetached;
+	uint64_t	tcp_eager_blowoff_q;
+	uint64_t	tcp_eager_blowoff_q0;
+	uint64_t	tcp_no_listener;
+	uint64_t	tcp_listendrop;
+	uint64_t	tcp_listendropq0;
+	uint64_t	tcp_wsrv_called;
+	uint64_t	tcp_flwctl_on;
+	uint64_t	tcp_timer_fire_early;
+	uint64_t	tcp_timer_fire_miss;
+	uint64_t	tcp_zcopy_on;
+	uint64_t	tcp_zcopy_off;
+	uint64_t	tcp_zcopy_backoff;
+	uint64_t	tcp_fusion_flowctl;
+	uint64_t	tcp_fusion_backenabled;
+	uint64_t	tcp_fusion_urg;
+	uint64_t	tcp_fusion_putnext;
+	uint64_t	tcp_fusion_unfusable;
+	uint64_t	tcp_fusion_aborted;
+	uint64_t	tcp_fusion_unqualified;
+	uint64_t	tcp_fusion_rrw_busy;
+	uint64_t	tcp_fusion_rrw_msgcnt;
+	uint64_t	tcp_fusion_rrw_plugged;
+	uint64_t	tcp_in_ack_unsent_drop;
+	uint64_t	tcp_sock_fallback;
+	uint64_t	tcp_lso_enabled;
+	uint64_t	tcp_lso_disabled;
+	uint64_t	tcp_lso_times;
+	uint64_t	tcp_lso_pkt_out;
+	uint64_t	tcp_listen_cnt_drop;
+	uint64_t	tcp_listen_mem_drop;
+	uint64_t	tcp_zwin_mem_drop;
+	uint64_t	tcp_zwin_ack_syn;
+	uint64_t	tcp_rst_unsent;
+	uint64_t	tcp_reclaim_cnt;
+	uint64_t	tcp_reass_timeout;
+#ifdef TCP_DEBUG_COUNTER
+	uint64_t	tcp_time_wait;
+	uint64_t	tcp_rput_time_wait;
+	uint64_t	tcp_detach_time_wait;
+	uint64_t	tcp_timeout_calls;
+	uint64_t	tcp_timeout_cached_alloc;
+	uint64_t	tcp_timeout_cancel_reqs;
+	uint64_t	tcp_timeout_canceled;
+	uint64_t	tcp_timermp_freed;
+	uint64_t	tcp_push_timer_cnt;
+	uint64_t	tcp_ack_timer_cnt;
+#endif
+} tcp_stat_counter_t;
+
 typedef struct tcp_g_stat {
 	kstat_named_t	tcp_timermp_alloced;
 	kstat_named_t	tcp_timermp_allocfail;
@@ -144,9 +200,9 @@
 
 /* Per CPU stats: TCP MIB2, TCP kstat and connection counter. */
 typedef struct {
-	int64_t		tcp_sc_conn_cnt;
-	mib2_tcp_t	tcp_sc_mib;
-	tcp_stat_t	tcp_sc_stats;
+	int64_t			tcp_sc_conn_cnt;
+	mib2_tcp_t		tcp_sc_mib;
+	tcp_stat_counter_t	tcp_sc_stats;
 } tcp_stats_cpu_t;
 
 #define	TCPS_BUMP_MIB(tcps, x) \
@@ -158,7 +214,7 @@
 #if TCP_DEBUG_COUNTER
 #define	TCP_DBGSTAT(tcps, x)	\
 	atomic_inc_64(		\
-	    &((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64))
+	    &((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x))
 #define	TCP_G_DBGSTAT(x)	\
 	atomic_inc_64(&(tcp_g_statistics.x.value.ui64))
 #else
@@ -169,12 +225,13 @@
 #define	TCP_G_STAT(x)	(tcp_g_statistics.x.value.ui64++)
 
 #define	TCP_STAT(tcps, x)		\
-	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64++)
+	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x++)
 #define	TCP_STAT_UPDATE(tcps, x, n)	\
-	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64 += (n))
+	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x += (n))
 #define	TCP_STAT_SET(tcps, x, n)	\
-	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64 = (n))
+	((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x = (n))
 
+/* Global TCP stats for all IP stacks. */
 extern tcp_g_stat_t	tcp_g_statistics;
 extern kstat_t	*tcp_g_kstat;
 
--- a/usr/src/uts/common/inet/tunables.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/tunables.h	Mon Jul 19 17:27:45 2010 -0700
@@ -140,6 +140,8 @@
 #define	HOURS		(60 * MINUTES)
 #define	DAYS		(24 * HOURS)
 
+#define	MB		(1024 * 1024)
+
 /* Largest TCP/UDP/SCTP port number */
 #define	ULP_MAX_PORT	(64 * 1024 - 1)
 
--- a/usr/src/uts/common/inet/udp/udp.c	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/udp/udp.c	Mon Jul 19 17:27:45 2010 -0700
@@ -181,13 +181,6 @@
 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
 static void	udp_stack_fini(netstackid_t stackid, void *arg);
 
-static void	*udp_kstat_init(netstackid_t stackid);
-static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
-static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
-static void	udp_kstat2_fini(netstackid_t, kstat_t *);
-static int	udp_kstat_update(kstat_t *kp, int rw);
-
-
 /* Common routines for TPI and socket module */
 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
 
@@ -1128,7 +1121,7 @@
 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
 		    opt_length;
 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
-			BUMP_MIB(&us->us_udp_mib, udpInErrors);
+			UDPS_BUMP_MIB(us, udpInErrors);
 			break;
 		}
 
@@ -2265,7 +2258,7 @@
 			/* Allocation failed. Drop packet */
 			mutex_exit(&connp->conn_lock);
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpInErrors);
+			UDPS_BUMP_MIB(us, udpInErrors);
 			return;
 		}
 		mutex_exit(&connp->conn_lock);
@@ -2328,7 +2321,7 @@
 		mp1 = allocb(udi_size, BPRI_MED);
 		if (mp1 == NULL) {
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpInErrors);
+			UDPS_BUMP_MIB(us, udpInErrors);
 			return;
 		}
 		mp1->b_cont = mp;
@@ -2377,7 +2370,7 @@
 		mp1 = allocb(udi_size, BPRI_MED);
 		if (mp1 == NULL) {
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpInErrors);
+			UDPS_BUMP_MIB(us, udpInErrors);
 			return;
 		}
 		mp1->b_cont = mp;
@@ -2440,312 +2433,13 @@
 		pkt_len -= hdr_length;
 	}
 
-	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
+	UDPS_BUMP_MIB(us, udpHCInDatagrams);
 	udp_ulp_recv(connp, mp1, pkt_len, ira);
 	return;
 
 tossit:
 	freemsg(mp);
-	BUMP_MIB(&us->us_udp_mib, udpInErrors);
-}
-
-/*
- * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
- * information that can be changing beneath us.
- */
-mblk_t *
-udp_snmp_get(queue_t *q, mblk_t *mpctl)
-{
-	mblk_t			*mpdata;
-	mblk_t			*mp_conn_ctl;
-	mblk_t			*mp_attr_ctl;
-	mblk_t			*mp6_conn_ctl;
-	mblk_t			*mp6_attr_ctl;
-	mblk_t			*mp_conn_tail;
-	mblk_t			*mp_attr_tail;
-	mblk_t			*mp6_conn_tail;
-	mblk_t			*mp6_attr_tail;
-	struct opthdr		*optp;
-	mib2_udpEntry_t		ude;
-	mib2_udp6Entry_t	ude6;
-	mib2_transportMLPEntry_t mlp;
-	int			state;
-	zoneid_t		zoneid;
-	int			i;
-	connf_t			*connfp;
-	conn_t			*connp = Q_TO_CONN(q);
-	int			v4_conn_idx;
-	int			v6_conn_idx;
-	boolean_t		needattr;
-	udp_t			*udp;
-	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
-	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
-	mblk_t			*mp2ctl;
-
-	/*
-	 * make a copy of the original message
-	 */
-	mp2ctl = copymsg(mpctl);
-
-	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
-	if (mpctl == NULL ||
-	    (mpdata = mpctl->b_cont) == NULL ||
-	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
-	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
-	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
-	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
-		freemsg(mp_conn_ctl);
-		freemsg(mp_attr_ctl);
-		freemsg(mp6_conn_ctl);
-		freemsg(mpctl);
-		freemsg(mp2ctl);
-		return (0);
-	}
-
-	zoneid = connp->conn_zoneid;
-
-	/* fixed length structure for IPv4 and IPv6 counters */
-	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
-	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
-	/* synchronize 64- and 32-bit counters */
-	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
-	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
-
-	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
-	optp->level = MIB2_UDP;
-	optp->name = 0;
-	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
-	    sizeof (us->us_udp_mib));
-	optp->len = msgdsize(mpdata);
-	qreply(q, mpctl);
-
-	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
-	v4_conn_idx = v6_conn_idx = 0;
-
-	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
-		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
-		connp = NULL;
-
-		while ((connp = ipcl_get_next_conn(connfp, connp,
-		    IPCL_UDPCONN))) {
-			udp = connp->conn_udp;
-			if (zoneid != connp->conn_zoneid)
-				continue;
-
-			/*
-			 * Note that the port numbers are sent in
-			 * host byte order
-			 */
-
-			if (udp->udp_state == TS_UNBND)
-				state = MIB2_UDP_unbound;
-			else if (udp->udp_state == TS_IDLE)
-				state = MIB2_UDP_idle;
-			else if (udp->udp_state == TS_DATA_XFER)
-				state = MIB2_UDP_connected;
-			else
-				state = MIB2_UDP_unknown;
-
-			needattr = B_FALSE;
-			bzero(&mlp, sizeof (mlp));
-			if (connp->conn_mlp_type != mlptSingle) {
-				if (connp->conn_mlp_type == mlptShared ||
-				    connp->conn_mlp_type == mlptBoth)
-					mlp.tme_flags |= MIB2_TMEF_SHARED;
-				if (connp->conn_mlp_type == mlptPrivate ||
-				    connp->conn_mlp_type == mlptBoth)
-					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
-				needattr = B_TRUE;
-			}
-			if (connp->conn_anon_mlp) {
-				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
-				needattr = B_TRUE;
-			}
-			switch (connp->conn_mac_mode) {
-			case CONN_MAC_DEFAULT:
-				break;
-			case CONN_MAC_AWARE:
-				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
-				needattr = B_TRUE;
-				break;
-			case CONN_MAC_IMPLICIT:
-				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
-				needattr = B_TRUE;
-				break;
-			}
-			mutex_enter(&connp->conn_lock);
-			if (udp->udp_state == TS_DATA_XFER &&
-			    connp->conn_ixa->ixa_tsl != NULL) {
-				ts_label_t *tsl;
-
-				tsl = connp->conn_ixa->ixa_tsl;
-				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
-				mlp.tme_doi = label2doi(tsl);
-				mlp.tme_label = *label2bslabel(tsl);
-				needattr = B_TRUE;
-			}
-			mutex_exit(&connp->conn_lock);
-
-			/*
-			 * Create an IPv4 table entry for IPv4 entries and also
-			 * any IPv6 entries which are bound to in6addr_any
-			 * (i.e. anything a IPv4 peer could connect/send to).
-			 */
-			if (connp->conn_ipversion == IPV4_VERSION ||
-			    (udp->udp_state <= TS_IDLE &&
-			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
-				ude.udpEntryInfo.ue_state = state;
-				/*
-				 * If in6addr_any this will set it to
-				 * INADDR_ANY
-				 */
-				ude.udpLocalAddress = connp->conn_laddr_v4;
-				ude.udpLocalPort = ntohs(connp->conn_lport);
-				if (udp->udp_state == TS_DATA_XFER) {
-					/*
-					 * Can potentially get here for
-					 * v6 socket if another process
-					 * (say, ping) has just done a
-					 * sendto(), changing the state
-					 * from the TS_IDLE above to
-					 * TS_DATA_XFER by the time we hit
-					 * this part of the code.
-					 */
-					ude.udpEntryInfo.ue_RemoteAddress =
-					    connp->conn_faddr_v4;
-					ude.udpEntryInfo.ue_RemotePort =
-					    ntohs(connp->conn_fport);
-				} else {
-					ude.udpEntryInfo.ue_RemoteAddress = 0;
-					ude.udpEntryInfo.ue_RemotePort = 0;
-				}
-
-				/*
-				 * We make the assumption that all udp_t
-				 * structs will be created within an address
-				 * region no larger than 32-bits.
-				 */
-				ude.udpInstance = (uint32_t)(uintptr_t)udp;
-				ude.udpCreationProcess =
-				    (connp->conn_cpid < 0) ?
-				    MIB2_UNKNOWN_PROCESS :
-				    connp->conn_cpid;
-				ude.udpCreationTime = connp->conn_open_time;
-
-				(void) snmp_append_data2(mp_conn_ctl->b_cont,
-				    &mp_conn_tail, (char *)&ude, sizeof (ude));
-				mlp.tme_connidx = v4_conn_idx++;
-				if (needattr)
-					(void) snmp_append_data2(
-					    mp_attr_ctl->b_cont, &mp_attr_tail,
-					    (char *)&mlp, sizeof (mlp));
-			}
-			if (connp->conn_ipversion == IPV6_VERSION) {
-				ude6.udp6EntryInfo.ue_state  = state;
-				ude6.udp6LocalAddress = connp->conn_laddr_v6;
-				ude6.udp6LocalPort = ntohs(connp->conn_lport);
-				mutex_enter(&connp->conn_lock);
-				if (connp->conn_ixa->ixa_flags &
-				    IXAF_SCOPEID_SET) {
-					ude6.udp6IfIndex =
-					    connp->conn_ixa->ixa_scopeid;
-				} else {
-					ude6.udp6IfIndex = connp->conn_bound_if;
-				}
-				mutex_exit(&connp->conn_lock);
-				if (udp->udp_state == TS_DATA_XFER) {
-					ude6.udp6EntryInfo.ue_RemoteAddress =
-					    connp->conn_faddr_v6;
-					ude6.udp6EntryInfo.ue_RemotePort =
-					    ntohs(connp->conn_fport);
-				} else {
-					ude6.udp6EntryInfo.ue_RemoteAddress =
-					    sin6_null.sin6_addr;
-					ude6.udp6EntryInfo.ue_RemotePort = 0;
-				}
-				/*
-				 * We make the assumption that all udp_t
-				 * structs will be created within an address
-				 * region no larger than 32-bits.
-				 */
-				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
-				ude6.udp6CreationProcess =
-				    (connp->conn_cpid < 0) ?
-				    MIB2_UNKNOWN_PROCESS :
-				    connp->conn_cpid;
-				ude6.udp6CreationTime = connp->conn_open_time;
-
-				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
-				    &mp6_conn_tail, (char *)&ude6,
-				    sizeof (ude6));
-				mlp.tme_connidx = v6_conn_idx++;
-				if (needattr)
-					(void) snmp_append_data2(
-					    mp6_attr_ctl->b_cont,
-					    &mp6_attr_tail, (char *)&mlp,
-					    sizeof (mlp));
-			}
-		}
-	}
-
-	/* IPv4 UDP endpoints */
-	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
-	    sizeof (struct T_optmgmt_ack)];
-	optp->level = MIB2_UDP;
-	optp->name = MIB2_UDP_ENTRY;
-	optp->len = msgdsize(mp_conn_ctl->b_cont);
-	qreply(q, mp_conn_ctl);
-
-	/* table of MLP attributes... */
-	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
-	    sizeof (struct T_optmgmt_ack)];
-	optp->level = MIB2_UDP;
-	optp->name = EXPER_XPORT_MLP;
-	optp->len = msgdsize(mp_attr_ctl->b_cont);
-	if (optp->len == 0)
-		freemsg(mp_attr_ctl);
-	else
-		qreply(q, mp_attr_ctl);
-
-	/* IPv6 UDP endpoints */
-	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
-	    sizeof (struct T_optmgmt_ack)];
-	optp->level = MIB2_UDP6;
-	optp->name = MIB2_UDP6_ENTRY;
-	optp->len = msgdsize(mp6_conn_ctl->b_cont);
-	qreply(q, mp6_conn_ctl);
-
-	/* table of MLP attributes... */
-	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
-	    sizeof (struct T_optmgmt_ack)];
-	optp->level = MIB2_UDP6;
-	optp->name = EXPER_XPORT_MLP;
-	optp->len = msgdsize(mp6_attr_ctl->b_cont);
-	if (optp->len == 0)
-		freemsg(mp6_attr_ctl);
-	else
-		qreply(q, mp6_attr_ctl);
-
-	return (mp2ctl);
-}
-
-/*
- * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
- * NOTE: Per MIB-II, UDP has no writable data.
- * TODO:  If this ever actually tries to set anything, it needs to be
- * to do the appropriate locking.
- */
-/* ARGSUSED */
-int
-udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
-    uchar_t *ptr, int len)
-{
-	switch (level) {
-	case MIB2_UDP:
-		return (0);
-	default:
-		return (1);
-	}
+	UDPS_BUMP_MIB(us, udpInErrors);
 }
 
 /*
@@ -2922,7 +2616,7 @@
 	 */
 	ixa = conn_get_ixa_exclusive(connp);
 	if (ixa == NULL) {
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		return (ENOMEM);
 	}
@@ -2945,7 +2639,7 @@
 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
 		ixa->ixa_cpid = connp->conn_cpid;
 		ixa_refrele(ixa);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		return (ENOMEM);
 	}
@@ -2953,7 +2647,7 @@
 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
 	mutex_exit(&connp->conn_lock);
 	if (error != 0) {
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		goto done;
 	}
@@ -2989,7 +2683,7 @@
 		 * module for "is_absreq_failure"
 		 */
 		freemsg(mp);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		goto done;
 	}
 	ASSERT(is_absreq_failure == 0);
@@ -3085,7 +2779,7 @@
 	default:
 	failed:
 		freemsg(mp);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		goto done;
 	}
 
@@ -3101,7 +2795,7 @@
 		/* Using UDP MLP requires SCM_UCRED from user */
 		if (connp->conn_mlp_type != mlptSingle &&
 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			error = ECONNREFUSED;
 			freemsg(mp);
 			goto done;
@@ -3121,7 +2815,7 @@
 		error = conn_update_label(connp, ixa, &v6dst, ipp);
 		if (error != 0) {
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			goto done;
 		}
 	}
@@ -3129,17 +2823,17 @@
 	    flowinfo, mp, &error);
 	if (mp == NULL) {
 		ASSERT(error != 0);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		goto done;
 	}
 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
 		error = EMSGSIZE;
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		goto done;
 	}
 	/* We're done.  Pass the packet to ip. */
-	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
+	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
 
 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
@@ -3203,7 +2897,7 @@
 	 */
 	ixa = conn_get_ixa(connp, B_FALSE);
 	if (ixa == NULL) {
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		return (ENOMEM);
 	}
@@ -3224,7 +2918,7 @@
 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
 		ixa->ixa_cpid = connp->conn_cpid;
 		ixa_refrele(ixa);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		return (error);
 	}
@@ -3283,7 +2977,7 @@
 			ixa->ixa_cpid = connp->conn_cpid;
 			ixa_refrele(ixa);
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			return (error);
 		}
 	} else {
@@ -3293,7 +2987,7 @@
 	ASSERT(ixa->ixa_ire != NULL);
 
 	/* We're done.  Pass the packet to ip. */
-	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
+	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
 
 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
@@ -3355,7 +3049,7 @@
 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
 		ixa->ixa_cpid = connp->conn_cpid;
 		ixa_refrele(ixa);
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		freemsg(mp);
 		return (error);
 	}
@@ -3414,7 +3108,7 @@
 			ixa->ixa_cpid = connp->conn_cpid;
 			ixa_refrele(ixa);
 			freemsg(mp);
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			return (error);
 		}
 	} else {
@@ -3423,7 +3117,7 @@
 	}
 
 	/* We're done.  Pass the packet to ip. */
-	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
+	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
 
 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
@@ -3679,7 +3373,7 @@
 	case M_DATA:
 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
 			/* Not connected; address is required */
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			UDP_DBGSTAT(us, udp_data_notconn);
 			UDP_STAT(us, udp_out_err_notconn);
 			freemsg(mp);
@@ -3694,7 +3388,7 @@
 		cr = msg_getcred(mp, &pid);
 		ASSERT(cr != NULL);
 		if (cr == NULL) {
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			freemsg(mp);
 			return;
 		}
@@ -3946,7 +3640,7 @@
 	return;
 
 ud_error2:
-	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+	UDPS_BUMP_MIB(us, udpOutErrors);
 	freemsg(data_mp);
 	UDP_STAT(us, udp_out_err_output);
 	ASSERT(mp != NULL);
@@ -4227,7 +3921,7 @@
 	}
 
 	/* We're done.  Pass the packet to ip. */
-	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
+	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
 
 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
 	    void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
@@ -4276,7 +3970,7 @@
 	ixa_refrele(ixa);
 
 	freemsg(data_mp);
-	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+	UDPS_BUMP_MIB(us, udpOutErrors);
 	UDP_STAT(us, udp_out_err_output);
 	return (error);
 }
@@ -4688,7 +4382,18 @@
 	    KM_SLEEP);
 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
 
-	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
+	/* Allocate the per netstack stats */
+	mutex_enter(&cpu_lock);
+	us->us_sc_cnt = MAX(ncpus, boot_ncpus);
+	mutex_exit(&cpu_lock);
+	us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
+	    KM_SLEEP);
+	for (i = 0; i < us->us_sc_cnt; i++) {
+		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
+		    KM_SLEEP);
+	}
+
+	us->us_kstat = udp_kstat2_init(stackid);
 	us->us_mibkp = udp_kstat_init(stackid);
 
 	major = mod_name_to_major(INET_NAME);
@@ -4715,6 +4420,10 @@
 
 	us->us_bind_fanout = NULL;
 
+	for (i = 0; i < us->us_sc_cnt; i++)
+		kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
+	kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
+
 	kmem_free(us->us_propinfo_tbl,
 	    udp_propinfo_count * sizeof (mod_prop_info_t));
 	us->us_propinfo_tbl = NULL;
@@ -4724,132 +4433,12 @@
 
 	udp_kstat2_fini(stackid, us->us_kstat);
 	us->us_kstat = NULL;
-	bzero(&us->us_statistics, sizeof (us->us_statistics));
 
 	mutex_destroy(&us->us_epriv_port_lock);
 	ldi_ident_release(us->us_ldi_ident);
 	kmem_free(us, sizeof (*us));
 }
 
-static void *
-udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
-{
-	kstat_t *ksp;
-
-	udp_stat_t template = {
-		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
-		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
-		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
-		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
-		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
-#ifdef DEBUG
-		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
-		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
-		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
-		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
-		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
-		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
-		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
-#endif
-	};
-
-	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
-	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
-	    KSTAT_FLAG_VIRTUAL, stackid);
-
-	if (ksp == NULL)
-		return (NULL);
-
-	bcopy(&template, us_statisticsp, sizeof (template));
-	ksp->ks_data = (void *)us_statisticsp;
-	ksp->ks_private = (void *)(uintptr_t)stackid;
-
-	kstat_install(ksp);
-	return (ksp);
-}
-
-static void
-udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
-{
-	if (ksp != NULL) {
-		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
-		kstat_delete_netstack(ksp, stackid);
-	}
-}
-
-static void *
-udp_kstat_init(netstackid_t stackid)
-{
-	kstat_t	*ksp;
-
-	udp_named_kstat_t template = {
-		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
-		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
-		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
-		{ "entrySize",		KSTAT_DATA_INT32, 0 },
-		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
-		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
-	};
-
-	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
-	    KSTAT_TYPE_NAMED,
-	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
-
-	if (ksp == NULL || ksp->ks_data == NULL)
-		return (NULL);
-
-	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
-	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
-
-	bcopy(&template, ksp->ks_data, sizeof (template));
-	ksp->ks_update = udp_kstat_update;
-	ksp->ks_private = (void *)(uintptr_t)stackid;
-
-	kstat_install(ksp);
-	return (ksp);
-}
-
-static void
-udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
-{
-	if (ksp != NULL) {
-		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
-		kstat_delete_netstack(ksp, stackid);
-	}
-}
-
-static int
-udp_kstat_update(kstat_t *kp, int rw)
-{
-	udp_named_kstat_t *udpkp;
-	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
-	netstack_t	*ns;
-	udp_stack_t	*us;
-
-	if ((kp == NULL) || (kp->ks_data == NULL))
-		return (EIO);
-
-	if (rw == KSTAT_WRITE)
-		return (EACCES);
-
-	ns = netstack_find_by_stackid(stackid);
-	if (ns == NULL)
-		return (-1);
-	us = ns->netstack_udp;
-	if (us == NULL) {
-		netstack_rele(ns);
-		return (-1);
-	}
-	udpkp = (udp_named_kstat_t *)kp->ks_data;
-
-	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
-	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
-	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
-	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
-	netstack_rele(ns);
-	return (0);
-}
-
 static size_t
 udp_set_rcv_hiwat(udp_t *udp, size_t size)
 {
@@ -4897,6 +4486,25 @@
 }
 
 /*
+ * When a CPU is added, we need to allocate the per CPU stats struct.
+ */
+void
+udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
+{
+	int i;
+
+	if (cpu_seqid < us->us_sc_cnt)
+		return;
+	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
+		ASSERT(us->us_sc[i] == NULL);
+		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
+		    KM_SLEEP);
+	}
+	membar_producer();
+	us->us_sc_cnt = cpu_seqid + 1;
+}
+
+/*
  * Below routines for UDP socket module.
  */
 
@@ -6297,7 +5905,7 @@
 	/* Connected? */
 	if (msg->msg_name == NULL) {
 		if (udp->udp_state != TS_DATA_XFER) {
-			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+			UDPS_BUMP_MIB(us, udpOutErrors);
 			return (EDESTADDRREQ);
 		}
 		if (msg->msg_controllen != 0) {
@@ -6312,13 +5920,13 @@
 			return (error);
 	}
 	if (udp->udp_state == TS_DATA_XFER) {
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		return (EISCONN);
 	}
 	error = proto_verify_ip_addr(connp->conn_family,
 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
 	if (error != 0) {
-		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+		UDPS_BUMP_MIB(us, udpOutErrors);
 		return (error);
 	}
 	switch (connp->conn_family) {
@@ -6341,7 +5949,7 @@
 			 * since it is bound to a mapped address.
 			 */
 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				return (EADDRNOTAVAIL);
 			}
 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
@@ -6349,7 +5957,7 @@
 			ipversion = IPV6_VERSION;
 		} else {
 			if (connp->conn_ipv6_v6only) {
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				return (EADDRNOTAVAIL);
 			}
 
@@ -6362,7 +5970,7 @@
 			 */
 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				return (EADDRNOTAVAIL);
 			}
 
@@ -6382,7 +5990,7 @@
 		if (msg->msg_controllen == 0) {
 			ixa = conn_get_ixa(connp, B_FALSE);
 			if (ixa == NULL) {
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				return (ENOMEM);
 			}
 		} else {
@@ -6402,7 +6010,7 @@
 			    &sin2->sin6_addr) &&
 			    sin6->sin6_family == sin2->sin6_family) {
 				mutex_exit(&connp->conn_lock);
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				if (ixa != NULL)
 					ixa_refrele(ixa);
 				return (error);
@@ -6445,7 +6053,7 @@
 		if (msg->msg_controllen == 0) {
 			ixa = conn_get_ixa(connp, B_FALSE);
 			if (ixa == NULL) {
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				return (ENOMEM);
 			}
 		} else {
@@ -6463,7 +6071,7 @@
 			if (sin->sin_port == sin2->sin_port &&
 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
 				mutex_exit(&connp->conn_lock);
-				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
+				UDPS_BUMP_MIB(us, udpOutErrors);
 				if (ixa != NULL)
 					ixa_refrele(ixa);
 				return (error);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/udp/udp_stats.c	Mon Jul 19 17:27:45 2010 -0700
@@ -0,0 +1,578 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/tihdr.h>
+#include <sys/policy.h>
+#include <sys/tsol/tnet.h>
+
+#include <inet/common.h>
+#include <inet/kstatcom.h>
+#include <inet/snmpcom.h>
+#include <inet/mib2.h>
+#include <inet/optcom.h>
+#include <inet/snmpcom.h>
+#include <inet/kstatcom.h>
+#include <inet/udp_impl.h>
+
+static int	udp_kstat_update(kstat_t *, int);
+static int	udp_kstat2_update(kstat_t *, int);
+static void	udp_sum_mib(udp_stack_t *, mib2_udp_t *);
+static void	udp_clr_stats(udp_stat_t *);
+static void	udp_add_stats(udp_stat_counter_t *, udp_stat_t *);
+static void	udp_add_mib(mib2_udp_t *, mib2_udp_t *);
+/*
+ * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
+ * information that can be changing beneath us.
+ */
+mblk_t *
+udp_snmp_get(queue_t *q, mblk_t *mpctl)
+{
+	mblk_t			*mpdata;
+	mblk_t			*mp_conn_ctl;
+	mblk_t			*mp_attr_ctl;
+	mblk_t			*mp6_conn_ctl;
+	mblk_t			*mp6_attr_ctl;
+	mblk_t			*mp_conn_tail;
+	mblk_t			*mp_attr_tail;
+	mblk_t			*mp6_conn_tail;
+	mblk_t			*mp6_attr_tail;
+	struct opthdr		*optp;
+	mib2_udpEntry_t		ude;
+	mib2_udp6Entry_t	ude6;
+	mib2_transportMLPEntry_t mlp;
+	int			state;
+	zoneid_t		zoneid;
+	int			i;
+	connf_t			*connfp;
+	conn_t			*connp = Q_TO_CONN(q);
+	int			v4_conn_idx;
+	int			v6_conn_idx;
+	boolean_t		needattr;
+	udp_t			*udp;
+	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
+	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
+	mblk_t			*mp2ctl;
+	mib2_udp_t		udp_mib;
+
+	/*
+	 * make a copy of the original message
+	 */
+	mp2ctl = copymsg(mpctl);
+
+	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
+	if (mpctl == NULL ||
+	    (mpdata = mpctl->b_cont) == NULL ||
+	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
+	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
+	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
+	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
+		freemsg(mp_conn_ctl);
+		freemsg(mp_attr_ctl);
+		freemsg(mp6_conn_ctl);
+		freemsg(mpctl);
+		freemsg(mp2ctl);
+		return (0);
+	}
+
+	zoneid = connp->conn_zoneid;
+
+	bzero(&udp_mib, sizeof (udp_mib));
+	/* fixed length structure for IPv4 and IPv6 counters */
+	SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
+	SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
+
+	udp_sum_mib(us, &udp_mib);
+
+	/*
+	 * Synchronize 32- and 64-bit counters.  Note that udpInDatagrams and
+	 * udpOutDatagrams are not updated anywhere in UDP.  The new 64 bits
+	 * counters are used.  Hence the old counters' values in us_sc_mib
+	 * are always 0.
+	 */
+	SYNC32_MIB(&udp_mib, udpInDatagrams, udpHCInDatagrams);
+	SYNC32_MIB(&udp_mib, udpOutDatagrams, udpHCOutDatagrams);
+
+	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
+	optp->level = MIB2_UDP;
+	optp->name = 0;
+	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
+	optp->len = msgdsize(mpdata);
+	qreply(q, mpctl);
+
+	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
+	v4_conn_idx = v6_conn_idx = 0;
+
+	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
+		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
+		connp = NULL;
+
+		while ((connp = ipcl_get_next_conn(connfp, connp,
+		    IPCL_UDPCONN))) {
+			udp = connp->conn_udp;
+			if (zoneid != connp->conn_zoneid)
+				continue;
+
+			/*
+			 * Note that the port numbers are sent in
+			 * host byte order
+			 */
+
+			if (udp->udp_state == TS_UNBND)
+				state = MIB2_UDP_unbound;
+			else if (udp->udp_state == TS_IDLE)
+				state = MIB2_UDP_idle;
+			else if (udp->udp_state == TS_DATA_XFER)
+				state = MIB2_UDP_connected;
+			else
+				state = MIB2_UDP_unknown;
+
+			needattr = B_FALSE;
+			bzero(&mlp, sizeof (mlp));
+			if (connp->conn_mlp_type != mlptSingle) {
+				if (connp->conn_mlp_type == mlptShared ||
+				    connp->conn_mlp_type == mlptBoth)
+					mlp.tme_flags |= MIB2_TMEF_SHARED;
+				if (connp->conn_mlp_type == mlptPrivate ||
+				    connp->conn_mlp_type == mlptBoth)
+					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
+				needattr = B_TRUE;
+			}
+			if (connp->conn_anon_mlp) {
+				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
+				needattr = B_TRUE;
+			}
+			switch (connp->conn_mac_mode) {
+			case CONN_MAC_DEFAULT:
+				break;
+			case CONN_MAC_AWARE:
+				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
+				needattr = B_TRUE;
+				break;
+			case CONN_MAC_IMPLICIT:
+				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
+				needattr = B_TRUE;
+				break;
+			}
+			mutex_enter(&connp->conn_lock);
+			if (udp->udp_state == TS_DATA_XFER &&
+			    connp->conn_ixa->ixa_tsl != NULL) {
+				ts_label_t *tsl;
+
+				tsl = connp->conn_ixa->ixa_tsl;
+				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
+				mlp.tme_doi = label2doi(tsl);
+				mlp.tme_label = *label2bslabel(tsl);
+				needattr = B_TRUE;
+			}
+			mutex_exit(&connp->conn_lock);
+
+			/*
+			 * Create an IPv4 table entry for IPv4 entries and also
+			 * any IPv6 entries which are bound to in6addr_any
+			 * (i.e. anything a IPv4 peer could connect/send to).
+			 */
+			if (connp->conn_ipversion == IPV4_VERSION ||
+			    (udp->udp_state <= TS_IDLE &&
+			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
+				ude.udpEntryInfo.ue_state = state;
+				/*
+				 * If in6addr_any this will set it to
+				 * INADDR_ANY
+				 */
+				ude.udpLocalAddress = connp->conn_laddr_v4;
+				ude.udpLocalPort = ntohs(connp->conn_lport);
+				if (udp->udp_state == TS_DATA_XFER) {
+					/*
+					 * Can potentially get here for
+					 * v6 socket if another process
+					 * (say, ping) has just done a
+					 * sendto(), changing the state
+					 * from the TS_IDLE above to
+					 * TS_DATA_XFER by the time we hit
+					 * this part of the code.
+					 */
+					ude.udpEntryInfo.ue_RemoteAddress =
+					    connp->conn_faddr_v4;
+					ude.udpEntryInfo.ue_RemotePort =
+					    ntohs(connp->conn_fport);
+				} else {
+					ude.udpEntryInfo.ue_RemoteAddress = 0;
+					ude.udpEntryInfo.ue_RemotePort = 0;
+				}
+
+				/*
+				 * We make the assumption that all udp_t
+				 * structs will be created within an address
+				 * region no larger than 32-bits.
+				 */
+				ude.udpInstance = (uint32_t)(uintptr_t)udp;
+				ude.udpCreationProcess =
+				    (connp->conn_cpid < 0) ?
+				    MIB2_UNKNOWN_PROCESS :
+				    connp->conn_cpid;
+				ude.udpCreationTime = connp->conn_open_time;
+
+				(void) snmp_append_data2(mp_conn_ctl->b_cont,
+				    &mp_conn_tail, (char *)&ude, sizeof (ude));
+				mlp.tme_connidx = v4_conn_idx++;
+				if (needattr)
+					(void) snmp_append_data2(
+					    mp_attr_ctl->b_cont, &mp_attr_tail,
+					    (char *)&mlp, sizeof (mlp));
+			}
+			if (connp->conn_ipversion == IPV6_VERSION) {
+				ude6.udp6EntryInfo.ue_state  = state;
+				ude6.udp6LocalAddress = connp->conn_laddr_v6;
+				ude6.udp6LocalPort = ntohs(connp->conn_lport);
+				mutex_enter(&connp->conn_lock);
+				if (connp->conn_ixa->ixa_flags &
+				    IXAF_SCOPEID_SET) {
+					ude6.udp6IfIndex =
+					    connp->conn_ixa->ixa_scopeid;
+				} else {
+					ude6.udp6IfIndex = connp->conn_bound_if;
+				}
+				mutex_exit(&connp->conn_lock);
+				if (udp->udp_state == TS_DATA_XFER) {
+					ude6.udp6EntryInfo.ue_RemoteAddress =
+					    connp->conn_faddr_v6;
+					ude6.udp6EntryInfo.ue_RemotePort =
+					    ntohs(connp->conn_fport);
+				} else {
+					ude6.udp6EntryInfo.ue_RemoteAddress =
+					    sin6_null.sin6_addr;
+					ude6.udp6EntryInfo.ue_RemotePort = 0;
+				}
+				/*
+				 * We make the assumption that all udp_t
+				 * structs will be created within an address
+				 * region no larger than 32-bits.
+				 */
+				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
+				ude6.udp6CreationProcess =
+				    (connp->conn_cpid < 0) ?
+				    MIB2_UNKNOWN_PROCESS :
+				    connp->conn_cpid;
+				ude6.udp6CreationTime = connp->conn_open_time;
+
+				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
+				    &mp6_conn_tail, (char *)&ude6,
+				    sizeof (ude6));
+				mlp.tme_connidx = v6_conn_idx++;
+				if (needattr)
+					(void) snmp_append_data2(
+					    mp6_attr_ctl->b_cont,
+					    &mp6_attr_tail, (char *)&mlp,
+					    sizeof (mlp));
+			}
+		}
+	}
+
+	/* IPv4 UDP endpoints */
+	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
+	    sizeof (struct T_optmgmt_ack)];
+	optp->level = MIB2_UDP;
+	optp->name = MIB2_UDP_ENTRY;
+	optp->len = msgdsize(mp_conn_ctl->b_cont);
+	qreply(q, mp_conn_ctl);
+
+	/* table of MLP attributes... */
+	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
+	    sizeof (struct T_optmgmt_ack)];
+	optp->level = MIB2_UDP;
+	optp->name = EXPER_XPORT_MLP;
+	optp->len = msgdsize(mp_attr_ctl->b_cont);
+	if (optp->len == 0)
+		freemsg(mp_attr_ctl);
+	else
+		qreply(q, mp_attr_ctl);
+
+	/* IPv6 UDP endpoints */
+	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
+	    sizeof (struct T_optmgmt_ack)];
+	optp->level = MIB2_UDP6;
+	optp->name = MIB2_UDP6_ENTRY;
+	optp->len = msgdsize(mp6_conn_ctl->b_cont);
+	qreply(q, mp6_conn_ctl);
+
+	/* table of MLP attributes... */
+	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
+	    sizeof (struct T_optmgmt_ack)];
+	optp->level = MIB2_UDP6;
+	optp->name = EXPER_XPORT_MLP;
+	optp->len = msgdsize(mp6_attr_ctl->b_cont);
+	if (optp->len == 0)
+		freemsg(mp6_attr_ctl);
+	else
+		qreply(q, mp6_attr_ctl);
+
+	return (mp2ctl);
+}
+
+/*
+ * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
+ * NOTE: Per MIB-II, UDP has no writable data.
+ * TODO:  If this ever actually tries to set anything, it needs to be
+ * to do the appropriate locking.
+ */
+/* ARGSUSED */
+int
+udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
+    uchar_t *ptr, int len)
+{
+	switch (level) {
+	case MIB2_UDP:
+		return (0);
+	default:
+		return (1);
+	}
+}
+
+void
+udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
+{
+	if (ksp != NULL) {
+		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
+		kstat_delete_netstack(ksp, stackid);
+	}
+}
+
+/*
+ * To add stats from one mib2_udp_t to another.  Static fields are not added.
+ * The caller should set them up properly.
+ */
+static void
+udp_add_mib(mib2_udp_t *from, mib2_udp_t *to)
+{
+	to->udpHCInDatagrams += from->udpHCInDatagrams;
+	to->udpInErrors += from->udpInErrors;
+	to->udpHCOutDatagrams += from->udpHCOutDatagrams;
+	to->udpOutErrors += from->udpOutErrors;
+}
+
+
+void *
+udp_kstat2_init(netstackid_t stackid)
+{
+	kstat_t *ksp;
+
+	udp_stat_t template = {
+		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
+		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
+		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
+		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
+		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
+#ifdef DEBUG
+		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
+		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
+		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
+		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
+		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
+		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
+		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
+#endif
+	};
+
+	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
+	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
+	    0, stackid);
+
+	if (ksp == NULL)
+		return (NULL);
+
+	bcopy(&template, ksp->ks_data, sizeof (template));
+	ksp->ks_update = udp_kstat2_update;
+	ksp->ks_private = (void *)(uintptr_t)stackid;
+
+	kstat_install(ksp);
+	return (ksp);
+}
+
+void
+udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
+{
+	if (ksp != NULL) {
+		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
+		kstat_delete_netstack(ksp, stackid);
+	}
+}
+
+/*
+ * To copy counters from the per CPU udp_stat_counter_t to the stack
+ * udp_stat_t.
+ */
+static void
+udp_add_stats(udp_stat_counter_t *from, udp_stat_t *to)
+{
+	to->udp_sock_fallback.value.ui64 += from->udp_sock_fallback;
+	to->udp_out_opt.value.ui64 += from->udp_out_opt;
+	to->udp_out_err_notconn.value.ui64 += from->udp_out_err_notconn;
+	to->udp_out_err_output.value.ui64 += from->udp_out_err_output;
+	to->udp_out_err_tudr.value.ui64 += from->udp_out_err_tudr;
+#ifdef DEBUG
+	to->udp_data_conn.value.ui64 += from->udp_data_conn;
+	to->udp_data_notconn.value.ui64 += from->udp_data_notconn;
+	to->udp_out_lastdst.value.ui64 += from->udp_out_lastdst;
+	to->udp_out_diffdst.value.ui64 += from->udp_out_diffdst;
+	to->udp_out_ipv6.value.ui64 += from->udp_out_ipv6;
+	to->udp_out_mapped.value.ui64 += from->udp_out_mapped;
+	to->udp_out_ipv4.value.ui64 += from->udp_out_ipv4;
+#endif
+}
+
+/*
+ * To set all udp_stat_t counters to 0.
+ */
+static void
+udp_clr_stats(udp_stat_t *stats)
+{
+	stats->udp_sock_fallback.value.ui64 = 0;
+	stats->udp_out_opt.value.ui64 = 0;
+	stats->udp_out_err_notconn.value.ui64 = 0;
+	stats->udp_out_err_output.value.ui64 = 0;
+	stats->udp_out_err_tudr.value.ui64 = 0;
+#ifdef DEBUG
+	stats->udp_data_conn.value.ui64 = 0;
+	stats->udp_data_notconn.value.ui64 = 0;
+	stats->udp_out_lastdst.value.ui64 = 0;
+	stats->udp_out_diffdst.value.ui64 = 0;
+	stats->udp_out_ipv6.value.ui64 = 0;
+	stats->udp_out_mapped.value.ui64 = 0;
+	stats->udp_out_ipv4.value.ui64 = 0;
+#endif
+}
+
+static int
+udp_kstat2_update(kstat_t *kp, int rw)
+{
+	udp_stat_t	*stats;
+	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
+	netstack_t	*ns;
+	udp_stack_t	*us;
+	int		i;
+	int		cnt;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	ns = netstack_find_by_stackid(stackid);
+	if (ns == NULL)
+		return (-1);
+	us = ns->netstack_udp;
+	if (us == NULL) {
+		netstack_rele(ns);
+		return (-1);
+	}
+	stats = (udp_stat_t *)kp->ks_data;
+	udp_clr_stats(stats);
+
+	cnt = us->us_sc_cnt;
+	for (i = 0; i < cnt; i++)
+		udp_add_stats(&us->us_sc[i]->udp_sc_stats, stats);
+
+	netstack_rele(ns);
+	return (0);
+}
+
+void *
+udp_kstat_init(netstackid_t stackid)
+{
+	kstat_t	*ksp;
+
+	udp_named_kstat_t template = {
+		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
+		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
+		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
+		{ "entrySize",		KSTAT_DATA_INT32, 0 },
+		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
+		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
+	};
+
+	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
+	    KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
+
+	if (ksp == NULL)
+		return (NULL);
+
+	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
+	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
+
+	bcopy(&template, ksp->ks_data, sizeof (template));
+	ksp->ks_update = udp_kstat_update;
+	ksp->ks_private = (void *)(uintptr_t)stackid;
+
+	kstat_install(ksp);
+	return (ksp);
+}
+
+/*
+ * To sum up all MIB2 stats for a udp_stack_t from all per CPU stats.  The
+ * caller should initialize the target mib2_udp_t properly as this function
+ * just adds up all the per CPU stats.
+ */
+static void
+udp_sum_mib(udp_stack_t *us, mib2_udp_t *udp_mib)
+{
+	int i;
+	int cnt;
+
+	cnt = us->us_sc_cnt;
+	for (i = 0; i < cnt; i++)
+		udp_add_mib(&us->us_sc[i]->udp_sc_mib, udp_mib);
+}
+
+static int
+udp_kstat_update(kstat_t *kp, int rw)
+{
+	udp_named_kstat_t *udpkp;
+	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
+	netstack_t	*ns;
+	udp_stack_t	*us;
+	mib2_udp_t	udp_mib;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	ns = netstack_find_by_stackid(stackid);
+	if (ns == NULL)
+		return (-1);
+	us = ns->netstack_udp;
+	if (us == NULL) {
+		netstack_rele(ns);
+		return (-1);
+	}
+	udpkp = (udp_named_kstat_t *)kp->ks_data;
+
+	bzero(&udp_mib, sizeof (udp_mib));
+	udp_sum_mib(us, &udp_mib);
+
+	udpkp->inDatagrams.value.ui64 =	udp_mib.udpHCInDatagrams;
+	udpkp->inErrors.value.ui32 =	udp_mib.udpInErrors;
+	udpkp->outDatagrams.value.ui64 = udp_mib.udpHCOutDatagrams;
+	udpkp->outErrors.value.ui32 =	udp_mib.udpOutErrors;
+	netstack_rele(ns);
+	return (0);
+}
--- a/usr/src/uts/common/inet/udp_impl.h	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/common/inet/udp_impl.h	Mon Jul 19 17:27:45 2010 -0700
@@ -86,8 +86,34 @@
 	kstat_named_t	udp_out_mapped;
 	kstat_named_t	udp_out_ipv4;
 #endif
+} udp_stat_t;
 
-} udp_stat_t;
+/*
+ * This struct contains only the counter part of udp_stat_t.  It is used
+ * in udp_stats_cpu_t instead of udp_stat_t to save memory space.
+ */
+typedef struct {
+	uint64_t	udp_sock_fallback;
+	uint64_t	udp_out_opt;
+	uint64_t	udp_out_err_notconn;
+	uint64_t	udp_out_err_output;
+	uint64_t	udp_out_err_tudr;
+#ifdef DEBUG
+	uint64_t	udp_data_conn;
+	uint64_t	udp_data_notconn;
+	uint64_t	udp_out_lastdst;
+	uint64_t	udp_out_diffdst;
+	uint64_t	udp_out_ipv6;
+	uint64_t	udp_out_mapped;
+	uint64_t	udp_out_ipv4;
+#endif
+} udp_stat_counter_t;
+
+/* Per CPU stats: UDP MIB2 and UDP kstat. */
+typedef struct {
+	mib2_udp_t		udp_sc_mib;
+	udp_stat_counter_t	udp_sc_stats;
+} udp_stats_cpu_t;
 
 #define	UDP_NUM_EPRIV_PORTS	64
 
@@ -118,9 +144,6 @@
 
 	kstat_t		*us_mibkp;	/* kstats exporting mib data */
 	kstat_t		*us_kstat;
-	udp_stat_t	us_statistics;
-
-	mib2_udp_t	us_udp_mib;	/* SNMP fixed size info */
 
 /*
  * The smallest anonymous port in the priviledged port range which UDP
@@ -129,6 +152,9 @@
 	in_port_t	us_min_anonpriv_port;
 
 	ldi_ident_t	us_ldi_ident;
+
+	udp_stats_cpu_t	**us_sc;
+	int		us_sc_cnt;
 };
 
 typedef struct udp_stack udp_stack_t;
@@ -194,9 +220,12 @@
 #define	us_pmtu_discovery		us_propinfo_tbl[11].prop_cur_bval
 #define	us_sendto_ignerr		us_propinfo_tbl[12].prop_cur_bval
 
-#define	UDP_STAT(us, x)		((us)->us_statistics.x.value.ui64++)
+#define	UDPS_BUMP_MIB(us, x)	\
+	BUMP_MIB(&(us)->us_sc[CPU->cpu_seqid]->udp_sc_mib, x)
+
+#define	UDP_STAT(us, x)		((us)->us_sc[CPU->cpu_seqid]->udp_sc_stats.x++)
 #define	UDP_STAT_UPDATE(us, x, n)	\
-			((us)->us_statistics.x.value.ui64 += (n))
+	((us)->us_sc[CPU->cpu_seqid]->udp_sc_stats.x += (n))
 #ifdef DEBUG
 #define	UDP_DBGSTAT(us, x)	UDP_STAT(us, x)
 #else
@@ -215,6 +244,13 @@
 		    socklen_t addrlen);
 extern void	udp_wput(queue_t *, mblk_t *);
 
+extern void	*udp_kstat_init(netstackid_t stackid);
+extern void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
+extern void	*udp_kstat2_init(netstackid_t stackid);
+extern void	udp_kstat2_fini(netstackid_t, kstat_t *);
+
+extern void	udp_stack_cpu_add(udp_stack_t *, processorid_t);
+
 /*
  * Object to represent database of options to search passed to
  * {sock,tpi}optcom_req() interface routine to take care of option
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64	Mon Jul 19 17:27:45 2010 -0700
@@ -215,14 +215,18 @@
 sctp_asconf_dispatch_tbl
 sctp_conn_cache
 sctp_conn_hash_size
+sctp_do_reclaim
 sctp_kmem_faddr_cache
 sctp_kmem_ftsn_set_cache
 sctp_kmem_set_cache
+sctp_min_assoc_listener
 sctp_opt_arr
 sctp_opt_arr_size
+sctp_pa_early_abort
+sctp_pp_early_abort
 sctp_propinfo_tbl
 sctp_propinfo_count
-sctp_recvq_tq_task_max
+sctp_recvq_tq_list_max
 sctp_recvq_tq_task_min
 sctp_recvq_tq_thr_max
 sctp_recvq_tq_thr_min
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64	Mon Jul 19 17:27:45 2010 -0700
@@ -213,14 +213,18 @@
 sctp_asconf_dispatch_tbl
 sctp_conn_cache
 sctp_conn_hash_size
+sctp_do_reclaim
 sctp_kmem_faddr_cache
 sctp_kmem_ftsn_set_cache
 sctp_kmem_set_cache
+sctp_min_assoc_listener
 sctp_opt_arr
 sctp_opt_arr_size
+sctp_pa_early_abort
+sctp_pp_early_abort
 sctp_propinfo_tbl
 sctp_propinfo_count
-sctp_recvq_tq_task_max
+sctp_recvq_tq_list_max
 sctp_recvq_tq_task_min
 sctp_recvq_tq_thr_max
 sctp_recvq_tq_thr_min
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64	Mon Jul 19 17:27:45 2010 -0700
@@ -215,14 +215,18 @@
 sctp_asconf_dispatch_tbl
 sctp_conn_cache
 sctp_conn_hash_size
+sctp_do_reclaim
 sctp_kmem_faddr_cache
 sctp_kmem_ftsn_set_cache
 sctp_kmem_set_cache
+sctp_min_assoc_listener
 sctp_opt_arr
 sctp_opt_arr_size
+sctp_pa_early_abort
+sctp_pp_early_abort
 sctp_propinfo_tbl
 sctp_propinfo_count
-sctp_recvq_tq_task_max
+sctp_recvq_tq_list_max
 sctp_recvq_tq_task_min
 sctp_recvq_tq_thr_max
 sctp_recvq_tq_thr_min
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64	Tue Jul 20 14:28:29 2010 +0800
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64	Mon Jul 19 17:27:45 2010 -0700
@@ -213,14 +213,18 @@
 sctp_asconf_dispatch_tbl
 sctp_conn_cache
 sctp_conn_hash_size
+sctp_do_reclaim
 sctp_kmem_faddr_cache
 sctp_kmem_ftsn_set_cache
 sctp_kmem_set_cache
+sctp_min_assoc_listener
 sctp_opt_arr
 sctp_opt_arr_size
+sctp_pa_early_abort
+sctp_pp_early_abort
 sctp_propinfo_tbl
 sctp_propinfo_count
-sctp_recvq_tq_task_max
+sctp_recvq_tq_list_max
 sctp_recvq_tq_task_min
 sctp_recvq_tq_thr_max
 sctp_recvq_tq_thr_min