Mercurial > illumos > illumos-gate
changeset 12881:fb36eaeb6ee0
6910078 SCTP should have similar memory handling mechanism as TCP
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/sctp/sctp.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/cmd/mdb/common/modules/sctp/sctp.c Mon Jul 19 17:27:45 2010 -0700 @@ -1499,6 +1499,68 @@ wsp->walk_cbdata)); } +/* + * Initialization function for the per CPU SCTP stats counter walker of a given + * SCTP stack. + */ +int +sctps_sc_walk_init(mdb_walk_state_t *wsp) +{ + sctp_stack_t sctps; + + if (wsp->walk_addr == NULL) + return (WALK_ERR); + + if (mdb_vread(&sctps, sizeof (sctps), wsp->walk_addr) == -1) { + mdb_warn("failed to read sctp_stack_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + if (sctps.sctps_sc_cnt == 0) + return (WALK_DONE); + + /* + * Store the sctp_stack_t pointer in walk_data. The stepping function + * used it to calculate if the end of the counter has reached. + */ + wsp->walk_data = (void *)wsp->walk_addr; + wsp->walk_addr = (uintptr_t)sctps.sctps_sc; + return (WALK_NEXT); +} + +/* + * Stepping function for the per CPU SCTP stats counterwalker. + */ +int +sctps_sc_walk_step(mdb_walk_state_t *wsp) +{ + int status; + sctp_stack_t sctps; + sctp_stats_cpu_t *stats; + char *next, *end; + + if (mdb_vread(&sctps, sizeof (sctps), (uintptr_t)wsp->walk_data) == + -1) { + mdb_warn("failed to read sctp_stack_t at %p", wsp->walk_addr); + return (WALK_ERR); + } + if (mdb_vread(&stats, sizeof (stats), wsp->walk_addr) == -1) { + mdb_warn("failed ot read sctp_stats_cpu_t at %p", + wsp->walk_addr); + return (WALK_ERR); + } + status = wsp->walk_callback((uintptr_t)stats, &stats, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + next = (char *)wsp->walk_addr + sizeof (sctp_stats_cpu_t *); + end = (char *)sctps.sctps_sc + sctps.sctps_sc_cnt * + sizeof (sctp_stats_cpu_t *); + if (next >= end) + return (WALK_DONE); + wsp->walk_addr = (uintptr_t)next; + return (WALK_NEXT); +} + static void sctp_help(void) { @@ -1522,6 +1584,7 @@ mdb_printf("\t-d\t Local and Peer addresses\n"); mdb_printf("\t-P\t Peer addresses\n"); } + static const mdb_dcmd_t dcmds[] 
= { { "sctp", ":[-afhoimrSFHpRCcedP]", "display sctp control structure", sctp, sctp_help }, @@ -1591,8 +1654,8 @@ sctp_stack_ill_walk_init, sctp_stack_ill_walk_step, NULL }, { "sctp_stack_walk_ipif", "walk the sctp_g_ipif list for one stack", sctp_stack_ipif_walk_init, sctp_stack_ipif_walk_step, NULL }, - { "sctp_stacks", "walk all the sctp_stack_t", - sctp_stacks_walk_init, sctp_stacks_walk_step, NULL }, + { "sctps_sc", "walk all the per CPU stats counters of a sctp_stack_t", + sctps_sc_walk_init, sctps_sc_walk_step, NULL }, { NULL } };
--- a/usr/src/uts/common/Makefile.files Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/Makefile.files Mon Jul 19 17:27:45 2010 -0700 @@ -528,14 +528,15 @@ IP_TCP_OBJS = tcp.o tcp_fusion.o tcp_opt_data.o tcp_sack.o tcp_stats.o \ tcp_misc.o tcp_timers.o tcp_time_wait.o tcp_tpi.o tcp_output.o \ tcp_input.o tcp_socket.o tcp_bind.o tcp_cluster.o tcp_tunables.o -IP_UDP_OBJS = udp.o udp_opt_data.o udp_tunables.o +IP_UDP_OBJS = udp.o udp_opt_data.o udp_tunables.o udp_stats.o IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \ sctp_init.o sctp_input.o sctp_cookie.o \ sctp_conn.o sctp_error.o sctp_snmp.o \ sctp_tunables.o sctp_shutdown.o sctp_common.o \ sctp_timer.o sctp_heartbeat.o sctp_hash.o \ sctp_bind.o sctp_notify.o sctp_asconf.o \ - sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o + sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o \ + sctp_misc.o IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \
--- a/usr/src/uts/common/inet/ip/ip.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/ip/ip.c Mon Jul 19 17:27:45 2010 -0700 @@ -740,6 +740,8 @@ static void ipobs_init(ip_stack_t *); static void ipobs_fini(ip_stack_t *); +static int ip_tp_cpu_update(cpu_setup_t, int, void *); + ipaddr_t ip_g_all_ones = IP_HOST_MASK; static long ip_rput_pullups; @@ -4274,6 +4276,11 @@ void ip_ddi_destroy(void) { + /* This needs to be called before destroying any transports. */ + mutex_enter(&cpu_lock); + unregister_cpu_setup_func(ip_tp_cpu_update, NULL); + mutex_exit(&cpu_lock); + tnet_fini(); icmp_ddi_g_destroy(); @@ -4531,6 +4538,11 @@ rts_ddi_g_init(); icmp_ddi_g_init(); ilb_ddi_g_init(); + + /* This needs to be called after all transports are initialized. */ + mutex_enter(&cpu_lock); + register_cpu_setup_func(ip_tp_cpu_update, NULL); + mutex_exit(&cpu_lock); } /* @@ -14152,7 +14164,7 @@ * Drop the packet here if the sctp checksum failed. */ if (iraflags & IRAF_SCTP_CSUM_ERR) { - BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); + SCTPS_BUMP_MIB(sctps, sctpChecksumError); freemsg(mp); return; } @@ -15088,3 +15100,45 @@ *v4srcp)); return (B_FALSE); } + +/* + * Transport protocol call back function for CPU state change. + */ +/* ARGSUSED */ +static int +ip_tp_cpu_update(cpu_setup_t what, int id, void *arg) +{ + processorid_t cpu_seqid; + netstack_handle_t nh; + netstack_t *ns; + + ASSERT(MUTEX_HELD(&cpu_lock)); + cpu_seqid = cpu[id]->cpu_seqid; + + switch (what) { + case CPU_CONFIG: + case CPU_ON: + case CPU_INIT: + case CPU_CPUPART_IN: + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + tcp_stack_cpu_add(ns->netstack_tcp, cpu_seqid); + sctp_stack_cpu_add(ns->netstack_sctp, cpu_seqid); + udp_stack_cpu_add(ns->netstack_udp, cpu_seqid); + netstack_rele(ns); + } + netstack_next_fini(&nh); + break; + case CPU_UNCONFIG: + case CPU_OFF: + case CPU_CPUPART_OUT: + /* + * Nothing to do. 
We don't remove the per CPU stats from + * the IP stack even when the CPU goes offline. + */ + break; + default: + break; + } + return (0); +}
--- a/usr/src/uts/common/inet/sctp/sctp.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp.c Mon Jul 19 17:27:45 2010 -0700 @@ -109,13 +109,13 @@ /* /etc/system variables */ /* The minimum number of threads for each taskq. */ -int sctp_recvq_tq_thr_min = 4; +int sctp_recvq_tq_thr_min = 4; /* The maximum number of threads for each taskq. */ -int sctp_recvq_tq_thr_max = 16; -/* The minimum number of tasks for each taskq. */ -int sctp_recvq_tq_task_min = 5; -/* The maxiimum number of tasks for each taskq. */ -int sctp_recvq_tq_task_max = 50; +int sctp_recvq_tq_thr_max = 48; +/* The mnimum number of tasks for each taskq. */ +int sctp_recvq_tq_task_min = 8; +/* Default value of sctp_recvq_tq_list_max_sz. */ +int sctp_recvq_tq_list_max = 16; /* * SCTP tunables related declarations. Definitions are in sctp_tunables.c @@ -225,6 +225,9 @@ */ SCTP_LINK(sctp, sctps); + /* If the listener has a limit, inherit the counter info. */ + sctp->sctp_listen_cnt = psctp->sctp_listen_cnt; + return (sctp); } @@ -374,7 +377,7 @@ sctp->sctp_running = B_FALSE; while (sctp->sctp_state >= SCTPS_ESTABLISHED && sctp->sctp_client_errno == 0) { - cv_broadcast(&sctp->sctp_cv); + cv_signal(&sctp->sctp_cv); ret = cv_timedwait_sig(&sctp->sctp_cv, &sctp->sctp_lock, stoptime); if (ret < 0) { @@ -457,6 +460,13 @@ mblk_t *mp; conn_t *connp = sctp->sctp_connp; + /* The counter is incremented only for established associations. */ + if (sctp->sctp_state >= SCTPS_ESTABLISHED) + SCTPS_ASSOC_DEC(sctp->sctp_sctps); + + if (sctp->sctp_listen_cnt != NULL) + SCTP_DECR_LISTEN_CNT(sctp); + /* Sanity check, don't do the same thing twice. */ if (connp->conn_state_flags & CONN_CLOSING) { ASSERT(sctp->sctp_state == SCTPS_IDLE); @@ -717,19 +727,17 @@ bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits)); /* It is time to update the global statistics. 
*/ - UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, sctp->sctp_opkts); - UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); - UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, sctp->sctp_odchunks); - UPDATE_MIB(&sctps->sctps_mib, - sctpOutUnorderChunks, sctp->sctp_oudchunks); - UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, sctp->sctp_rxtchunks); - UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, sctp->sctp_ipkts); - UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); - UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, sctp->sctp_idchunks); - UPDATE_MIB(&sctps->sctps_mib, - sctpInUnorderChunks, sctp->sctp_iudchunks); - UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); - UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); + SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts); + SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks); + SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, sctp->sctp_odchunks); + SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks); + SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, sctp->sctp_rxtchunks); + SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts); + SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks); + SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks); + SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks); + SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); + SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_opkts = 0; sctp->sctp_obchunks = 0; sctp->sctp_odchunks = 0; @@ -766,64 +774,6 @@ kmem_cache_free(sctp_conn_cache, connp); } -/* Diagnostic routine used to return a string associated with the sctp state. 
*/ -char * -sctp_display(sctp_t *sctp, char *sup_buf) -{ - char *buf; - char buf1[30]; - static char priv_buf[INET6_ADDRSTRLEN * 2 + 80]; - char *cp; - conn_t *connp; - - if (sctp == NULL) - return ("NULL_SCTP"); - - connp = sctp->sctp_connp; - buf = (sup_buf != NULL) ? sup_buf : priv_buf; - - switch (sctp->sctp_state) { - case SCTPS_IDLE: - cp = "SCTP_IDLE"; - break; - case SCTPS_BOUND: - cp = "SCTP_BOUND"; - break; - case SCTPS_LISTEN: - cp = "SCTP_LISTEN"; - break; - case SCTPS_COOKIE_WAIT: - cp = "SCTP_COOKIE_WAIT"; - break; - case SCTPS_COOKIE_ECHOED: - cp = "SCTP_COOKIE_ECHOED"; - break; - case SCTPS_ESTABLISHED: - cp = "SCTP_ESTABLISHED"; - break; - case SCTPS_SHUTDOWN_PENDING: - cp = "SCTP_SHUTDOWN_PENDING"; - break; - case SCTPS_SHUTDOWN_SENT: - cp = "SCTPS_SHUTDOWN_SENT"; - break; - case SCTPS_SHUTDOWN_RECEIVED: - cp = "SCTPS_SHUTDOWN_RECEIVED"; - break; - case SCTPS_SHUTDOWN_ACK_SENT: - cp = "SCTPS_SHUTDOWN_ACK_SENT"; - break; - default: - (void) mi_sprintf(buf1, "SCTPUnkState(%d)", sctp->sctp_state); - cp = buf1; - break; - } - (void) mi_sprintf(buf, "[%u, %u] %s", - ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp); - - return (buf); -} - /* * Initialize protocol control block. If a parent exists, inherit * all values set through setsockopt(). 
@@ -1208,7 +1158,7 @@ if (!sctp_icmp_verf(sctp, sctph, mp)) { break; } - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1315,7 +1265,7 @@ } if (sctp->sctp_state == SCTPS_COOKIE_WAIT || sctp->sctp_state == SCTPS_COOKIE_ECHOED) { - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1344,7 +1294,7 @@ break; } if (sctp->sctp_state == SCTPS_COOKIE_WAIT) { - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNREFUSED); @@ -1386,13 +1336,18 @@ sctps = psctp->sctp_sctps; /* Increase here to have common decrease at end */ netstack_hold(sctps->sctps_netstack); + ASSERT(sctps->sctps_recvq_tq_list_cur_sz > 0); } else { netstack_t *ns; ns = netstack_find_by_cred(credp); - ASSERT(ns != NULL); sctps = ns->netstack_sctp; - ASSERT(sctps != NULL); + /* + * Check if the receive queue taskq for this sctp_stack_t has + * been set up. + */ + if (sctps->sctps_recvq_tq_list_cur_sz == 0) + sctp_rq_tq_init(sctps); /* * For exclusive stacks we set the zoneid to zero @@ -1570,6 +1525,7 @@ { sctp_stack_t *sctps; size_t arrsz; + int i; sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP); sctps->sctps_netstack = ns; @@ -1589,9 +1545,6 @@ KM_SLEEP); bcopy(sctp_propinfo_tbl, sctps->sctps_propinfo_tbl, arrsz); - /* Initialize the recvq taskq. */ - sctp_rq_tq_init(sctps); - /* saddr init */ sctp_saddr_init(sctps); @@ -1599,10 +1552,29 @@ list_create(&sctps->sctps_g_list, sizeof (sctp_t), offsetof(sctp_t, sctp_list)); - /* Initialize sctp kernel stats. */ + /* Initialize SCTP kstats. 
*/ sctps->sctps_mibkp = sctp_kstat_init(stackid); - sctps->sctps_kstat = - sctp_kstat2_init(stackid, &sctps->sctps_statistics); + sctps->sctps_kstat = sctp_kstat2_init(stackid); + + mutex_init(&sctps->sctps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); + sctps->sctps_reclaim = B_FALSE; + sctps->sctps_reclaim_tid = 0; + sctps->sctps_reclaim_period = sctps->sctps_rto_maxg; + + /* Allocate the per netstack stats */ + mutex_enter(&cpu_lock); + sctps->sctps_sc_cnt = MAX(ncpus, boot_ncpus); + mutex_exit(&cpu_lock); + sctps->sctps_sc = kmem_zalloc(max_ncpus * sizeof (sctp_stats_cpu_t *), + KM_SLEEP); + for (i = 0; i < sctps->sctps_sc_cnt; i++) { + sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t), + KM_SLEEP); + } + + mutex_init(&sctps->sctps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&sctps->sctps_listener_conf, sizeof (sctp_listener_t), + offsetof(sctp_listener_t, sl_link)); return (sctps); } @@ -1635,6 +1607,20 @@ sctp_stack_fini(netstackid_t stackid, void *arg) { sctp_stack_t *sctps = (sctp_stack_t *)arg; + int i; + + /* + * Set sctps_reclaim to false tells sctp_reclaim_timer() not to restart + * the timer. + */ + mutex_enter(&sctps->sctps_reclaim_lock); + sctps->sctps_reclaim = B_FALSE; + mutex_exit(&sctps->sctps_reclaim_lock); + if (sctps->sctps_reclaim_tid != 0) + (void) untimeout(sctps->sctps_reclaim_tid); + mutex_destroy(&sctps->sctps_reclaim_lock); + + sctp_listener_conf_cleanup(sctps); kmem_free(sctps->sctps_propinfo_tbl, sctp_propinfo_count * sizeof (mod_prop_info_t)); @@ -1653,12 +1639,14 @@ sctp_hash_destroy(sctps); /* Destroy SCTP kernel stats. 
*/ - sctp_kstat2_fini(stackid, sctps->sctps_kstat); - sctps->sctps_kstat = NULL; - bzero(&sctps->sctps_statistics, sizeof (sctps->sctps_statistics)); + for (i = 0; i < sctps->sctps_sc_cnt; i++) + kmem_free(sctps->sctps_sc[i], sizeof (sctp_stats_cpu_t)); + kmem_free(sctps->sctps_sc, max_ncpus * sizeof (sctp_stats_cpu_t *)); sctp_kstat_fini(stackid, sctps->sctps_mibkp); sctps->sctps_mibkp = NULL; + sctp_kstat2_fini(stackid, sctps->sctps_kstat); + sctps->sctps_kstat = NULL; mutex_destroy(&sctps->sctps_g_lock); mutex_destroy(&sctps->sctps_epriv_port_lock); @@ -1666,26 +1654,30 @@ kmem_free(sctps, sizeof (*sctps)); } -void -sctp_display_all(sctp_stack_t *sctps) -{ - sctp_t *sctp_walker; - - mutex_enter(&sctps->sctps_g_lock); - for (sctp_walker = list_head(&sctps->sctps_g_list); - sctp_walker != NULL; - sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list, - sctp_walker)) { - (void) sctp_display(sctp_walker, NULL); - } - mutex_exit(&sctps->sctps_g_lock); -} - static void sctp_rq_tq_init(sctp_stack_t *sctps) { - sctps->sctps_recvq_tq_list_max_sz = 16; + char tq_name[TASKQ_NAMELEN]; + int thrs; + int max_tasks; + + thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, + MAX(ncpus, boot_ncpus))); + /* + * Make sure that the maximum number of tasks is at least thrice as + * large as the number of threads. + */ + max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; + + /* + * This helps differentiate the default taskqs in different IP stacks. + */ + (void) snprintf(tq_name, sizeof (tq_name), "sctp_def_rq_taskq_%d", + sctps->sctps_netstack->netstack_stackid); + + sctps->sctps_recvq_tq_list_max_sz = sctp_recvq_tq_list_max; sctps->sctps_recvq_tq_list_cur_sz = 1; + /* * Initialize the recvq_tq_list and create the first recvq taskq. * What to do if it fails? 
@@ -1693,10 +1685,8 @@ sctps->sctps_recvq_tq_list = kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *), KM_SLEEP); - sctps->sctps_recvq_tq_list[0] = taskq_create("sctp_def_recvq_taskq", - MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), - minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, - TASKQ_PREPOPULATE); + sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs, + minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE); mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); } @@ -1705,6 +1695,9 @@ { int i; + if (sctps->sctps_recvq_tq_list_cur_sz == 0) + return; + for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) { ASSERT(sctps->sctps_recvq_tq_list[i] != NULL); taskq_destroy(sctps->sctps_recvq_tq_list[i]); @@ -1720,6 +1713,16 @@ { taskq_t *tq; char tq_name[TASKQ_NAMELEN]; + int thrs; + int max_tasks; + + thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, + MAX(ncpus, boot_ncpus))); + /* + * Make sure that the maximum number of tasks is at least thrice as + * large as the number of threads. 
+ */ + max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; mutex_enter(&sctps->sctps_rq_tq_lock); if (sctps->sctps_recvq_tq_list_cur_sz + 1 > @@ -1729,12 +1732,11 @@ return; } - (void) snprintf(tq_name, sizeof (tq_name), "sctp_recvq_taskq_%u", + (void) snprintf(tq_name, sizeof (tq_name), "sctp_rq_taskq_%d_%u", + sctps->sctps_netstack->netstack_stackid, sctps->sctps_recvq_tq_list_cur_sz); - tq = taskq_create(tq_name, - MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), - minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, - TASKQ_PREPOPULATE); + tq = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min, + max_tasks, TASKQ_PREPOPULATE); if (tq == NULL) { mutex_exit(&sctps->sctps_rq_tq_lock); cmn_err(CE_NOTE, "SCTP recvq taskq creation failed"); @@ -2072,7 +2074,7 @@ { sctp_conn_cache = kmem_cache_create("sctp_conn_cache", sizeof (sctp_t) + sizeof (conn_t), 0, sctp_conn_cache_constructor, - sctp_conn_cache_destructor, NULL, NULL, NULL, 0); + sctp_conn_cache_destructor, sctp_conn_reclaim, NULL, NULL, 0); } static void
--- a/usr/src/uts/common/inet/sctp/sctp_asconf.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_asconf.c Mon Jul 19 17:27:45 2010 -0700 @@ -870,7 +870,7 @@ /* Retransmission */ if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { /* time to give up */ - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, ETIMEDOUT); return;
--- a/usr/src/uts/common/inet/sctp/sctp_bind.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_bind.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -50,6 +49,12 @@ #include "sctp_addr.h" /* + * Minimum number of associations which can be created per listener. Used + * when the listener association count is in effect. + */ +static uint32_t sctp_min_assoc_listener = 2; + +/* * Returns 0 on success, EACCES on permission failure. */ static int @@ -160,9 +165,54 @@ (void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN); sctp->sctp_last_secret_update = ddi_get_lbolt64(); bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN); + + /* + * If there is an association limit, allocate and initialize + * the counter struct. Note that since listen can be called + * multiple times, the struct may have been allready allocated. + */ + if (!list_is_empty(&sctps->sctps_listener_conf) && + sctp->sctp_listen_cnt == NULL) { + sctp_listen_cnt_t *slc; + uint32_t ratio; + + ratio = sctp_find_listener_conf(sctps, + ntohs(connp->conn_lport)); + if (ratio != 0) { + uint32_t mem_ratio, tot_buf; + + slc = kmem_alloc(sizeof (sctp_listen_cnt_t), KM_SLEEP); + /* + * Calculate the connection limit based on + * the configured ratio and maxusers. Maxusers + * are calculated based on memory size, + * ~ 1 user per MB. Note that the conn_rcvbuf + * and conn_sndbuf may change after a + * connection is accepted. So what we have + * is only an approximation. + */ + if ((tot_buf = connp->conn_rcvbuf + + connp->conn_sndbuf) < MB) { + mem_ratio = MB / tot_buf; + slc->slc_max = maxusers / ratio * mem_ratio; + } else { + mem_ratio = tot_buf / MB; + slc->slc_max = maxusers / ratio / mem_ratio; + } + /* At least we should allow some associations! 
*/ + if (slc->slc_max < sctp_min_assoc_listener) + slc->slc_max = sctp_min_assoc_listener; + slc->slc_cnt = 1; + slc->slc_drop = 0; + sctp->sctp_listen_cnt = slc; + } + } + + tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH( ntohs(connp->conn_lport))]; sctp_listen_hash_insert(tf, sctp); + WAKE_SCTP(sctp); return (0); }
--- a/usr/src/uts/common/inet/sctp/sctp_common.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_common.c Mon Jul 19 17:27:45 2010 -0700 @@ -618,10 +618,19 @@ void sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp) { - int64_t now = ddi_get_lbolt64(); + int64_t now = LBOLT_FASTPATH64; + /* + * If we are under memory pressure, we abort association waiting + * in zero window probing state for too long. We do this by not + * resetting sctp_strikes. So if sctp_zero_win_probe continues + * while under memory pressure, this association will eventually + * time out. + */ + if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) { + sctp->sctp_strikes = 0; + } fp->strikes = 0; - sctp->sctp_strikes = 0; fp->lastactive = now; fp->hb_expiry = now + SET_HB_INTVL(fp); fp->hb_pending = B_FALSE; @@ -646,18 +655,22 @@ } } -int +/* + * Return B_TRUE if there is still an active peer address with zero strikes; + * otherwise rturn B_FALSE. + */ +boolean_t sctp_is_a_faddr_clean(sctp_t *sctp) { sctp_faddr_t *fp; for (fp = sctp->sctp_faddrs; fp; fp = fp->next) { if (fp->state == SCTP_FADDRS_ALIVE && fp->strikes == 0) { - return (1); + return (B_TRUE); } } - return (0); + return (B_FALSE); } /* @@ -723,7 +736,7 @@ /* All faddrs are down; kill the association */ dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n")); - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ? SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ?
--- a/usr/src/uts/common/inet/sctp/sctp_conn.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_conn.c Mon Jul 19 17:27:45 2010 -0700 @@ -63,7 +63,7 @@ uint_t sctp_options; conn_t *aconnp; conn_t *lconnp; - sctp_stack_t *sctps = listener->sctp_sctps; + sctp_stack_t *sctps = listener->sctp_sctps; sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); ASSERT(OK_32PTR(sctph)); @@ -127,15 +127,8 @@ sctp_bind_hash_insert(&sctps->sctps_bind_fanout[ SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0); - /* - * No need to check for multicast destination since ip will only pass - * up multicasts to those that have expressed interest - * TODO: what about rejecting broadcasts? - * Also check that source is not a multicast or broadcast address. - */ - /* XXXSCTP */ - acceptor->sctp_state = SCTPS_ESTABLISHED; - acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt(); + SCTP_ASSOC_EST(sctps, acceptor); + /* * listener->sctp_rwnd should be the default window size or a * window size changed via SO_RCVBUF option. @@ -163,6 +156,8 @@ pid_t cpid; in6_addr_t faddr, laddr; ip_xmit_attr_t *ixa; + sctp_listen_cnt_t *slc = sctp->sctp_listen_cnt; + boolean_t slc_set = B_FALSE; /* * No need to check for duplicate as this is the listener @@ -173,19 +168,48 @@ */ ASSERT(OK_32PTR(mp->b_rptr)); + connp = sctp->sctp_connp; + sctps = sctp->sctp_sctps; + + /* + * Enforce the limit set on the number of connections per listener. + * Note that tlc_cnt starts with 1. So need to add 1 to tlc_max + * for comparison. 
+ */ + if (slc != NULL) { + int64_t now; + + if (atomic_add_32_nv(&slc->slc_cnt, 1) > slc->slc_max + 1) { + now = ddi_get_lbolt64(); + atomic_add_32(&slc->slc_cnt, -1); + SCTP_KSTAT(sctps, sctp_listen_cnt_drop); + slc->slc_drop++; + if (now - slc->slc_report_time > + MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL)) { + zcmn_err(connp->conn_zoneid, CE_WARN, + "SCTP listener (port %d) association max " + "(%u) reached: %u attempts dropped total\n", + ntohs(connp->conn_lport), + slc->slc_max, slc->slc_drop); + slc->slc_report_time = now; + } + return (NULL); + } + slc_set = B_TRUE; + } + if ((eager = sctp_create_eager(sctp)) == NULL) { + if (slc_set) + atomic_add_32(&slc->slc_cnt, -1); return (NULL); } - - connp = sctp->sctp_connp; - sctps = sctp->sctp_sctps; econnp = eager->sctp_connp; if (connp->conn_policy != NULL) { /* Inherit the policy from the listener; use actions from ira */ if (!ip_ipsec_policy_inherit(econnp, connp, ira)) { sctp_close_eager(eager); - BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTPS_BUMP_MIB(sctps, sctpListenDrop); return (NULL); } } @@ -217,7 +241,7 @@ if (ipsec_conn_cache_policy(econnp, (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) { sctp_close_eager(eager); - BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTPS_BUMP_MIB(sctps, sctpListenDrop); return (NULL); } @@ -261,7 +285,7 @@ err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); if (err != 0) { sctp_close_eager(eager); - BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTPS_BUMP_MIB(sctps, sctpListenDrop); return (NULL); } @@ -301,7 +325,7 @@ if (flist != NULL) kmem_free(flist, fsize); sctp_close_eager(eager); - BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTPS_BUMP_MIB(sctps, sctpListenDrop); SCTP_KSTAT(sctps, sctp_cl_connect); return (NULL); } @@ -319,7 +343,7 @@ (sock_lower_handle_t)eager, NULL, cr, cpid, &eager->sctp_upcalls)) == NULL) { sctp_close_eager(eager); - BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); + SCTPS_BUMP_MIB(sctps, sctpListenDrop); return (NULL); } 
ASSERT(SCTP_IS_DETACHED(eager));
--- a/usr/src/uts/common/inet/sctp/sctp_cookie.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_cookie.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -749,7 +749,7 @@ /* timestamp */ now = (int64_t *)(cookieph + 1); - nowt = ddi_get_lbolt64(); + nowt = LBOLT_FASTPATH64; bcopy(&nowt, now, sizeof (*now)); /* cookie lifetime -- need configuration */ @@ -951,7 +951,7 @@ cph = NULL; if (validate_init_params(sctp, iackch, iack, iackmp, &cph, &errmp, &pad, &sctp_options, ira) == 0) { /* result in 'pad' ignored */ - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, ECONNABORTED); return; @@ -1292,7 +1292,7 @@ * So it is lbolt64 - (ts + *lt). If it is positive, it means * that the Cookie has expired. */ - diff = ddi_get_lbolt64() - (ts + *lt); + diff = LBOLT_FASTPATH64 - (ts + *lt); if (diff > 0 && (init->sic_inittag != sctp->sctp_fvtag || iack->sic_inittag != sctp->sctp_lvtag)) { uint32_t staleness; @@ -1354,11 +1354,8 @@ sctp->sctp_frwnd = ntohl(init->sic_a_rwnd); sctp->sctp_fcsn = sctp->sctp_lastacked; - if (sctp->sctp_state < SCTPS_ESTABLISHED) { - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); - } + if (sctp->sctp_state < SCTPS_ESTABLISHED) + SCTP_ASSOC_EST(sctps, sctp); dprint(1, ("sctp peer %x:%x:%x:%x (%d) restarted\n", SCTP_PRINTADDR(sctp->sctp_current->faddr), @@ -1384,9 +1381,7 @@ if (sctp->sctp_state < SCTPS_ESTABLISHED) { if (!sctp_initialize_params(sctp, init, iack)) return (-1); /* Drop? 
*/ - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); + SCTP_ASSOC_EST(sctps, sctp); } dprint(1, ("init collision with %x:%x:%x:%x (%d)\n", @@ -1416,9 +1411,7 @@ if (sctp->sctp_state < SCTPS_ESTABLISHED) { if (!sctp_initialize_params(sctp, init, iack)) return (-1); /* Drop? */ - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); + SCTP_ASSOC_EST(sctps, sctp); } return (0); } else {
--- a/usr/src/uts/common/inet/sctp/sctp_error.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_error.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -150,7 +149,7 @@ freemsg(mp); return; } - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); BUMP_LOCAL(sctp->sctp_opkts); BUMP_LOCAL(sctp->sctp_obchunks); @@ -282,7 +281,7 @@ ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len; } - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); BUMP_LOCAL(sctp->sctp_obchunks); if (is_system_labeled() && ixa->ixa_tsl != NULL) { @@ -435,7 +434,7 @@ ixas.ixa_ipst = ipst; ixas.ixa_ifindex = 0; - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); if (is_system_labeled()) { ASSERT(ira->ira_tsl != NULL);
--- a/usr/src/uts/common/inet/sctp/sctp_heartbeat.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_heartbeat.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -190,7 +189,7 @@ fp->hb_pending = B_TRUE; BUMP_LOCAL(sctp->sctp_obchunks); - BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatProbe); + SCTPS_BUMP_MIB(sctps, sctpTimHeartBeatProbe); sctp_set_iplen(sctp, hbmp, fp->ixa); (void) conn_ip_output(hbmp, fp->ixa);
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_impl.h Mon Jul 19 17:27:45 2010 -0700 @@ -31,6 +31,9 @@ #include <sys/list.h> #include <sys/strsun.h> #include <sys/zone.h> +#include <sys/cpuvar.h> +#include <sys/clock_impl.h> + #include <netinet/ip6.h> #include <inet/optcom.h> #include <inet/tunables.h> @@ -349,6 +352,80 @@ ((sctps)->sctps_conn_hash_size - 1)) /* + * Linked list struct to store SCTP listener association limit configuration + * per IP stack. The list is stored at sctps_listener_conf in sctp_stack_t. + * + * sl_port: the listener port of this limit configuration + * sl_ratio: the maximum amount of memory consumed by all concurrent SCTP + * connections created by a listener does not exceed 1/tl_ratio + * of the total system memory. Note that this is only an + * approximation. + * sl_link: linked list struct + */ +typedef struct sctp_listener_s { + in_port_t sl_port; + uint32_t sl_ratio; + list_node_t sl_link; +} sctp_listener_t; + +/* + * If there is a limit set on the number of association allowed per each + * listener, the following struct is used to store that counter. It keeps + * the number of SCTP association created by a listener. Note that this needs + * to be separated from the listener since the listener can go away before + * all the associations are gone. + * + * When the struct is allocated, slc_cnt is set to 1. When a new association + * is created by the listener, slc_cnt is incremented by 1. When an + * association created by the listener goes away, slc_count is decremented by + * 1. When the listener itself goes away, slc_cnt is decremented by one. + * The last association (or the listener) which decrements slc_cnt to zero + * frees the struct. + * + * slc_max is the maximum number of concurrent associations created from a + * listener. It is calculated when the sctp_listen_cnt_t is allocated. 
+ * + * slc_report_time stores the time when cmn_err() is called to report that the + * max has been exceeeded. Report is done at most once every + * SCTP_SLC_REPORT_INTERVAL mins for a listener. + * + * slc_drop stores the number of connection attempt dropped because the + * limit has reached. + */ +typedef struct sctp_listen_cnt_s { + uint32_t slc_max; + uint32_t slc_cnt; + int64_t slc_report_time; + uint32_t slc_drop; +} sctp_listen_cnt_t; + +#define SCTP_SLC_REPORT_INTERVAL (30 * MINUTES) + +#define SCTP_DECR_LISTEN_CNT(sctp) \ +{ \ + ASSERT((sctp)->sctp_listen_cnt->slc_cnt > 0); \ + if (atomic_add_32_nv(&(sctp)->sctp_listen_cnt->slc_cnt, -1) == 0) \ + kmem_free((sctp)->sctp_listen_cnt, sizeof (sctp_listen_cnt_t));\ + (sctp)->sctp_listen_cnt = NULL; \ +} + +/* Increment and decrement the number of associations in sctp_stack_t. */ +#define SCTPS_ASSOC_INC(sctps) \ + atomic_inc_64( \ + (uint64_t *)&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_assoc_cnt) + +#define SCTPS_ASSOC_DEC(sctps) \ + atomic_dec_64( \ + (uint64_t *)&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_assoc_cnt) + +#define SCTP_ASSOC_EST(sctps, sctp) \ +{ \ + (sctp)->sctp_state = SCTPS_ESTABLISHED; \ + (sctp)->sctp_assoc_start_time = (uint32_t)LBOLT_FASTPATH64; \ + SCTPS_ASSOC_INC(sctps); \ +} + +/* * Bind hash array size and hash function. The size must be a power * of 2 and lport must be in host byte order. */ @@ -873,6 +950,9 @@ * user request for stats on this endpoint. */ int sctp_prev_maxrto; + + /* For association counting. 
*/ + sctp_listen_cnt_t *sctp_listen_cnt; } sctp_t; #define SCTP_TXQ_LEN(sctp) ((sctp)->sctp_unsent + (sctp)->sctp_unacked) @@ -925,6 +1005,7 @@ extern void sctp_conn_init(conn_t *); extern sctp_t *sctp_conn_match(in6_addr_t **, uint32_t, in6_addr_t *, uint32_t, zoneid_t, iaflags_t, sctp_stack_t *); +extern void sctp_conn_reclaim(void *); extern sctp_t *sctp_conn_request(sctp_t *, mblk_t *, uint_t, uint_t, sctp_init_chunk_t *, ip_recv_attr_t *); extern uint32_t sctp_cumack(sctp_t *, uint32_t, mblk_t **); @@ -943,6 +1024,7 @@ extern void sctp_faddr_init(void); extern void sctp_fast_rexmit(sctp_t *); extern void sctp_fill_sack(sctp_t *, unsigned char *, int); +extern uint32_t sctp_find_listener_conf(sctp_stack_t *, in_port_t); extern void sctp_free_faddr_timers(sctp_t *); extern void sctp_free_ftsn_set(sctp_ftsn_set_t *); extern void sctp_free_msg(mblk_t *); @@ -978,17 +1060,18 @@ extern void sctp_intf_event(sctp_t *, in6_addr_t, int, int); extern void sctp_input_data(sctp_t *, mblk_t *, ip_recv_attr_t *); extern void sctp_instream_cleanup(sctp_t *, boolean_t); -extern int sctp_is_a_faddr_clean(sctp_t *); +extern boolean_t sctp_is_a_faddr_clean(sctp_t *); extern void *sctp_kstat_init(netstackid_t); extern void sctp_kstat_fini(netstackid_t, kstat_t *); -extern void *sctp_kstat2_init(netstackid_t, sctp_kstat_t *); +extern void *sctp_kstat2_init(netstackid_t); extern void sctp_kstat2_fini(netstackid_t, kstat_t *); extern ssize_t sctp_link_abort(mblk_t *, uint16_t, char *, size_t, int, boolean_t); extern void sctp_listen_hash_insert(sctp_tf_t *, sctp_t *); extern void sctp_listen_hash_remove(sctp_t *); +extern void sctp_listener_conf_cleanup(sctp_stack_t *); extern sctp_t *sctp_lookup(sctp_t *, in6_addr_t *, sctp_tf_t *, uint32_t *, int); extern sctp_faddr_t *sctp_lookup_faddr(sctp_t *, in6_addr_t *); @@ -1058,6 +1141,7 @@ extern void sctp_set_iplen(sctp_t *, mblk_t *, ip_xmit_attr_t *); extern void sctp_set_ulp_prop(sctp_t *); extern void sctp_ss_rexmit(sctp_t *); 
+extern void sctp_stack_cpu_add(sctp_stack_t *, processorid_t); extern size_t sctp_supaddr_param_len(sctp_t *); extern size_t sctp_supaddr_param(sctp_t *, uchar_t *);
--- a/usr/src/uts/common/inet/sctp/sctp_input.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_input.c Mon Jul 19 17:27:45 2010 -0700 @@ -1349,7 +1349,7 @@ /* We can no longer deliver anything up, but still need to handle it. */ if (SCTP_IS_DETACHED(sctp)) { - BUMP_MIB(&sctps->sctps_mib, sctpInClosed); + SCTPS_BUMP_MIB(sctps, sctpInClosed); can_deliver = B_FALSE; } @@ -1937,9 +1937,9 @@ (void *)sctp->sctp_lastdata, SCTP_PRINTADDR(sctp->sctp_lastdata->faddr))); - sctp->sctp_active = ddi_get_lbolt64(); - - BUMP_MIB(&sctps->sctps_mib, sctpOutAck); + sctp->sctp_active = LBOLT_FASTPATH64; + + SCTPS_BUMP_MIB(sctps, sctpOutAck); sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->ixa); (void) conn_ip_output(smp, sctp->sctp_lastdata->ixa); @@ -2124,7 +2124,7 @@ cum_ack_done: *first_unacked = mp; if (cumack_forward > 0) { - BUMP_MIB(&sctps->sctps_mib, sctpInAck); + SCTPS_BUMP_MIB(sctps, sctpInAck); if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) { sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd; } @@ -2143,7 +2143,7 @@ sctp->sctp_xmit_unacked = mp; } else { /* dup ack */ - BUMP_MIB(&sctps->sctps_mib, sctpInDupAck); + SCTPS_BUMP_MIB(sctps, sctpInDupAck); } sctp->sctp_lastack_rxd = tsn; if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd)) @@ -2298,7 +2298,7 @@ remaining = ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn); if (SCTP_IS_DETACHED(sctp)) { - BUMP_MIB(&sctps->sctps_mib, sctpInClosed); + SCTPS_BUMP_MIB(sctps, sctpInClosed); can_deliver = B_FALSE; } /* @@ -2543,7 +2543,7 @@ */ if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) || SEQ_GT(gapend, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); + SCTPS_BUMP_MIB(sctps, sctpInAckUnsent); *trysend = -1; return (acked); } else if (SEQ_LT(gapend, gapstart) || @@ -2742,7 +2742,7 @@ return (0); if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); + SCTPS_BUMP_MIB(sctps, sctpInAckUnsent); /* Send an ABORT */ return (-1); } @@ -2768,7 
+2768,7 @@ mp = sctp->sctp_xmit_head->b_cont; else mp = NULL; - BUMP_MIB(&sctps->sctps_mib, sctpInDupAck); + SCTPS_BUMP_MIB(sctps, sctpInDupAck); /* * If we were doing a zero win probe and the win * has now opened to at least MSS, re-transmit the @@ -2880,8 +2880,7 @@ sctp->sctp_xmit_head, mp1, &trysend, &fast_recovery, gapstart); if (trysend < 0) { - BUMP_MIB(&sctps->sctps_mib, - sctpInAckUnsent); + SCTPS_BUMP_MIB(sctps, sctpInAckUnsent); return (-1); } break; @@ -2898,7 +2897,7 @@ */ if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) || SEQ_GT(gapend, sctp->sctp_ltsn - 1)) { - BUMP_MIB(&sctps->sctps_mib, sctpInAckUnsent); + SCTPS_BUMP_MIB(sctps, sctpInAckUnsent); return (-1); } else if (SEQ_LT(gapend, gapstart) || SEQ_LEQ(gapstart, cumtsn)) { @@ -3409,8 +3408,8 @@ sctps = ipst->ips_netstack->netstack_sctp; - BUMP_MIB(&sctps->sctps_mib, sctpOutOfBlue); - BUMP_MIB(&sctps->sctps_mib, sctpInSCTPPkts); + SCTPS_BUMP_MIB(sctps, sctpOutOfBlue); + SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts); if (mp->b_cont != NULL) { /* @@ -3578,7 +3577,7 @@ { sctp_stack_t *sctps = sctp->sctp_sctps; - BUMP_MIB(&sctps->sctps_mib, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpAborted); BUMP_LOCAL(sctp->sctp_ibchunks); sctp_assoc_event(sctp, SCTP_COMM_LOST, @@ -3753,7 +3752,7 @@ gotdata = 0; trysend = 0; - now = ddi_get_lbolt64(); + now = LBOLT_FASTPATH64; /* Process the chunks */ do { dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n", @@ -3861,8 +3860,7 @@ BUMP_LOCAL(sctp->sctp_ibchunks); if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) { sctp_shutdown_complete(sctp); - BUMP_MIB(&sctps->sctps_mib, - sctpShutdowns); + SCTPS_BUMP_MIB(sctps, sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); sctp_clean_death(sctp, 0); @@ -3897,7 +3895,7 @@ sctp_adaptation_event(sctp); } } else { - BUMP_MIB(&sctps->sctps_mib, + SCTPS_BUMP_MIB(sctps, sctpInInvalidCookie); } break; @@ -3953,7 +3951,7 @@ if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaptation, &peer_src, ira) == -1) { - 
BUMP_MIB(&sctps->sctps_mib, + SCTPS_BUMP_MIB(sctps, sctpInInvalidCookie); goto done; } @@ -3997,7 +3995,7 @@ * properly reprocessed on the * eager's queue. */ - BUMP_MIB(&sctps->sctps_mib, sctpPassiveEstab); + SCTPS_BUMP_MIB(sctps, sctpPassiveEstab); if (mlen > ntohs(ch->sch_len)) { eager->sctp_cookie_mp = dupb(mp); /* @@ -4075,7 +4073,7 @@ if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaptation, NULL, ira) == -1) { - BUMP_MIB(&sctps->sctps_mib, + SCTPS_BUMP_MIB(sctps, sctpInInvalidCookie); break; } @@ -4087,10 +4085,8 @@ sctp_set_ulp_prop(sctp); } - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); - BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); + SCTP_ASSOC_EST(sctps, sctp); + SCTPS_BUMP_MIB(sctps, sctpActiveEstab); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); sctp->sctp_cookie_mp = NULL; @@ -4129,10 +4125,8 @@ } if (sctp->sctp_unacked == 0) sctp_stop_faddr_timers(sctp); - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); - BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); + SCTP_ASSOC_EST(sctps, sctp); + SCTPS_BUMP_MIB(sctps, sctpActiveEstab); BUMP_LOCAL(sctp->sctp_ibchunks); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); @@ -4157,7 +4151,7 @@ if (sctp_process_cookie(sctp, ch, mp, &iack, sctph, &recv_adaptation, NULL, ira) == -1) { - BUMP_MIB(&sctps->sctps_mib, + SCTPS_BUMP_MIB(sctps, sctpInInvalidCookie); break; } @@ -4171,10 +4165,8 @@ } if (sctp->sctp_unacked == 0) sctp_stop_faddr_timers(sctp); - sctp->sctp_state = SCTPS_ESTABLISHED; - sctp->sctp_assoc_start_time = - (uint32_t)ddi_get_lbolt(); - BUMP_MIB(&sctps->sctps_mib, sctpActiveEstab); + SCTP_ASSOC_EST(sctps, sctp); + SCTPS_BUMP_MIB(sctps, sctpActiveEstab); if (sctp->sctp_cookie_mp) { freemsg(sctp->sctp_cookie_mp); sctp->sctp_cookie_mp = NULL; @@ -4206,7 +4198,7 @@ p = (sctp_parm_hdr_t *)(ch + 1); if (p->sph_type == htons(SCTP_ERR_STALE_COOKIE)) { - 
BUMP_MIB(&sctps->sctps_mib, + SCTPS_BUMP_MIB(sctps, sctpAborted); sctp_error_event(sctp, ch, B_FALSE); @@ -4241,7 +4233,7 @@ goto done; case CHUNK_SHUTDOWN_COMPLETE: BUMP_LOCAL(sctp->sctp_ibchunks); - BUMP_MIB(&sctps->sctps_mib, sctpShutdowns); + SCTPS_BUMP_MIB(sctps, sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); @@ -4252,7 +4244,7 @@ case CHUNK_SHUTDOWN_ACK: sctp_shutdown_complete(sctp); BUMP_LOCAL(sctp->sctp_ibchunks); - BUMP_MIB(&sctps->sctps_mib, sctpShutdowns); + SCTPS_BUMP_MIB(sctps, sctpShutdowns); sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0, NULL); sctp_clean_death(sctp, 0); @@ -4435,7 +4427,7 @@ if (sctp->sctp_state >= SCTPS_ESTABLISHED && ((old <= new >> 1) || (old < sctp->sctp_mss))) { sctp->sctp_force_sack = 1; - BUMP_MIB(&sctps->sctps_mib, sctpOutWinUpdate); + SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate); (void) sctp_sack(sctp, NULL); } WAKE_SCTP(sctp);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/inet/sctp/sctp_misc.c Mon Jul 19 17:27:45 2010 -0700 @@ -0,0 +1,277 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <inet/common.h> +#include "sctp_impl.h" + +/* Control whether SCTP can enter defensive mode when under memory pressure. */ +static boolean_t sctp_do_reclaim = B_TRUE; + +static void sctp_reclaim_timer(void *); + +/* Diagnostic routine used to return a string associated with the sctp state. */ +char * +sctp_display(sctp_t *sctp, char *sup_buf) +{ + char *buf; + char buf1[30]; + static char priv_buf[INET6_ADDRSTRLEN * 2 + 80]; + char *cp; + conn_t *connp; + + if (sctp == NULL) + return ("NULL_SCTP"); + + connp = sctp->sctp_connp; + buf = (sup_buf != NULL) ? 
sup_buf : priv_buf; + + switch (sctp->sctp_state) { + case SCTPS_IDLE: + cp = "SCTP_IDLE"; + break; + case SCTPS_BOUND: + cp = "SCTP_BOUND"; + break; + case SCTPS_LISTEN: + cp = "SCTP_LISTEN"; + break; + case SCTPS_COOKIE_WAIT: + cp = "SCTP_COOKIE_WAIT"; + break; + case SCTPS_COOKIE_ECHOED: + cp = "SCTP_COOKIE_ECHOED"; + break; + case SCTPS_ESTABLISHED: + cp = "SCTP_ESTABLISHED"; + break; + case SCTPS_SHUTDOWN_PENDING: + cp = "SCTP_SHUTDOWN_PENDING"; + break; + case SCTPS_SHUTDOWN_SENT: + cp = "SCTPS_SHUTDOWN_SENT"; + break; + case SCTPS_SHUTDOWN_RECEIVED: + cp = "SCTPS_SHUTDOWN_RECEIVED"; + break; + case SCTPS_SHUTDOWN_ACK_SENT: + cp = "SCTPS_SHUTDOWN_ACK_SENT"; + break; + default: + (void) mi_sprintf(buf1, "SCTPUnkState(%d)", sctp->sctp_state); + cp = buf1; + break; + } + (void) mi_sprintf(buf, "[%u, %u] %s", + ntohs(connp->conn_lport), ntohs(connp->conn_fport), cp); + + return (buf); +} + +void +sctp_display_all(sctp_stack_t *sctps) +{ + sctp_t *sctp_walker; + + mutex_enter(&sctps->sctps_g_lock); + for (sctp_walker = list_head(&sctps->sctps_g_list); + sctp_walker != NULL; + sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list, + sctp_walker)) { + (void) sctp_display(sctp_walker, NULL); + } + mutex_exit(&sctps->sctps_g_lock); +} + +/* + * Given a sctp_stack_t and a port (in host byte order), find a listener + * configuration for that port and return the ratio. + */ +uint32_t +sctp_find_listener_conf(sctp_stack_t *sctps, in_port_t port) +{ + sctp_listener_t *sl; + uint32_t ratio = 0; + + mutex_enter(&sctps->sctps_listener_conf_lock); + for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL; + sl = list_next(&sctps->sctps_listener_conf, sl)) { + if (sl->sl_port == port) { + ratio = sl->sl_ratio; + break; + } + } + mutex_exit(&sctps->sctps_listener_conf_lock); + return (ratio); +} + +/* + * To remove all listener limit configuration in a sctp_stack_t. 
+ */ +void +sctp_listener_conf_cleanup(sctp_stack_t *sctps) +{ + sctp_listener_t *sl; + + mutex_enter(&sctps->sctps_listener_conf_lock); + while ((sl = list_head(&sctps->sctps_listener_conf)) != NULL) { + list_remove(&sctps->sctps_listener_conf, sl); + kmem_free(sl, sizeof (sctp_listener_t)); + } + mutex_destroy(&sctps->sctps_listener_conf_lock); + list_destroy(&sctps->sctps_listener_conf); +} + + +/* + * Timeout function to reset the SCTP stack variable sctps_reclaim to false. + */ +static void +sctp_reclaim_timer(void *arg) +{ + sctp_stack_t *sctps = (sctp_stack_t *)arg; + int64_t tot_assoc = 0; + int i; + extern pgcnt_t lotsfree, needfree; + + for (i = 0; i < sctps->sctps_sc_cnt; i++) + tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt; + + /* + * This happens only when a stack is going away. sctps_reclaim_tid + * should not be reset to 0 when returning in this case. + */ + mutex_enter(&sctps->sctps_reclaim_lock); + if (!sctps->sctps_reclaim) { + mutex_exit(&sctps->sctps_reclaim_lock); + return; + } + + if ((freemem >= lotsfree + needfree) || tot_assoc < maxusers) { + sctps->sctps_reclaim = B_FALSE; + sctps->sctps_reclaim_tid = 0; + } else { + /* Stay in defensive mode and restart the timer */ + sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer, + sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period)); + } + mutex_exit(&sctps->sctps_reclaim_lock); +} + +/* + * Kmem reclaim call back function. When the system is under memory + * pressure, we set the SCTP stack variable sctps_reclaim to true. This + * variable is reset to false after sctps_reclaim_period msecs. During this + * period, SCTP will be more aggressive in aborting connections not making + * progress, meaning retransmitting for shorter time (sctp_pa_early_abort/ + * sctp_pp_early_abort number of strikes). 
+ */ +/* ARGSUSED */ +void +sctp_conn_reclaim(void *arg) +{ + netstack_handle_t nh; + netstack_t *ns; + sctp_stack_t *sctps; + extern pgcnt_t lotsfree, needfree; + + if (!sctp_do_reclaim) + return; + + /* + * The reclaim function may be called even when the system is not + * really under memory pressure. + */ + if (freemem >= lotsfree + needfree) + return; + + netstack_next_init(&nh); + while ((ns = netstack_next(&nh)) != NULL) { + int i; + int64_t tot_assoc = 0; + + /* + * During boot time, the first netstack_t is created and + * initialized before SCTP has registered with the netstack + * framework. If this reclaim function is called before SCTP + * has finished its initialization, netstack_next() will + * return the first netstack_t (since its netstack_flags is + * not NSF_UNINIT). And its netstack_sctp will be NULL. We + * need to catch it. + * + * All subsequent netstack_t creation will not have this + * problem since the initialization is not finished until SCTP + * has finished its own sctp_stack_t initialization. Hence + * netstack_next() will not return one with NULL netstack_sctp. + */ + if ((sctps = ns->netstack_sctp) == NULL) { + netstack_rele(ns); + continue; + } + + /* + * Even if the system is under memory pressure, the reason may + * not be because of SCTP activity. Check the number of + * associations in each stack. If the number exceeds the + * threshold (maxusers), turn on defensive mode. 
+ */ + for (i = 0; i < sctps->sctps_sc_cnt; i++) + tot_assoc += sctps->sctps_sc[i]->sctp_sc_assoc_cnt; + if (tot_assoc < maxusers) { + netstack_rele(ns); + continue; + } + + mutex_enter(&sctps->sctps_reclaim_lock); + if (!sctps->sctps_reclaim) { + sctps->sctps_reclaim = B_TRUE; + sctps->sctps_reclaim_tid = timeout(sctp_reclaim_timer, + sctps, MSEC_TO_TICK(sctps->sctps_reclaim_period)); + SCTP_KSTAT(sctps, sctp_reclaim_cnt); + } + mutex_exit(&sctps->sctps_reclaim_lock); + netstack_rele(ns); + } + netstack_next_fini(&nh); +} + +/* + * When a CPU is added, we need to allocate the per CPU stats struct. + */ +void +sctp_stack_cpu_add(sctp_stack_t *sctps, processorid_t cpu_seqid) +{ + int i; + + if (cpu_seqid < sctps->sctps_sc_cnt) + return; + for (i = sctps->sctps_sc_cnt; i <= cpu_seqid; i++) { + ASSERT(sctps->sctps_sc[i] == NULL); + sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t), + KM_SLEEP); + } + membar_producer(); + sctps->sctps_sc_cnt = cpu_seqid + 1; +}
--- a/usr/src/uts/common/inet/sctp/sctp_output.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_output.c Mon Jul 19 17:27:45 2010 -0700 @@ -759,7 +759,7 @@ return (start_mp); } } - BUMP_MIB(&sctps->sctps_mib, sctpOutFastRetrans); + SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans); BUMP_LOCAL(sctp->sctp_rxtchunks); SCTP_CHUNK_CLEAR_REXMIT(mp); if (start_mp == NULL) { @@ -997,7 +997,7 @@ int32_t pad = 0; int32_t pathmax; int extra; - int64_t now = ddi_get_lbolt64(); + int64_t now = LBOLT_FASTPATH64; sctp_faddr_t *fp; sctp_faddr_t *lfp; sctp_data_hdr_t *sdc; @@ -1772,7 +1772,7 @@ sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1; ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn); sctp->sctp_zero_win_probe = B_TRUE; - BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); + SCTPS_BUMP_MIB(sctps, sctpOutWinProbe); } return; out: @@ -1818,7 +1818,7 @@ if (oldfp != fp && oldfp->suna != 0) SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->rto); SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->rto); - BUMP_MIB(&sctps->sctps_mib, sctpOutWinProbe); + SCTPS_BUMP_MIB(sctps, sctpOutWinProbe); return; }
--- a/usr/src/uts/common/inet/sctp/sctp_snmp.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_snmp.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -44,8 +43,11 @@ #include "sctp_impl.h" #include "sctp_addr.h" -static int sctp_snmp_state(sctp_t *sctp); - +static void sctp_clr_kstats2(sctp_kstat_t *); +static void sctp_add_kstats2(sctp_kstat_counter_t *, sctp_kstat_t *); +static int sctp_snmp_state(sctp_t *); +static void sctp_sum_mib(sctp_stack_t *, mib2_sctp_t *); +static void sctp_add_mib(mib2_sctp_t *, mib2_sctp_t *); static int sctp_kstat_update(kstat_t *kp, int rw) @@ -56,6 +58,7 @@ netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; netstack_t *ns; sctp_stack_t *sctps; + mib2_sctp_t sctp_mib; if (kp == NULL|| kp->ks_data == NULL) return (EIO); @@ -71,13 +74,21 @@ netstack_rele(ns); return (-1); } - myzoneid = netstackid_to_zoneid(stackid); + + /* + * For all exclusive netstacks, the zone ID is always GLOBAL_ZONEID. + */ + if (stackid != GLOBAL_NETSTACKID) + myzoneid = GLOBAL_ZONEID; + else + myzoneid = curproc->p_zone->zone_id; + + bzero(&sctp_mib, sizeof (sctp_mib)); /* * Get the number of current associations and gather their * individual set of statistics. */ - SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0); sctp_prev = NULL; mutex_enter(&sctps->sctps_g_lock); sctp = list_head(&sctps->sctps_g_list); @@ -98,17 +109,21 @@ if (sctp->sctp_state == SCTPS_ESTABLISHED || sctp->sctp_state == SCTPS_SHUTDOWN_PENDING || sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) { - BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab); + /* + * Just bump the local sctp_mib. The number of + * existing associations is not kept in kernel. 
+ */ + BUMP_MIB(&sctp_mib, sctpCurrEstab); } if (sctp->sctp_opkts) { - UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, + SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts); sctp->sctp_opkts = 0; } if (sctp->sctp_obchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, + SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks); UPDATE_LOCAL(sctp->sctp_cum_obchunks, sctp->sctp_obchunks); @@ -116,7 +131,7 @@ } if (sctp->sctp_odchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, + SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, sctp->sctp_odchunks); UPDATE_LOCAL(sctp->sctp_cum_odchunks, sctp->sctp_odchunks); @@ -124,7 +139,7 @@ } if (sctp->sctp_oudchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks, + SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks); UPDATE_LOCAL(sctp->sctp_cum_oudchunks, sctp->sctp_oudchunks); @@ -132,7 +147,7 @@ } if (sctp->sctp_rxtchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, + SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, sctp->sctp_rxtchunks); UPDATE_LOCAL(sctp->sctp_cum_rxtchunks, sctp->sctp_rxtchunks); @@ -140,13 +155,13 @@ } if (sctp->sctp_ipkts) { - UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, + SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts); sctp->sctp_ipkts = 0; } if (sctp->sctp_ibchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, + SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks); UPDATE_LOCAL(sctp->sctp_cum_ibchunks, sctp->sctp_ibchunks); @@ -154,7 +169,7 @@ } if (sctp->sctp_idchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, + SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks); UPDATE_LOCAL(sctp->sctp_cum_idchunks, sctp->sctp_idchunks); @@ -162,7 +177,7 @@ } if (sctp->sctp_iudchunks) { - UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks, + SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks); UPDATE_LOCAL(sctp->sctp_cum_iudchunks, sctp->sctp_iudchunks); @@ -170,13 +185,13 @@ } if 
(sctp->sctp_fragdmsgs) { - UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, + SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); sctp->sctp_fragdmsgs = 0; } if (sctp->sctp_reassmsgs) { - UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, + SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_reassmsgs = 0; } @@ -190,6 +205,8 @@ if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); + sctp_sum_mib(sctps, &sctp_mib); + /* Copy data from the SCTP MIB */ sctpkp = (sctp_named_kstat_t *)kp->ks_data; @@ -200,52 +217,41 @@ sctpkp->sctpValCookieLife.value.ui32 = sctps->sctps_cookie_life; sctpkp->sctpMaxInitRetr.value.ui32 = sctps->sctps_max_init_retr; - sctpkp->sctpCurrEstab.value.i32 = sctps->sctps_mib.sctpCurrEstab; - sctpkp->sctpActiveEstab.value.i32 = sctps->sctps_mib.sctpActiveEstab; - sctpkp->sctpPassiveEstab.value.i32 = sctps->sctps_mib.sctpPassiveEstab; - sctpkp->sctpAborted.value.i32 = sctps->sctps_mib.sctpAborted; - sctpkp->sctpShutdowns.value.i32 = sctps->sctps_mib.sctpShutdowns; - sctpkp->sctpOutOfBlue.value.i32 = sctps->sctps_mib.sctpOutOfBlue; - sctpkp->sctpChecksumError.value.i32 = - sctps->sctps_mib.sctpChecksumError; - sctpkp->sctpOutCtrlChunks.value.i64 = - sctps->sctps_mib.sctpOutCtrlChunks; - sctpkp->sctpOutOrderChunks.value.i64 = - sctps->sctps_mib.sctpOutOrderChunks; - sctpkp->sctpOutUnorderChunks.value.i64 = - sctps->sctps_mib.sctpOutUnorderChunks; - sctpkp->sctpRetransChunks.value.i64 = - sctps->sctps_mib.sctpRetransChunks; - sctpkp->sctpOutAck.value.i32 = sctps->sctps_mib.sctpOutAck; - sctpkp->sctpOutAckDelayed.value.i32 = - sctps->sctps_mib.sctpOutAckDelayed; - sctpkp->sctpOutWinUpdate.value.i32 = sctps->sctps_mib.sctpOutWinUpdate; - sctpkp->sctpOutFastRetrans.value.i32 = - sctps->sctps_mib.sctpOutFastRetrans; - sctpkp->sctpOutWinProbe.value.i32 = sctps->sctps_mib.sctpOutWinProbe; - sctpkp->sctpInCtrlChunks.value.i64 = sctps->sctps_mib.sctpInCtrlChunks; - sctpkp->sctpInOrderChunks.value.i64 = - 
sctps->sctps_mib.sctpInOrderChunks; - sctpkp->sctpInUnorderChunks.value.i64 = - sctps->sctps_mib.sctpInUnorderChunks; - sctpkp->sctpInAck.value.i32 = sctps->sctps_mib.sctpInAck; - sctpkp->sctpInDupAck.value.i32 = sctps->sctps_mib.sctpInDupAck; - sctpkp->sctpInAckUnsent.value.i32 = sctps->sctps_mib.sctpInAckUnsent; - sctpkp->sctpFragUsrMsgs.value.i64 = sctps->sctps_mib.sctpFragUsrMsgs; - sctpkp->sctpReasmUsrMsgs.value.i64 = sctps->sctps_mib.sctpReasmUsrMsgs; - sctpkp->sctpOutSCTPPkts.value.i64 = sctps->sctps_mib.sctpOutSCTPPkts; - sctpkp->sctpInSCTPPkts.value.i64 = sctps->sctps_mib.sctpInSCTPPkts; - sctpkp->sctpInInvalidCookie.value.i32 = - sctps->sctps_mib.sctpInInvalidCookie; - sctpkp->sctpTimRetrans.value.i32 = sctps->sctps_mib.sctpTimRetrans; - sctpkp->sctpTimRetransDrop.value.i32 = - sctps->sctps_mib.sctpTimRetransDrop; + /* Copy data from the local sctp_mib to the provided kstat. */ + sctpkp->sctpCurrEstab.value.i32 = sctp_mib.sctpCurrEstab; + sctpkp->sctpActiveEstab.value.i32 = sctp_mib.sctpActiveEstab; + sctpkp->sctpPassiveEstab.value.i32 = sctp_mib.sctpPassiveEstab; + sctpkp->sctpAborted.value.i32 = sctp_mib.sctpAborted; + sctpkp->sctpShutdowns.value.i32 = sctp_mib.sctpShutdowns; + sctpkp->sctpOutOfBlue.value.i32 = sctp_mib.sctpOutOfBlue; + sctpkp->sctpChecksumError.value.i32 = sctp_mib.sctpChecksumError; + sctpkp->sctpOutCtrlChunks.value.i64 = sctp_mib.sctpOutCtrlChunks; + sctpkp->sctpOutOrderChunks.value.i64 = sctp_mib.sctpOutOrderChunks; + sctpkp->sctpOutUnorderChunks.value.i64 = sctp_mib.sctpOutUnorderChunks; + sctpkp->sctpRetransChunks.value.i64 = sctp_mib.sctpRetransChunks; + sctpkp->sctpOutAck.value.i32 = sctp_mib.sctpOutAck; + sctpkp->sctpOutAckDelayed.value.i32 = sctp_mib.sctpOutAckDelayed; + sctpkp->sctpOutWinUpdate.value.i32 = sctp_mib.sctpOutWinUpdate; + sctpkp->sctpOutFastRetrans.value.i32 = sctp_mib.sctpOutFastRetrans; + sctpkp->sctpOutWinProbe.value.i32 = sctp_mib.sctpOutWinProbe; + sctpkp->sctpInCtrlChunks.value.i64 = 
sctp_mib.sctpInCtrlChunks; + sctpkp->sctpInOrderChunks.value.i64 = sctp_mib.sctpInOrderChunks; + sctpkp->sctpInUnorderChunks.value.i64 = sctp_mib.sctpInUnorderChunks; + sctpkp->sctpInAck.value.i32 = sctp_mib.sctpInAck; + sctpkp->sctpInDupAck.value.i32 = sctp_mib.sctpInDupAck; + sctpkp->sctpInAckUnsent.value.i32 = sctp_mib.sctpInAckUnsent; + sctpkp->sctpFragUsrMsgs.value.i64 = sctp_mib.sctpFragUsrMsgs; + sctpkp->sctpReasmUsrMsgs.value.i64 = sctp_mib.sctpReasmUsrMsgs; + sctpkp->sctpOutSCTPPkts.value.i64 = sctp_mib.sctpOutSCTPPkts; + sctpkp->sctpInSCTPPkts.value.i64 = sctp_mib.sctpInSCTPPkts; + sctpkp->sctpInInvalidCookie.value.i32 = sctp_mib.sctpInInvalidCookie; + sctpkp->sctpTimRetrans.value.i32 = sctp_mib.sctpTimRetrans; + sctpkp->sctpTimRetransDrop.value.i32 = sctp_mib.sctpTimRetransDrop; sctpkp->sctpTimHeartBeatProbe.value.i32 = - sctps->sctps_mib.sctpTimHeartBeatProbe; - sctpkp->sctpTimHeartBeatDrop.value.i32 = - sctps->sctps_mib.sctpTimHeartBeatDrop; - sctpkp->sctpListenDrop.value.i32 = sctps->sctps_mib.sctpListenDrop; - sctpkp->sctpInClosed.value.i32 = sctps->sctps_mib.sctpInClosed; + sctp_mib.sctpTimHeartBeatProbe; + sctpkp->sctpTimHeartBeatDrop.value.i32 = sctp_mib.sctpTimHeartBeatDrop; + sctpkp->sctpListenDrop.value.i32 = sctp_mib.sctpListenDrop; + sctpkp->sctpInClosed.value.i32 = sctp_mib.sctpInClosed; netstack_rele(ns); return (0); @@ -302,7 +308,7 @@ ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctp", "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(sctp_named_kstat_t), 0, stackid); - if (ksp == NULL || ksp->ks_data == NULL) + if (ksp == NULL) return (NULL); /* These won't change. */ @@ -318,6 +324,123 @@ } /* + * To set all sctp_stat_t counters to 0. 
+ */ +static void +sctp_clr_kstats2(sctp_kstat_t *stats) +{ + stats->sctp_add_faddr.value.ui64 = 0; + stats->sctp_add_timer.value.ui64 = 0; + stats->sctp_conn_create.value.ui64 = 0; + stats->sctp_find_next_tq.value.ui64 = 0; + stats->sctp_fr_add_hdr.value.ui64 = 0; + stats->sctp_fr_not_found.value.ui64 = 0; + stats->sctp_output_failed.value.ui64 = 0; + stats->sctp_rexmit_failed.value.ui64 = 0; + stats->sctp_send_init_failed.value.ui64 = 0; + stats->sctp_send_cookie_failed.value.ui64 = 0; + stats->sctp_send_cookie_ack_failed.value.ui64 = 0; + stats->sctp_send_err_failed.value.ui64 = 0; + stats->sctp_send_sack_failed.value.ui64 = 0; + stats->sctp_send_shutdown_failed.value.ui64 = 0; + stats->sctp_send_shutdown_ack_failed.value.ui64 = 0; + stats->sctp_send_shutdown_comp_failed.value.ui64 = 0; + stats->sctp_send_user_abort_failed.value.ui64 = 0; + stats->sctp_send_asconf_failed.value.ui64 = 0; + stats->sctp_send_asconf_ack_failed.value.ui64 = 0; + stats->sctp_send_ftsn_failed.value.ui64 = 0; + stats->sctp_send_hb_failed.value.ui64 = 0; + stats->sctp_return_hb_failed.value.ui64 = 0; + stats->sctp_ss_rexmit_failed.value.ui64 = 0; + stats->sctp_cl_connect.value.ui64 = 0; + stats->sctp_cl_assoc_change.value.ui64 = 0; + stats->sctp_cl_check_addrs.value.ui64 = 0; + stats->sctp_reclaim_cnt.value.ui64 = 0; + stats->sctp_listen_cnt_drop.value.ui64 = 0; +} + +/* + * To add counters from the per CPU sctp_kstat_counter_t to the stack + * sctp_kstat_t. 
+ */ +static void +sctp_add_kstats2(sctp_kstat_counter_t *from, sctp_kstat_t *to) +{ + to->sctp_add_faddr.value.ui64 += from->sctp_add_faddr; + to->sctp_add_timer.value.ui64 += from->sctp_add_timer; + to->sctp_conn_create.value.ui64 += from->sctp_conn_create; + to->sctp_find_next_tq.value.ui64 += from->sctp_find_next_tq; + to->sctp_fr_add_hdr.value.ui64 += from->sctp_fr_add_hdr; + to->sctp_fr_not_found.value.ui64 += from->sctp_fr_not_found; + to->sctp_output_failed.value.ui64 += from->sctp_output_failed; + to->sctp_rexmit_failed.value.ui64 += from->sctp_rexmit_failed; + to->sctp_send_init_failed.value.ui64 += from->sctp_send_init_failed; + to->sctp_send_cookie_failed.value.ui64 += from->sctp_send_cookie_failed; + to->sctp_send_cookie_ack_failed.value.ui64 += + from->sctp_send_cookie_ack_failed; + to->sctp_send_err_failed.value.ui64 += from->sctp_send_err_failed; + to->sctp_send_sack_failed.value.ui64 += from->sctp_send_sack_failed; + to->sctp_send_shutdown_failed.value.ui64 += + from->sctp_send_shutdown_failed; + to->sctp_send_shutdown_ack_failed.value.ui64 += + from->sctp_send_shutdown_ack_failed; + to->sctp_send_shutdown_comp_failed.value.ui64 += + from->sctp_send_shutdown_comp_failed; + to->sctp_send_user_abort_failed.value.ui64 += + from->sctp_send_user_abort_failed; + to->sctp_send_asconf_failed.value.ui64 += from->sctp_send_asconf_failed; + to->sctp_send_asconf_ack_failed.value.ui64 += + from->sctp_send_asconf_ack_failed; + to->sctp_send_ftsn_failed.value.ui64 += from->sctp_send_ftsn_failed; + to->sctp_send_hb_failed.value.ui64 += from->sctp_send_hb_failed; + to->sctp_return_hb_failed.value.ui64 += from->sctp_return_hb_failed; + to->sctp_ss_rexmit_failed.value.ui64 += from->sctp_ss_rexmit_failed; + to->sctp_cl_connect.value.ui64 += from->sctp_cl_connect; + to->sctp_cl_assoc_change.value.ui64 += from->sctp_cl_assoc_change; + to->sctp_cl_check_addrs.value.ui64 += from->sctp_cl_check_addrs; +} + +/* + * Sum up all per CPU SCTP kstat counters. 
+ */ +static int +sctp_kstat2_update(kstat_t *kp, int rw) +{ + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + sctp_stack_t *sctps; + sctp_kstat_t *stats; + int i; + int cnt; + + if (rw == KSTAT_WRITE) + return (EACCES); + + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + sctps = ns->netstack_sctp; + if (sctps == NULL) { + netstack_rele(ns); + return (-1); + } + + stats = (sctp_kstat_t *)kp->ks_data; + sctp_clr_kstats2(stats); + + /* + * sctps_sc_cnt may change in the middle of the loop. It is better + * to get its value first. + */ + cnt = sctps->sctps_sc_cnt; + for (i = 0; i < cnt; i++) + sctp_add_kstats2(&sctps->sctps_sc[i]->sctp_sc_stats, stats); + + netstack_rele(ns); + return (0); +} + +/* * The following kstats are for debugging purposes. They keep * track of problems which should not happen normally. But in * those cases which they do happen, these kstats would be handy @@ -325,7 +448,7 @@ * to be consumed by customers. 
*/ void * -sctp_kstat2_init(netstackid_t stackid, sctp_kstat_t *sctps_statisticsp) +sctp_kstat2_init(netstackid_t stackid) { kstat_t *ksp; @@ -356,18 +479,19 @@ { "sctp_cl_connect", KSTAT_DATA_UINT64 }, { "sctp_cl_assoc_change", KSTAT_DATA_UINT64 }, { "sctp_cl_check_addrs", KSTAT_DATA_UINT64 }, + { "sctp_reclaim_drop", KSTAT_DATA_UINT64 }, + { "sctp_listen_cnt_drop", KSTAT_DATA_UINT64 }, }; ksp = kstat_create_netstack(SCTP_MOD_NAME, 0, "sctpstat", "net", - KSTAT_TYPE_NAMED, NUM_OF_FIELDS(template), KSTAT_FLAG_VIRTUAL, - stackid); + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(template), 0, stackid); if (ksp == NULL) return (NULL); - bcopy(&template, sctps_statisticsp, sizeof (template)); - ksp->ks_data = (void *)sctps_statisticsp; + bcopy(&template, ksp->ks_data, sizeof (template)); ksp->ks_private = (void *)(uintptr_t)stackid; + ksp->ks_update = sctp_kstat2_update; kstat_install(ksp); return (ksp); @@ -427,6 +551,7 @@ conn_t *connp; boolean_t needattr; int idx; + mib2_sctp_t sctp_mib; /* * Make copies of the original message. 
@@ -456,19 +581,13 @@ mp_rem_data = mp_rem_ctl->b_cont; mp_attr_data = mp_attr_ctl->b_cont; + bzero(&sctp_mib, sizeof (sctp_mib)); + /* hostname address parameters are not supported in Solaris */ sce.sctpAssocRemHostName.o_length = 0; sce.sctpAssocRemHostName.o_bytes[0] = 0; /* build table of connections -- need count in fixed part */ - SET_MIB(sctps->sctps_mib.sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ); - SET_MIB(sctps->sctps_mib.sctpRtoMin, sctps->sctps_rto_ming); - SET_MIB(sctps->sctps_mib.sctpRtoMax, sctps->sctps_rto_maxg); - SET_MIB(sctps->sctps_mib.sctpRtoInitial, sctps->sctps_rto_initialg); - SET_MIB(sctps->sctps_mib.sctpMaxAssocs, -1); - SET_MIB(sctps->sctps_mib.sctpValCookieLife, sctps->sctps_cookie_life); - SET_MIB(sctps->sctps_mib.sctpMaxInitRetr, sctps->sctps_max_init_retr); - SET_MIB(sctps->sctps_mib.sctpCurrEstab, 0); idx = 0; mutex_enter(&sctps->sctps_g_lock); @@ -490,54 +609,51 @@ if (sctp->sctp_state == SCTPS_ESTABLISHED || sctp->sctp_state == SCTPS_SHUTDOWN_PENDING || sctp->sctp_state == SCTPS_SHUTDOWN_RECEIVED) { - BUMP_MIB(&sctps->sctps_mib, sctpCurrEstab); + /* + * Just bump the local sctp_mib. The number of + * existing associations is not kept in kernel. 
+ */ + BUMP_MIB(&sctp_mib, sctpCurrEstab); } - UPDATE_MIB(&sctps->sctps_mib, - sctpOutSCTPPkts, sctp->sctp_opkts); + SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts); sctp->sctp_opkts = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpOutCtrlChunks, sctp->sctp_obchunks); + SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks); UPDATE_LOCAL(sctp->sctp_cum_obchunks, sctp->sctp_obchunks); sctp->sctp_obchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpOutOrderChunks, sctp->sctp_odchunks); + SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, + sctp->sctp_odchunks); UPDATE_LOCAL(sctp->sctp_cum_odchunks, sctp->sctp_odchunks); sctp->sctp_odchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, sctpOutUnorderChunks, + SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks); UPDATE_LOCAL(sctp->sctp_cum_oudchunks, sctp->sctp_oudchunks); sctp->sctp_oudchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpRetransChunks, sctp->sctp_rxtchunks); + SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, + sctp->sctp_rxtchunks); UPDATE_LOCAL(sctp->sctp_cum_rxtchunks, sctp->sctp_rxtchunks); sctp->sctp_rxtchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpInSCTPPkts, sctp->sctp_ipkts); + SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts); sctp->sctp_ipkts = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpInCtrlChunks, sctp->sctp_ibchunks); + SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks); UPDATE_LOCAL(sctp->sctp_cum_ibchunks, sctp->sctp_ibchunks); sctp->sctp_ibchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpInOrderChunks, sctp->sctp_idchunks); + SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks); UPDATE_LOCAL(sctp->sctp_cum_idchunks, sctp->sctp_idchunks); sctp->sctp_idchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, sctpInUnorderChunks, + SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks); UPDATE_LOCAL(sctp->sctp_cum_iudchunks, sctp->sctp_iudchunks); sctp->sctp_iudchunks = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpFragUsrMsgs, sctp->sctp_fragdmsgs); + 
SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); sctp->sctp_fragdmsgs = 0; - UPDATE_MIB(&sctps->sctps_mib, - sctpReasmUsrMsgs, sctp->sctp_reassmsgs); + SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); sctp->sctp_reassmsgs = 0; sce.sctpAssocId = ntohl(sctp->sctp_lvtag); @@ -700,15 +816,12 @@ if (sctp_prev != NULL) SCTP_REFRELE(sctp_prev); - /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(sctps->sctps_mib.sctpEntrySize, sizeof (sce)); - SET_MIB(sctps->sctps_mib.sctpLocalEntrySize, sizeof (scle)); - SET_MIB(sctps->sctps_mib.sctpRemoteEntrySize, sizeof (scre)); + sctp_sum_mib(sctps, &sctp_mib); + optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; optp->level = MIB2_SCTP; optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&sctps->sctps_mib, - sizeof (sctps->sctps_mib)); + (void) snmp_append_data(mpdata, (char *)&sctp_mib, sizeof (sctp_mib)); optp->len = msgdsize(mpdata); qreply(q, mpctl); @@ -781,3 +894,76 @@ return (0); } } + +/* + * To sum up all MIB2 stats for a sctp_stack_t from all per CPU stats. The + * caller should initialize the target mib2_sctp_t properly as this function + * just adds up all the per CPU stats. + */ +static void +sctp_sum_mib(sctp_stack_t *sctps, mib2_sctp_t *sctp_mib) +{ + int i; + int cnt; + + /* Static componets of mib2_sctp_t. 
*/ + SET_MIB(sctp_mib->sctpRtoAlgorithm, MIB2_SCTP_RTOALGO_VANJ); + SET_MIB(sctp_mib->sctpRtoMin, sctps->sctps_rto_ming); + SET_MIB(sctp_mib->sctpRtoMax, sctps->sctps_rto_maxg); + SET_MIB(sctp_mib->sctpRtoInitial, sctps->sctps_rto_initialg); + SET_MIB(sctp_mib->sctpMaxAssocs, -1); + SET_MIB(sctp_mib->sctpValCookieLife, sctps->sctps_cookie_life); + SET_MIB(sctp_mib->sctpMaxInitRetr, sctps->sctps_max_init_retr); + + /* fixed length structure for IPv4 and IPv6 counters */ + SET_MIB(sctp_mib->sctpEntrySize, sizeof (mib2_sctpConnEntry_t)); + SET_MIB(sctp_mib->sctpLocalEntrySize, + sizeof (mib2_sctpConnLocalEntry_t)); + SET_MIB(sctp_mib->sctpRemoteEntrySize, + sizeof (mib2_sctpConnRemoteEntry_t)); + + /* + * sctps_sc_cnt may change in the middle of the loop. It is better + * to get its value first. + */ + cnt = sctps->sctps_sc_cnt; + for (i = 0; i < cnt; i++) + sctp_add_mib(&sctps->sctps_sc[i]->sctp_sc_mib, sctp_mib); +} + +static void +sctp_add_mib(mib2_sctp_t *from, mib2_sctp_t *to) +{ + to->sctpActiveEstab += from->sctpActiveEstab; + to->sctpPassiveEstab += from->sctpPassiveEstab; + to->sctpAborted += from->sctpAborted; + to->sctpShutdowns += from->sctpShutdowns; + to->sctpOutOfBlue += from->sctpOutOfBlue; + to->sctpChecksumError += from->sctpChecksumError; + to->sctpOutCtrlChunks += from->sctpOutCtrlChunks; + to->sctpOutOrderChunks += from->sctpOutOrderChunks; + to->sctpOutUnorderChunks += from->sctpOutUnorderChunks; + to->sctpRetransChunks += from->sctpRetransChunks; + to->sctpOutAck += from->sctpOutAck; + to->sctpOutAckDelayed += from->sctpOutAckDelayed; + to->sctpOutWinUpdate += from->sctpOutWinUpdate; + to->sctpOutFastRetrans += from->sctpOutFastRetrans; + to->sctpOutWinProbe += from->sctpOutWinProbe; + to->sctpInCtrlChunks += from->sctpInCtrlChunks; + to->sctpInOrderChunks += from->sctpInOrderChunks; + to->sctpInUnorderChunks += from->sctpInUnorderChunks; + to->sctpInAck += from->sctpInAck; + to->sctpInDupAck += from->sctpInDupAck; + to->sctpInAckUnsent += 
from->sctpInAckUnsent; + to->sctpFragUsrMsgs += from->sctpFragUsrMsgs; + to->sctpReasmUsrMsgs += from->sctpReasmUsrMsgs; + to->sctpOutSCTPPkts += from->sctpOutSCTPPkts; + to->sctpInSCTPPkts += from->sctpInSCTPPkts; + to->sctpInInvalidCookie += from->sctpInInvalidCookie; + to->sctpTimRetrans += from->sctpTimRetrans; + to->sctpTimRetransDrop += from->sctpTimRetransDrop; + to->sctpTimHeartBeatProbe += from->sctpTimHeartBeatProbe; + to->sctpTimHeartBeatDrop += from->sctpTimHeartBeatDrop; + to->sctpListenDrop += from->sctpListenDrop; + to->sctpInClosed += from->sctpInClosed; +}
--- a/usr/src/uts/common/inet/sctp/sctp_stack.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_stack.h Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _INET_SCTP_SCTP_STACK_H @@ -62,9 +61,60 @@ kstat_named_t sctp_cl_connect; kstat_named_t sctp_cl_assoc_change; kstat_named_t sctp_cl_check_addrs; + kstat_named_t sctp_reclaim_cnt; + kstat_named_t sctp_listen_cnt_drop; } sctp_kstat_t; -#define SCTP_KSTAT(sctps, x) ((sctps)->sctps_statistics.x.value.ui64++) +/* + * This struct contains only the counter part of sctp_kstat_t. It is used + * in sctp_stats_cpu_t instead of sctp_kstat_t to save memory space. + */ +typedef struct sctp_kstat_counter_s { + uint64_t sctp_add_faddr; + uint64_t sctp_add_timer; + uint64_t sctp_conn_create; + uint64_t sctp_find_next_tq; + uint64_t sctp_fr_add_hdr; + uint64_t sctp_fr_not_found; + uint64_t sctp_output_failed; + uint64_t sctp_rexmit_failed; + uint64_t sctp_send_init_failed; + uint64_t sctp_send_cookie_failed; + uint64_t sctp_send_cookie_ack_failed; + uint64_t sctp_send_err_failed; + uint64_t sctp_send_sack_failed; + uint64_t sctp_send_shutdown_failed; + uint64_t sctp_send_shutdown_ack_failed; + uint64_t sctp_send_shutdown_comp_failed; + uint64_t sctp_send_user_abort_failed; + uint64_t sctp_send_asconf_failed; + uint64_t sctp_send_asconf_ack_failed; + uint64_t sctp_send_ftsn_failed; + uint64_t sctp_send_hb_failed; + uint64_t sctp_return_hb_failed; + uint64_t sctp_ss_rexmit_failed; + uint64_t sctp_cl_connect; + uint64_t sctp_cl_assoc_change; + uint64_t sctp_cl_check_addrs; + uint64_t sctp_reclaim_cnt; + uint64_t sctp_listen_cnt_drop; +} sctp_kstat_counter_t; + +/* Per CPU SCTP statistics counters. 
*/ +typedef struct { + int64_t sctp_sc_assoc_cnt; + mib2_sctp_t sctp_sc_mib; + sctp_kstat_counter_t sctp_sc_stats; +} sctp_stats_cpu_t; + +#define SCTP_KSTAT(sctps, x) \ + ((sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_stats.x++) + +#define SCTPS_BUMP_MIB(sctps, x) \ + BUMP_MIB(&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_mib, x) + +#define SCTPS_UPDATE_MIB(sctps, x, y) \ + UPDATE_MIB(&(sctps)->sctps_sc[CPU->cpu_seqid]->sctp_sc_mib, x, y) /* * SCTP stack instances @@ -72,8 +122,6 @@ struct sctp_stack { netstack_t *sctps_netstack; /* Common netstack */ - mib2_sctp_t sctps_mib; - /* Protected by sctps_g_lock */ struct list sctps_g_list; /* SCTP instance data chain */ kmutex_t sctps_g_lock; @@ -94,7 +142,10 @@ /* holds sctp tunables */ struct mod_prop_info_s *sctps_propinfo_tbl; -/* This lock protects the SCTP recvq_tq_list array and recvq_tq_list_cur_sz. */ + /* + * This lock protects the SCTP recvq_tq_list array and + * recvq_tq_list_cur_sz. + */ kmutex_t sctps_rq_tq_lock; int sctps_recvq_tq_list_max_sz; taskq_t **sctps_recvq_tq_list; @@ -113,11 +164,31 @@ uint32_t sctps_g_ipifs_count; krwlock_t sctps_g_ipifs_lock; - /* kstat exporting sctp_mib data */ + /* kstat exporting mib2_sctp_t and sctp_kstat_t data */ kstat_t *sctps_mibkp; kstat_t *sctps_kstat; - sctp_kstat_t sctps_statistics; + + /* Variables for handling kmem reclaim call back. */ + kmutex_t sctps_reclaim_lock; + boolean_t sctps_reclaim; + timeout_id_t sctps_reclaim_tid; + uint32_t sctps_reclaim_period; + + /* Listener association limit configuration. */ + kmutex_t sctps_listener_conf_lock; + list_t sctps_listener_conf; + + /* + * Per CPU stats + * + * sctps_sc: array of pointer to per CPU stats. The i-th element in + * the array represents the stats of the CPU with cpu_seqid. + * sctps_sc_cnt: number of CPU stats in the sctps_sc array. + */ + sctp_stats_cpu_t **sctps_sc; + int sctps_sc_cnt; }; + typedef struct sctp_stack sctp_stack_t; #ifdef __cplusplus
--- a/usr/src/uts/common/inet/sctp/sctp_timer.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_timer.c Mon Jul 19 17:27:45 2010 -0700 @@ -57,6 +57,16 @@ clock_t sctp_tb_time_left; } sctp_tb_t; +/* + * Early abort threshold when the system is under pressure, sctps_reclaim + * is on. + * + * sctp_pa_early_abort: number of strikes per association before abort + * sctp_pp_early_abort: number of strikes per peer address before abort + */ +uint32_t sctp_pa_early_abort = 5; +uint32_t sctp_pp_early_abort = 3; + static void sctp_timer_fire(sctp_tb_t *); /* @@ -371,7 +381,7 @@ sctp->sctp_ack_timer_running = 0; sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max; - BUMP_MIB(&sctps->sctps_mib, sctpOutAckDelayed); + SCTPS_BUMP_MIB(sctps, sctpOutAckDelayed); (void) sctp_sack(sctp, NULL); } @@ -386,21 +396,21 @@ int64_t earliest_expiry; int cnt; sctp_stack_t *sctps = sctp->sctp_sctps; + int pp_max_retr; if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { /* - * If there is a peer address with no strikes, - * don't give up yet. If enough other peer - * address are down, we could otherwise fail - * the association prematurely. This is a - * byproduct of our aggressive probe approach - * when a heartbeat fails to connect. We may - * wish to revisit this... + * If there is a peer address with no strikes, don't give up + * yet unless we are under memory pressure. If enough other + * peer address are down, we could otherwise fail the + * association prematurely. This is a byproduct of our + * aggressive probe approach when a heartbeat fails to + * connect. We may wish to revisit this... 
*/ - if (!sctp_is_a_faddr_clean(sctp)) { + if (sctps->sctps_reclaim || !sctp_is_a_faddr_clean(sctp)) { /* time to give up */ - BUMP_MIB(&sctps->sctps_mib, sctpAborted); - BUMP_MIB(&sctps->sctps_mib, sctpTimHeartBeatDrop); + SCTPS_BUMP_MIB(sctps, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpTimHeartBeatDrop); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? sctp->sctp_client_errno : ETIMEDOUT); @@ -424,6 +434,11 @@ * be OK. */ for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { + if (sctps->sctps_reclaim) + pp_max_retr = MIN(sctp_pp_early_abort, fp->max_retr); + else + pp_max_retr = fp->max_retr; + /* * If the peer is unreachable because there is no available * source address, call sctp_get_dest() to see if it is @@ -438,7 +453,7 @@ sctp_get_dest(sctp, fp); if (fp->state == SCTP_FADDRS_UNREACH) { if (fp->hb_enabled && - ++fp->strikes > fp->max_retr && + ++fp->strikes > pp_max_retr && sctp_faddr_dead(sctp, fp, SCTP_FADDRS_DOWN) == -1) { /* Assoc is dead */ @@ -489,7 +504,7 @@ */ fp->rtt_updates = 0; fp->strikes++; - if (fp->strikes > fp->max_retr) { + if (fp->strikes > pp_max_retr) { if (sctp_faddr_dead(sctp, fp, SCTP_FADDRS_DOWN) == -1) { /* Assoc is dead */ @@ -570,6 +585,7 @@ mblk_t *mp; uint32_t rto_max = sctp->sctp_rto_max; sctp_stack_t *sctps = sctp->sctp_sctps; + int pp_max_retr, pa_max_retr; ASSERT(fp != NULL); @@ -578,22 +594,31 @@ fp->timer_running = 0; + if (!sctps->sctps_reclaim) { + pp_max_retr = fp->max_retr; + pa_max_retr = sctp->sctp_pa_max_rxt; + } else { + /* App may have set a very aggressive retransmission limit. 
*/ + pp_max_retr = MIN(sctp_pp_early_abort, fp->max_retr); + pa_max_retr = MIN(sctp_pa_early_abort, sctp->sctp_pa_max_rxt); + } + /* Check is we've reached the max for retries */ if (sctp->sctp_state < SCTPS_ESTABLISHED) { if (fp->strikes >= sctp->sctp_max_init_rxt) { /* time to give up */ - BUMP_MIB(&sctps->sctps_mib, sctpAborted); - BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop); + SCTPS_BUMP_MIB(sctps, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpTimRetransDrop); sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? sctp->sctp_client_errno : ETIMEDOUT); return; } } else if (sctp->sctp_state >= SCTPS_ESTABLISHED) { - if (sctp->sctp_strikes >= sctp->sctp_pa_max_rxt) { + if (sctp->sctp_strikes >= pa_max_retr) { /* time to give up */ - BUMP_MIB(&sctps->sctps_mib, sctpAborted); - BUMP_MIB(&sctps->sctps_mib, sctpTimRetransDrop); + SCTPS_BUMP_MIB(sctps, sctpAborted); + SCTPS_BUMP_MIB(sctps, sctpTimRetransDrop); sctp_assoc_event(sctp, SCTP_COMM_LOST, 0, NULL); sctp_clean_death(sctp, sctp->sctp_client_errno ? 
sctp->sctp_client_errno : ETIMEDOUT); @@ -601,7 +626,7 @@ } } - if (fp->strikes >= fp->max_retr) { + if (fp->strikes >= pp_max_retr) { if (sctp_faddr_dead(sctp, fp, SCTP_FADDRS_DOWN) == -1) { return; } @@ -624,7 +649,7 @@ return; } - BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); + SCTPS_BUMP_MIB(sctps, sctpTimRetrans); sctp_rexmit(sctp, fp); /* @@ -643,7 +668,7 @@ */ mp = sctp_init_mp(sctp, fp); if (mp != NULL) { - BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); + SCTPS_BUMP_MIB(sctps, sctpTimRetrans); (void) conn_ip_output(mp, fp->ixa); BUMP_LOCAL(sctp->sctp_opkts); } @@ -660,13 +685,13 @@ break; (void) conn_ip_output(mp, fp->ixa); BUMP_LOCAL(sctp->sctp_opkts); - BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); + SCTPS_BUMP_MIB(sctps, sctpTimRetrans); rto_max = sctp->sctp_rto_max_init; break; case SCTPS_SHUTDOWN_SENT: BUMP_LOCAL(sctp->sctp_T2expire); sctp_send_shutdown(sctp, 1); - BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); + SCTPS_BUMP_MIB(sctps, sctpTimRetrans); break; case SCTPS_SHUTDOWN_ACK_SENT: /* We shouldn't have any more outstanding data */ @@ -676,7 +701,7 @@ BUMP_LOCAL(sctp->sctp_T2expire); (void) sctp_shutdown_received(sctp, NULL, B_FALSE, B_TRUE, NULL); - BUMP_MIB(&sctps->sctps_mib, sctpTimRetrans); + SCTPS_BUMP_MIB(sctps, sctpTimRetrans); break; default: ASSERT(0);
--- a/usr/src/uts/common/inet/sctp/sctp_tunables.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/sctp/sctp_tunables.c Mon Jul 19 17:27:45 2010 -0700 @@ -18,9 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <inet/ip.h> @@ -38,6 +38,132 @@ #define SCTP_MSS_MAX SCTP_MSS_MAX_IPV4 /* + * returns the current list of listener limit configuration. + */ +/* ARGSUSED */ +static int +sctp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname, + void *val, uint_t psize, uint_t flags) +{ + sctp_stack_t *sctps = (sctp_stack_t *)cbarg; + sctp_listener_t *sl; + char *pval = val; + size_t nbytes = 0, tbytes = 0; + uint_t size; + int err = 0; + + bzero(pval, psize); + size = psize; + + if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE)) + return (0); + + mutex_enter(&sctps->sctps_listener_conf_lock); + for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL; + sl = list_next(&sctps->sctps_listener_conf, sl)) { + if (psize == size) + nbytes = snprintf(pval, size, "%d:%d", sl->sl_port, + sl->sl_ratio); + else + nbytes = snprintf(pval, size, ",%d:%d", sl->sl_port, + sl->sl_ratio); + size -= nbytes; + pval += nbytes; + tbytes += nbytes; + if (tbytes >= psize) { + /* Buffer overflow, stop copying information */ + err = ENOBUFS; + break; + } + } + + mutex_exit(&sctps->sctps_listener_conf_lock); + return (err); +} + +/* + * add a new listener limit configuration. 
+ */ +/* ARGSUSED */ +static int +sctp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, + const char *ifname, const void* pval, uint_t flags) +{ + sctp_listener_t *new_sl; + sctp_listener_t *sl; + long lport; + long ratio; + char *colon; + sctp_stack_t *sctps = (sctp_stack_t *)cbarg; + + if (flags & MOD_PROP_DEFAULT) + return (ENOTSUP); + + if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 || + lport > USHRT_MAX || *colon != ':') { + return (EINVAL); + } + if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0) + return (EINVAL); + + mutex_enter(&sctps->sctps_listener_conf_lock); + for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL; + sl = list_next(&sctps->sctps_listener_conf, sl)) { + /* There is an existing entry, so update its ratio value. */ + if (sl->sl_port == lport) { + sl->sl_ratio = ratio; + mutex_exit(&sctps->sctps_listener_conf_lock); + return (0); + } + } + + if ((new_sl = kmem_alloc(sizeof (sctp_listener_t), KM_NOSLEEP)) == + NULL) { + mutex_exit(&sctps->sctps_listener_conf_lock); + return (ENOMEM); + } + + new_sl->sl_port = lport; + new_sl->sl_ratio = ratio; + list_insert_tail(&sctps->sctps_listener_conf, new_sl); + mutex_exit(&sctps->sctps_listener_conf_lock); + return (0); +} + +/* + * remove a listener limit configuration. 
+ */ +/* ARGSUSED */ +static int +sctp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, + const char *ifname, const void* pval, uint_t flags) +{ + sctp_listener_t *sl; + long lport; + sctp_stack_t *sctps = (sctp_stack_t *)cbarg; + + if (flags & MOD_PROP_DEFAULT) + return (ENOTSUP); + + if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 || + lport > USHRT_MAX) { + return (EINVAL); + } + mutex_enter(&sctps->sctps_listener_conf_lock); + for (sl = list_head(&sctps->sctps_listener_conf); sl != NULL; + sl = list_next(&sctps->sctps_listener_conf, sl)) { + if (sl->sl_port == lport) { + list_remove(&sctps->sctps_listener_conf, sl); + mutex_exit(&sctps->sctps_listener_conf_lock); + kmem_free(sl, sizeof (sctp_listener_t)); + return (0); + } + } + mutex_exit(&sctps->sctps_listener_conf_lock); + return (ESRCH); +} + +/* * All of these are alterable, within the min/max values given, at run time. * * Note: All those tunables which do not start with "sctp_" are Committed and @@ -210,6 +336,15 @@ mod_set_extra_privports, mod_get_extra_privports, {1, ULP_MAX_PORT, 0}, {0} }, + { "sctp_listener_limit_conf", MOD_PROTO_SCTP, + NULL, sctp_listener_conf_get, {0}, {0} }, + + { "sctp_listener_limit_conf_add", MOD_PROTO_SCTP, + sctp_listener_conf_add, NULL, {0}, {0} }, + + { "sctp_listener_limit_conf_del", MOD_PROTO_SCTP, + sctp_listener_conf_del, NULL, {0}, {0} }, + { "?", MOD_PROTO_SCTP, NULL, mod_get_allprop, {0}, {0} }, { NULL, 0, NULL, NULL, {0}, {0} }
--- a/usr/src/uts/common/inet/tcp/tcp.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp/tcp.c Mon Jul 19 17:27:45 2010 -0700 @@ -250,7 +250,10 @@ ((uint_t)(accid) & (TCP_ACCEPTOR_FANOUT_SIZE - 1)) #endif /* _ILP32 */ -/* Minimum number of connections per listener. */ +/* + * Minimum number of connections which can be created per listener. Used + * when the listener connection count is in effect. + */ static uint32_t tcp_min_conn_listener = 2; uint32_t tcp_early_abort = 30; @@ -400,8 +403,6 @@ extern mod_prop_info_t tcp_propinfo_tbl[]; extern int tcp_propinfo_count; -#define MB (1024 * 1024) - #define IS_VMLOANED_MBLK(mp) \ (((mp)->b_datap->db_struioflag & STRUIO_ZC) != 0) @@ -3700,10 +3701,6 @@ * set of tcp_stack_t's. */ netstack_register(NS_TCP, tcp_stack_init, NULL, tcp_stack_fini); - - mutex_enter(&cpu_lock); - register_cpu_setup_func(tcp_cpu_update, NULL); - mutex_exit(&cpu_lock); } @@ -3804,7 +3801,9 @@ * are not freed until the stack is going away. So there is no need * to grab a lock to access the per CPU tcps_sc[x] pointer. */ + mutex_enter(&cpu_lock); tcps->tcps_sc_cnt = MAX(ncpus, boot_ncpus); + mutex_exit(&cpu_lock); tcps->tcps_sc = kmem_zalloc(max_ncpus * sizeof (tcp_stats_cpu_t *), KM_SLEEP); for (i = 0; i < tcps->tcps_sc_cnt; i++) { @@ -3825,10 +3824,6 @@ void tcp_ddi_g_destroy(void) { - mutex_enter(&cpu_lock); - unregister_cpu_setup_func(tcp_cpu_update, NULL); - mutex_exit(&cpu_lock); - tcp_g_kstat_fini(tcp_g_kstat); tcp_g_kstat = NULL; bzero(&tcp_g_statistics, sizeof (tcp_g_statistics));
--- a/usr/src/uts/common/inet/tcp/tcp_misc.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp/tcp_misc.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -649,52 +648,22 @@ } /* - * Call back function for CPU state change. + * When a CPU is added, we need to allocate the per CPU stats struct. */ -/* ARGSUSED */ -int -tcp_cpu_update(cpu_setup_t what, int id, void *arg) +void +tcp_stack_cpu_add(tcp_stack_t *tcps, processorid_t cpu_seqid) { - cpu_t *cp; - netstack_handle_t nh; - netstack_t *ns; - tcp_stack_t *tcps; int i; - ASSERT(MUTEX_HELD(&cpu_lock)); - cp = cpu[id]; - - switch (what) { - case CPU_CONFIG: - case CPU_ON: - case CPU_INIT: - case CPU_CPUPART_IN: - netstack_next_init(&nh); - while ((ns = netstack_next(&nh)) != NULL) { - tcps = ns->netstack_tcp; - if (cp->cpu_seqid >= tcps->tcps_sc_cnt) { - for (i = tcps->tcps_sc_cnt; i <= cp->cpu_seqid; - i++) { - ASSERT(tcps->tcps_sc[i] == NULL); - tcps->tcps_sc[i] = kmem_zalloc( - sizeof (tcp_stats_cpu_t), KM_SLEEP); - } - membar_producer(); - tcps->tcps_sc_cnt = cp->cpu_seqid + 1; - } - netstack_rele(ns); - } - netstack_next_fini(&nh); - break; - case CPU_UNCONFIG: - case CPU_OFF: - case CPU_CPUPART_OUT: - /* Nothing to do */ - break; - default: - break; + if (cpu_seqid < tcps->tcps_sc_cnt) + return; + for (i = tcps->tcps_sc_cnt; i <= cpu_seqid; i++) { + ASSERT(tcps->tcps_sc[i] == NULL); + tcps->tcps_sc[i] = kmem_zalloc(sizeof (tcp_stats_cpu_t), + KM_SLEEP); } - return (0); + membar_producer(); + tcps->tcps_sc_cnt = cpu_seqid + 1; } /*
--- a/usr/src/uts/common/inet/tcp/tcp_stats.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp/tcp_stats.c Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -41,8 +40,8 @@ static int tcp_kstat2_update(kstat_t *kp, int rw); static void tcp_sum_mib(tcp_stack_t *, mib2_tcp_t *); -static void tcp_cp_mib(mib2_tcp_t *, mib2_tcp_t *); -static void tcp_cp_stats(tcp_stat_t *, tcp_stat_t *); +static void tcp_add_mib(mib2_tcp_t *, mib2_tcp_t *); +static void tcp_add_stats(tcp_stat_counter_t *, tcp_stat_t *); static void tcp_clr_stats(tcp_stat_t *); tcp_g_stat_t tcp_g_statistics; @@ -773,18 +772,18 @@ */ cnt = tcps->tcps_sc_cnt; for (i = 0; i < cnt; i++) - tcp_cp_stats(&tcps->tcps_sc[i]->tcp_sc_stats, stats); + tcp_add_stats(&tcps->tcps_sc[i]->tcp_sc_stats, stats); netstack_rele(ns); return (0); } /* - * To copy stats from one mib2_tcp_t to another. Static fields are not copied. + * To add stats from one mib2_tcp_t to another. Static fields are not added. * The caller should set them up propertly. */ void -tcp_cp_mib(mib2_tcp_t *from, mib2_tcp_t *to) +tcp_add_mib(mib2_tcp_t *from, mib2_tcp_t *to) { to->tcpActiveOpens += from->tcpActiveOpens; to->tcpPassiveOpens += from->tcpPassiveOpens; @@ -855,7 +854,7 @@ */ cnt = tcps->tcps_sc_cnt; for (i = 0; i < cnt; i++) - tcp_cp_mib(&tcps->tcps_sc[i]->tcp_sc_mib, tcp_mib); + tcp_add_mib(&tcps->tcps_sc[i]->tcp_sc_mib, tcp_mib); /* Fixed length structure for IPv4 and IPv6 counters */ SET_MIB(tcp_mib->tcpConnTableSize, sizeof (mib2_tcpConnEntry_t)); @@ -865,7 +864,7 @@ /* * To set all tcp_stat_t counters to 0. */ -void +static void tcp_clr_stats(tcp_stat_t *stats) { stats->tcp_time_wait_syn_success.value.ui64 = 0; @@ -921,106 +920,107 @@ } /* - * To copy counters from one tcp_stat_t to another. 
+ * To add counters from the per CPU tcp_stat_counter_t to the stack + * tcp_stat_t. */ -void -tcp_cp_stats(tcp_stat_t *from, tcp_stat_t *to) +static void +tcp_add_stats(tcp_stat_counter_t *from, tcp_stat_t *to) { to->tcp_time_wait_syn_success.value.ui64 += - from->tcp_time_wait_syn_success.value.ui64; + from->tcp_time_wait_syn_success; to->tcp_clean_death_nondetached.value.ui64 += - from->tcp_clean_death_nondetached.value.ui64; + from->tcp_clean_death_nondetached; to->tcp_eager_blowoff_q.value.ui64 += - from->tcp_eager_blowoff_q.value.ui64; + from->tcp_eager_blowoff_q; to->tcp_eager_blowoff_q0.value.ui64 += - from->tcp_eager_blowoff_q0.value.ui64; + from->tcp_eager_blowoff_q0; to->tcp_no_listener.value.ui64 += - from->tcp_no_listener.value.ui64; + from->tcp_no_listener; to->tcp_listendrop.value.ui64 += - from->tcp_listendrop.value.ui64; + from->tcp_listendrop; to->tcp_listendropq0.value.ui64 += - from->tcp_listendropq0.value.ui64; + from->tcp_listendropq0; to->tcp_wsrv_called.value.ui64 += - from->tcp_wsrv_called.value.ui64; + from->tcp_wsrv_called; to->tcp_flwctl_on.value.ui64 += - from->tcp_flwctl_on.value.ui64; + from->tcp_flwctl_on; to->tcp_timer_fire_early.value.ui64 += - from->tcp_timer_fire_early.value.ui64; + from->tcp_timer_fire_early; to->tcp_timer_fire_miss.value.ui64 += - from->tcp_timer_fire_miss.value.ui64; + from->tcp_timer_fire_miss; to->tcp_zcopy_on.value.ui64 += - from->tcp_zcopy_on.value.ui64; + from->tcp_zcopy_on; to->tcp_zcopy_off.value.ui64 += - from->tcp_zcopy_off.value.ui64; + from->tcp_zcopy_off; to->tcp_zcopy_backoff.value.ui64 += - from->tcp_zcopy_backoff.value.ui64; + from->tcp_zcopy_backoff; to->tcp_fusion_flowctl.value.ui64 += - from->tcp_fusion_flowctl.value.ui64; + from->tcp_fusion_flowctl; to->tcp_fusion_backenabled.value.ui64 += - from->tcp_fusion_backenabled.value.ui64; + from->tcp_fusion_backenabled; to->tcp_fusion_urg.value.ui64 += - from->tcp_fusion_urg.value.ui64; + from->tcp_fusion_urg; to->tcp_fusion_putnext.value.ui64 += - 
from->tcp_fusion_putnext.value.ui64; + from->tcp_fusion_putnext; to->tcp_fusion_unfusable.value.ui64 += - from->tcp_fusion_unfusable.value.ui64; + from->tcp_fusion_unfusable; to->tcp_fusion_aborted.value.ui64 += - from->tcp_fusion_aborted.value.ui64; + from->tcp_fusion_aborted; to->tcp_fusion_unqualified.value.ui64 += - from->tcp_fusion_unqualified.value.ui64; + from->tcp_fusion_unqualified; to->tcp_fusion_rrw_busy.value.ui64 += - from->tcp_fusion_rrw_busy.value.ui64; + from->tcp_fusion_rrw_busy; to->tcp_fusion_rrw_msgcnt.value.ui64 += - from->tcp_fusion_rrw_msgcnt.value.ui64; + from->tcp_fusion_rrw_msgcnt; to->tcp_fusion_rrw_plugged.value.ui64 += - from->tcp_fusion_rrw_plugged.value.ui64; + from->tcp_fusion_rrw_plugged; to->tcp_in_ack_unsent_drop.value.ui64 += - from->tcp_in_ack_unsent_drop.value.ui64; + from->tcp_in_ack_unsent_drop; to->tcp_sock_fallback.value.ui64 += - from->tcp_sock_fallback.value.ui64; + from->tcp_sock_fallback; to->tcp_lso_enabled.value.ui64 += - from->tcp_lso_enabled.value.ui64; + from->tcp_lso_enabled; to->tcp_lso_disabled.value.ui64 += - from->tcp_lso_disabled.value.ui64; + from->tcp_lso_disabled; to->tcp_lso_times.value.ui64 += - from->tcp_lso_times.value.ui64; + from->tcp_lso_times; to->tcp_lso_pkt_out.value.ui64 += - from->tcp_lso_pkt_out.value.ui64; + from->tcp_lso_pkt_out; to->tcp_listen_cnt_drop.value.ui64 += - from->tcp_listen_cnt_drop.value.ui64; + from->tcp_listen_cnt_drop; to->tcp_listen_mem_drop.value.ui64 += - from->tcp_listen_mem_drop.value.ui64; + from->tcp_listen_mem_drop; to->tcp_zwin_mem_drop.value.ui64 += - from->tcp_zwin_mem_drop.value.ui64; + from->tcp_zwin_mem_drop; to->tcp_zwin_ack_syn.value.ui64 += - from->tcp_zwin_ack_syn.value.ui64; + from->tcp_zwin_ack_syn; to->tcp_rst_unsent.value.ui64 += - from->tcp_rst_unsent.value.ui64; + from->tcp_rst_unsent; to->tcp_reclaim_cnt.value.ui64 += - from->tcp_reclaim_cnt.value.ui64; + from->tcp_reclaim_cnt; to->tcp_reass_timeout.value.ui64 += - from->tcp_reass_timeout.value.ui64; 
+ from->tcp_reass_timeout; #ifdef TCP_DEBUG_COUNTER to->tcp_time_wait.value.ui64 += - from->tcp_time_wait.value.ui64; + from->tcp_time_wait; to->tcp_rput_time_wait.value.ui64 += - from->tcp_rput_time_wait.value.ui64; + from->tcp_rput_time_wait; to->tcp_detach_time_wait.value.ui64 += - from->tcp_detach_time_wait.value.ui64; + from->tcp_detach_time_wait; to->tcp_timeout_calls.value.ui64 += - from->tcp_timeout_calls.value.ui64; + from->tcp_timeout_calls; to->tcp_timeout_cached_alloc.value.ui64 += - from->tcp_timeout_cached_alloc.value.ui64; + from->tcp_timeout_cached_alloc; to->tcp_timeout_cancel_reqs.value.ui64 += - from->tcp_timeout_cancel_reqs.value.ui64; + from->tcp_timeout_cancel_reqs; to->tcp_timeout_canceled.value.ui64 += - from->tcp_timeout_canceled.value.ui64; + from->tcp_timeout_canceled; to->tcp_timermp_freed.value.ui64 += - from->tcp_timermp_freed.value.ui64; + from->tcp_timermp_freed; to->tcp_push_timer_cnt.value.ui64 += - from->tcp_push_timer_cnt.value.ui64; + from->tcp_push_timer_cnt; to->tcp_ack_timer_cnt.value.ui64 += - from->tcp_ack_timer_cnt.value.ui64; + from->tcp_ack_timer_cnt; #endif }
--- a/usr/src/uts/common/inet/tcp/tcp_timers.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp/tcp_timers.c Mon Jul 19 17:27:45 2010 -0700 @@ -695,14 +695,18 @@ first_threshold = tcp->tcp_first_ctimer_threshold; second_threshold = tcp->tcp_second_ctimer_threshold; - /* Retransmit forever unless this is a passive open... */ + /* + * If an app has set the second_threshold to 0, it means that + * we need to retransmit forever, unless this is a passive + * open. We need to set second_threshold back to a normal + * value such that later comparison with it still makes + * sense. But we set dont_timeout to B_TRUE so that we will + * never time out. + */ if (second_threshold == 0) { - if (!tcp->tcp_active_open) { - second_threshold = - tcps->tcps_ip_abort_linterval; - } else { + second_threshold = tcps->tcps_ip_abort_linterval; + if (tcp->tcp_active_open) dont_timeout = B_TRUE; - } } break; case TCPS_ESTABLISHED: @@ -712,8 +716,10 @@ * forever. But if the end point is closed, the normal * timeout applies. */ - if (second_threshold == 0) + if (second_threshold == 0) { + second_threshold = tcps->tcps_ip_abort_linterval; dont_timeout = B_TRUE; + } /* FALLTHRU */ case TCPS_FIN_WAIT_1: case TCPS_CLOSING: @@ -892,8 +898,7 @@ dont_timeout = B_FALSE; } - if (!dont_timeout && second_threshold == 0) - second_threshold = tcps->tcps_ip_abort_interval; + ASSERT(second_threshold != 0); if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) { /* @@ -903,8 +908,14 @@ tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp, tcp->tcp_xmit_head, B_TRUE); - if (dont_timeout) + if (dont_timeout) { + /* + * Reset tcp_ms_we_have_waited to avoid overflow since + * we are going to retransmit forever. + */ + tcp->tcp_ms_we_have_waited = second_threshold; goto timer_rexmit; + } /* * For zero window probe, we need to send indefinitely,
--- a/usr/src/uts/common/inet/tcp/tcp_tunables.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp/tcp_tunables.c Mon Jul 19 17:27:45 2010 -0700 @@ -98,7 +98,7 @@ break; } } -ret: + mutex_exit(&tcps->tcps_listener_conf_lock); return (err); }
--- a/usr/src/uts/common/inet/tcp_impl.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp_impl.h Mon Jul 19 17:27:45 2010 -0700 @@ -308,8 +308,8 @@ * connection (or the listener) which decrements tlc_cnt to zero frees the * struct. * - * tlc_max is the threshold value tcps_conn_listen_port. It is set when the - * tcp_listen_cnt_t is allocated. + * tlc_max is the maximum number of concurrent TCP connections created from a + * listner. It is calculated when the tcp_listen_cnt_t is allocated. * * tlc_report_time stores the time when cmn_err() is called to report that the * max has been exceeeded. Report is done at most once every @@ -694,10 +694,10 @@ /* * Misc functions in tcp_misc.c. */ -extern int tcp_cpu_update(cpu_setup_t, int, void *); +extern uint32_t tcp_find_listener_conf(tcp_stack_t *, in_port_t); extern void tcp_ioctl_abort_conn(queue_t *, mblk_t *); -extern uint32_t tcp_find_listener_conf(tcp_stack_t *, in_port_t); extern void tcp_listener_conf_cleanup(tcp_stack_t *); +extern void tcp_stack_cpu_add(tcp_stack_t *, processorid_t); #endif /* _KERNEL */
--- a/usr/src/uts/common/inet/tcp_stack.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp_stack.h Mon Jul 19 17:27:45 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _INET_TCP_STACK_H @@ -77,8 +76,8 @@ * MIB-2 stuff for SNMP * Note: tcpInErrs {tcp 15} is accumulated in ip.c */ - kstat_t *tcps_mibkp; /* kstat exporting tcp_mib data */ - kstat_t *tcps_kstat; + kstat_t *tcps_mibkp; /* kstat exporting mib2_tcp_t data */ + kstat_t *tcps_kstat; /* kstat exporting tcp_stat_t data */ uint32_t tcps_iss_incr_extra; /* Incremented for each connection */
--- a/usr/src/uts/common/inet/tcp_stats.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tcp_stats.h Mon Jul 19 17:27:45 2010 -0700 @@ -18,9 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _INET_TCP_STATS_H @@ -91,8 +91,8 @@ kstat_named_t tcp_no_listener; kstat_named_t tcp_listendrop; kstat_named_t tcp_listendropq0; - kstat_named_t tcp_wsrv_called; - kstat_named_t tcp_flwctl_on; + kstat_named_t tcp_wsrv_called; + kstat_named_t tcp_flwctl_on; kstat_named_t tcp_timer_fire_early; kstat_named_t tcp_timer_fire_miss; kstat_named_t tcp_zcopy_on; @@ -135,6 +135,62 @@ #endif } tcp_stat_t; +/* + * This struct contains only the counter part of tcp_stat_t. It is used + * in tcp_stats_cpu_t instead of tcp_stat_t to save memory space. + */ +typedef struct tcp_stat_counter_s { + uint64_t tcp_time_wait_syn_success; + uint64_t tcp_clean_death_nondetached; + uint64_t tcp_eager_blowoff_q; + uint64_t tcp_eager_blowoff_q0; + uint64_t tcp_no_listener; + uint64_t tcp_listendrop; + uint64_t tcp_listendropq0; + uint64_t tcp_wsrv_called; + uint64_t tcp_flwctl_on; + uint64_t tcp_timer_fire_early; + uint64_t tcp_timer_fire_miss; + uint64_t tcp_zcopy_on; + uint64_t tcp_zcopy_off; + uint64_t tcp_zcopy_backoff; + uint64_t tcp_fusion_flowctl; + uint64_t tcp_fusion_backenabled; + uint64_t tcp_fusion_urg; + uint64_t tcp_fusion_putnext; + uint64_t tcp_fusion_unfusable; + uint64_t tcp_fusion_aborted; + uint64_t tcp_fusion_unqualified; + uint64_t tcp_fusion_rrw_busy; + uint64_t tcp_fusion_rrw_msgcnt; + uint64_t tcp_fusion_rrw_plugged; + uint64_t tcp_in_ack_unsent_drop; + uint64_t tcp_sock_fallback; + uint64_t tcp_lso_enabled; + uint64_t tcp_lso_disabled; + uint64_t tcp_lso_times; + uint64_t tcp_lso_pkt_out; + uint64_t tcp_listen_cnt_drop; + uint64_t tcp_listen_mem_drop; + uint64_t tcp_zwin_mem_drop; + uint64_t 
tcp_zwin_ack_syn; + uint64_t tcp_rst_unsent; + uint64_t tcp_reclaim_cnt; + uint64_t tcp_reass_timeout; +#ifdef TCP_DEBUG_COUNTER + uint64_t tcp_time_wait; + uint64_t tcp_rput_time_wait; + uint64_t tcp_detach_time_wait; + uint64_t tcp_timeout_calls; + uint64_t tcp_timeout_cached_alloc; + uint64_t tcp_timeout_cancel_reqs; + uint64_t tcp_timeout_canceled; + uint64_t tcp_timermp_freed; + uint64_t tcp_push_timer_cnt; + uint64_t tcp_ack_timer_cnt; +#endif +} tcp_stat_counter_t; + typedef struct tcp_g_stat { kstat_named_t tcp_timermp_alloced; kstat_named_t tcp_timermp_allocfail; @@ -144,9 +200,9 @@ /* Per CPU stats: TCP MIB2, TCP kstat and connection counter. */ typedef struct { - int64_t tcp_sc_conn_cnt; - mib2_tcp_t tcp_sc_mib; - tcp_stat_t tcp_sc_stats; + int64_t tcp_sc_conn_cnt; + mib2_tcp_t tcp_sc_mib; + tcp_stat_counter_t tcp_sc_stats; } tcp_stats_cpu_t; #define TCPS_BUMP_MIB(tcps, x) \ @@ -158,7 +214,7 @@ #if TCP_DEBUG_COUNTER #define TCP_DBGSTAT(tcps, x) \ atomic_inc_64( \ - &((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64)) + &((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x)) #define TCP_G_DBGSTAT(x) \ atomic_inc_64(&(tcp_g_statistics.x.value.ui64)) #else @@ -169,12 +225,13 @@ #define TCP_G_STAT(x) (tcp_g_statistics.x.value.ui64++) #define TCP_STAT(tcps, x) \ - ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64++) + ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x++) #define TCP_STAT_UPDATE(tcps, x, n) \ - ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64 += (n)) + ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x += (n)) #define TCP_STAT_SET(tcps, x, n) \ - ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x.value.ui64 = (n)) + ((tcps)->tcps_sc[CPU->cpu_seqid]->tcp_sc_stats.x = (n)) +/* Global TCP stats for all IP stacks. */ extern tcp_g_stat_t tcp_g_statistics; extern kstat_t *tcp_g_kstat;
--- a/usr/src/uts/common/inet/tunables.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/tunables.h Mon Jul 19 17:27:45 2010 -0700 @@ -140,6 +140,8 @@ #define HOURS (60 * MINUTES) #define DAYS (24 * HOURS) +#define MB (1024 * 1024) + /* Largest TCP/UDP/SCTP port number */ #define ULP_MAX_PORT (64 * 1024 - 1)
--- a/usr/src/uts/common/inet/udp/udp.c Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/udp/udp.c Mon Jul 19 17:27:45 2010 -0700 @@ -181,13 +181,6 @@ static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); static void udp_stack_fini(netstackid_t stackid, void *arg); -static void *udp_kstat_init(netstackid_t stackid); -static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); -static void *udp_kstat2_init(netstackid_t, udp_stat_t *); -static void udp_kstat2_fini(netstackid_t, kstat_t *); -static int udp_kstat_update(kstat_t *kp, int rw); - - /* Common routines for TPI and socket module */ static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); @@ -1128,7 +1121,7 @@ udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + opt_length; if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { - BUMP_MIB(&us->us_udp_mib, udpInErrors); + UDPS_BUMP_MIB(us, udpInErrors); break; } @@ -2265,7 +2258,7 @@ /* Allocation failed. Drop packet */ mutex_exit(&connp->conn_lock); freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpInErrors); + UDPS_BUMP_MIB(us, udpInErrors); return; } mutex_exit(&connp->conn_lock); @@ -2328,7 +2321,7 @@ mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpInErrors); + UDPS_BUMP_MIB(us, udpInErrors); return; } mp1->b_cont = mp; @@ -2377,7 +2370,7 @@ mp1 = allocb(udi_size, BPRI_MED); if (mp1 == NULL) { freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpInErrors); + UDPS_BUMP_MIB(us, udpInErrors); return; } mp1->b_cont = mp; @@ -2440,312 +2433,13 @@ pkt_len -= hdr_length; } - BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); + UDPS_BUMP_MIB(us, udpHCInDatagrams); udp_ulp_recv(connp, mp1, pkt_len, ira); return; tossit: freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpInErrors); -} - -/* - * return SNMP stuff in buffer in mpdata. We don't hold any lock and report - * information that can be changing beneath us. 
- */ -mblk_t * -udp_snmp_get(queue_t *q, mblk_t *mpctl) -{ - mblk_t *mpdata; - mblk_t *mp_conn_ctl; - mblk_t *mp_attr_ctl; - mblk_t *mp6_conn_ctl; - mblk_t *mp6_attr_ctl; - mblk_t *mp_conn_tail; - mblk_t *mp_attr_tail; - mblk_t *mp6_conn_tail; - mblk_t *mp6_attr_tail; - struct opthdr *optp; - mib2_udpEntry_t ude; - mib2_udp6Entry_t ude6; - mib2_transportMLPEntry_t mlp; - int state; - zoneid_t zoneid; - int i; - connf_t *connfp; - conn_t *connp = Q_TO_CONN(q); - int v4_conn_idx; - int v6_conn_idx; - boolean_t needattr; - udp_t *udp; - ip_stack_t *ipst = connp->conn_netstack->netstack_ip; - udp_stack_t *us = connp->conn_netstack->netstack_udp; - mblk_t *mp2ctl; - - /* - * make a copy of the original message - */ - mp2ctl = copymsg(mpctl); - - mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; - if (mpctl == NULL || - (mpdata = mpctl->b_cont) == NULL || - (mp_conn_ctl = copymsg(mpctl)) == NULL || - (mp_attr_ctl = copymsg(mpctl)) == NULL || - (mp6_conn_ctl = copymsg(mpctl)) == NULL || - (mp6_attr_ctl = copymsg(mpctl)) == NULL) { - freemsg(mp_conn_ctl); - freemsg(mp_attr_ctl); - freemsg(mp6_conn_ctl); - freemsg(mpctl); - freemsg(mp2ctl); - return (0); - } - - zoneid = connp->conn_zoneid; - - /* fixed length structure for IPv4 and IPv6 counters */ - SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); - SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); - /* synchronize 64- and 32-bit counters */ - SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); - SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); - - optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; - optp->level = MIB2_UDP; - optp->name = 0; - (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, - sizeof (us->us_udp_mib)); - optp->len = msgdsize(mpdata); - qreply(q, mpctl); - - mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; - v4_conn_idx = v6_conn_idx = 0; - - for (i = 0; i < CONN_G_HASH_SIZE; i++) { - connfp = 
&ipst->ips_ipcl_globalhash_fanout[i]; - connp = NULL; - - while ((connp = ipcl_get_next_conn(connfp, connp, - IPCL_UDPCONN))) { - udp = connp->conn_udp; - if (zoneid != connp->conn_zoneid) - continue; - - /* - * Note that the port numbers are sent in - * host byte order - */ - - if (udp->udp_state == TS_UNBND) - state = MIB2_UDP_unbound; - else if (udp->udp_state == TS_IDLE) - state = MIB2_UDP_idle; - else if (udp->udp_state == TS_DATA_XFER) - state = MIB2_UDP_connected; - else - state = MIB2_UDP_unknown; - - needattr = B_FALSE; - bzero(&mlp, sizeof (mlp)); - if (connp->conn_mlp_type != mlptSingle) { - if (connp->conn_mlp_type == mlptShared || - connp->conn_mlp_type == mlptBoth) - mlp.tme_flags |= MIB2_TMEF_SHARED; - if (connp->conn_mlp_type == mlptPrivate || - connp->conn_mlp_type == mlptBoth) - mlp.tme_flags |= MIB2_TMEF_PRIVATE; - needattr = B_TRUE; - } - if (connp->conn_anon_mlp) { - mlp.tme_flags |= MIB2_TMEF_ANONMLP; - needattr = B_TRUE; - } - switch (connp->conn_mac_mode) { - case CONN_MAC_DEFAULT: - break; - case CONN_MAC_AWARE: - mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; - needattr = B_TRUE; - break; - case CONN_MAC_IMPLICIT: - mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT; - needattr = B_TRUE; - break; - } - mutex_enter(&connp->conn_lock); - if (udp->udp_state == TS_DATA_XFER && - connp->conn_ixa->ixa_tsl != NULL) { - ts_label_t *tsl; - - tsl = connp->conn_ixa->ixa_tsl; - mlp.tme_flags |= MIB2_TMEF_IS_LABELED; - mlp.tme_doi = label2doi(tsl); - mlp.tme_label = *label2bslabel(tsl); - needattr = B_TRUE; - } - mutex_exit(&connp->conn_lock); - - /* - * Create an IPv4 table entry for IPv4 entries and also - * any IPv6 entries which are bound to in6addr_any - * (i.e. anything a IPv4 peer could connect/send to). 
- */ - if (connp->conn_ipversion == IPV4_VERSION || - (udp->udp_state <= TS_IDLE && - IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) { - ude.udpEntryInfo.ue_state = state; - /* - * If in6addr_any this will set it to - * INADDR_ANY - */ - ude.udpLocalAddress = connp->conn_laddr_v4; - ude.udpLocalPort = ntohs(connp->conn_lport); - if (udp->udp_state == TS_DATA_XFER) { - /* - * Can potentially get here for - * v6 socket if another process - * (say, ping) has just done a - * sendto(), changing the state - * from the TS_IDLE above to - * TS_DATA_XFER by the time we hit - * this part of the code. - */ - ude.udpEntryInfo.ue_RemoteAddress = - connp->conn_faddr_v4; - ude.udpEntryInfo.ue_RemotePort = - ntohs(connp->conn_fport); - } else { - ude.udpEntryInfo.ue_RemoteAddress = 0; - ude.udpEntryInfo.ue_RemotePort = 0; - } - - /* - * We make the assumption that all udp_t - * structs will be created within an address - * region no larger than 32-bits. - */ - ude.udpInstance = (uint32_t)(uintptr_t)udp; - ude.udpCreationProcess = - (connp->conn_cpid < 0) ? 
- MIB2_UNKNOWN_PROCESS : - connp->conn_cpid; - ude.udpCreationTime = connp->conn_open_time; - - (void) snmp_append_data2(mp_conn_ctl->b_cont, - &mp_conn_tail, (char *)&ude, sizeof (ude)); - mlp.tme_connidx = v4_conn_idx++; - if (needattr) - (void) snmp_append_data2( - mp_attr_ctl->b_cont, &mp_attr_tail, - (char *)&mlp, sizeof (mlp)); - } - if (connp->conn_ipversion == IPV6_VERSION) { - ude6.udp6EntryInfo.ue_state = state; - ude6.udp6LocalAddress = connp->conn_laddr_v6; - ude6.udp6LocalPort = ntohs(connp->conn_lport); - mutex_enter(&connp->conn_lock); - if (connp->conn_ixa->ixa_flags & - IXAF_SCOPEID_SET) { - ude6.udp6IfIndex = - connp->conn_ixa->ixa_scopeid; - } else { - ude6.udp6IfIndex = connp->conn_bound_if; - } - mutex_exit(&connp->conn_lock); - if (udp->udp_state == TS_DATA_XFER) { - ude6.udp6EntryInfo.ue_RemoteAddress = - connp->conn_faddr_v6; - ude6.udp6EntryInfo.ue_RemotePort = - ntohs(connp->conn_fport); - } else { - ude6.udp6EntryInfo.ue_RemoteAddress = - sin6_null.sin6_addr; - ude6.udp6EntryInfo.ue_RemotePort = 0; - } - /* - * We make the assumption that all udp_t - * structs will be created within an address - * region no larger than 32-bits. - */ - ude6.udp6Instance = (uint32_t)(uintptr_t)udp; - ude6.udp6CreationProcess = - (connp->conn_cpid < 0) ? - MIB2_UNKNOWN_PROCESS : - connp->conn_cpid; - ude6.udp6CreationTime = connp->conn_open_time; - - (void) snmp_append_data2(mp6_conn_ctl->b_cont, - &mp6_conn_tail, (char *)&ude6, - sizeof (ude6)); - mlp.tme_connidx = v6_conn_idx++; - if (needattr) - (void) snmp_append_data2( - mp6_attr_ctl->b_cont, - &mp6_attr_tail, (char *)&mlp, - sizeof (mlp)); - } - } - } - - /* IPv4 UDP endpoints */ - optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ - sizeof (struct T_optmgmt_ack)]; - optp->level = MIB2_UDP; - optp->name = MIB2_UDP_ENTRY; - optp->len = msgdsize(mp_conn_ctl->b_cont); - qreply(q, mp_conn_ctl); - - /* table of MLP attributes... 
*/ - optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ - sizeof (struct T_optmgmt_ack)]; - optp->level = MIB2_UDP; - optp->name = EXPER_XPORT_MLP; - optp->len = msgdsize(mp_attr_ctl->b_cont); - if (optp->len == 0) - freemsg(mp_attr_ctl); - else - qreply(q, mp_attr_ctl); - - /* IPv6 UDP endpoints */ - optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ - sizeof (struct T_optmgmt_ack)]; - optp->level = MIB2_UDP6; - optp->name = MIB2_UDP6_ENTRY; - optp->len = msgdsize(mp6_conn_ctl->b_cont); - qreply(q, mp6_conn_ctl); - - /* table of MLP attributes... */ - optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ - sizeof (struct T_optmgmt_ack)]; - optp->level = MIB2_UDP6; - optp->name = EXPER_XPORT_MLP; - optp->len = msgdsize(mp6_attr_ctl->b_cont); - if (optp->len == 0) - freemsg(mp6_attr_ctl); - else - qreply(q, mp6_attr_ctl); - - return (mp2ctl); -} - -/* - * Return 0 if invalid set request, 1 otherwise, including non-udp requests. - * NOTE: Per MIB-II, UDP has no writable data. - * TODO: If this ever actually tries to set anything, it needs to be - * to do the appropriate locking. 
- */ -/* ARGSUSED */ -int -udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, - uchar_t *ptr, int len) -{ - switch (level) { - case MIB2_UDP: - return (0); - default: - return (1); - } + UDPS_BUMP_MIB(us, udpInErrors); } /* @@ -2922,7 +2616,7 @@ */ ixa = conn_get_ixa_exclusive(connp); if (ixa == NULL) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return (ENOMEM); } @@ -2945,7 +2639,7 @@ ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; ixa_refrele(ixa); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return (ENOMEM); } @@ -2953,7 +2647,7 @@ error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); mutex_exit(&connp->conn_lock); if (error != 0) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); goto done; } @@ -2989,7 +2683,7 @@ * module for "is_absreq_failure" */ freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); goto done; } ASSERT(is_absreq_failure == 0); @@ -3085,7 +2779,7 @@ default: failed: freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); goto done; } @@ -3101,7 +2795,7 @@ /* Using UDP MLP requires SCM_UCRED from user */ if (connp->conn_mlp_type != mlptSingle && !((ixa->ixa_flags & IXAF_UCRED_TSL))) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); error = ECONNREFUSED; freemsg(mp); goto done; @@ -3121,7 +2815,7 @@ error = conn_update_label(connp, ixa, &v6dst, ipp); if (error != 0) { freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); goto done; } } @@ -3129,17 +2823,17 @@ flowinfo, mp, &error); if (mp == NULL) { ASSERT(error != 0); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); goto done; } if (ixa->ixa_pktlen > IP_MAXPACKET) { error = EMSGSIZE; - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, 
udpOutErrors); freemsg(mp); goto done; } /* We're done. Pass the packet to ip. */ - BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); + UDPS_BUMP_MIB(us, udpHCOutDatagrams); DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, @@ -3203,7 +2897,7 @@ */ ixa = conn_get_ixa(connp, B_FALSE); if (ixa == NULL) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return (ENOMEM); } @@ -3224,7 +2918,7 @@ ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; ixa_refrele(ixa); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return (error); } @@ -3283,7 +2977,7 @@ ixa->ixa_cpid = connp->conn_cpid; ixa_refrele(ixa); freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (error); } } else { @@ -3293,7 +2987,7 @@ ASSERT(ixa->ixa_ire != NULL); /* We're done. Pass the packet to ip. */ - BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); + UDPS_BUMP_MIB(us, udpHCOutDatagrams); DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, @@ -3355,7 +3049,7 @@ ixa->ixa_cred = connp->conn_cred; /* Restore */ ixa->ixa_cpid = connp->conn_cpid; ixa_refrele(ixa); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return (error); } @@ -3414,7 +3108,7 @@ ixa->ixa_cpid = connp->conn_cpid; ixa_refrele(ixa); freemsg(mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (error); } } else { @@ -3423,7 +3117,7 @@ } /* We're done. Pass the packet to ip. 
*/ - BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); + UDPS_BUMP_MIB(us, udpHCOutDatagrams); DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, @@ -3679,7 +3373,7 @@ case M_DATA: if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { /* Not connected; address is required */ - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); UDP_DBGSTAT(us, udp_data_notconn); UDP_STAT(us, udp_out_err_notconn); freemsg(mp); @@ -3694,7 +3388,7 @@ cr = msg_getcred(mp, &pid); ASSERT(cr != NULL); if (cr == NULL) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(mp); return; } @@ -3946,7 +3640,7 @@ return; ud_error2: - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); freemsg(data_mp); UDP_STAT(us, udp_out_err_output); ASSERT(mp != NULL); @@ -4227,7 +3921,7 @@ } /* We're done. Pass the packet to ip. */ - BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); + UDPS_BUMP_MIB(us, udpHCOutDatagrams); DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, @@ -4276,7 +3970,7 @@ ixa_refrele(ixa); freemsg(data_mp); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); UDP_STAT(us, udp_out_err_output); return (error); } @@ -4688,7 +4382,18 @@ KM_SLEEP); bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); - us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); + /* Allocate the per netstack stats */ + mutex_enter(&cpu_lock); + us->us_sc_cnt = MAX(ncpus, boot_ncpus); + mutex_exit(&cpu_lock); + us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), + KM_SLEEP); + for (i = 0; i < us->us_sc_cnt; i++) { + us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), + KM_SLEEP); + } + + us->us_kstat = udp_kstat2_init(stackid); us->us_mibkp = udp_kstat_init(stackid); major = mod_name_to_major(INET_NAME); @@ -4715,6 +4420,10 @@ us->us_bind_fanout = NULL; + for (i = 0; i < 
us->us_sc_cnt; i++) + kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); + kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); + kmem_free(us->us_propinfo_tbl, udp_propinfo_count * sizeof (mod_prop_info_t)); us->us_propinfo_tbl = NULL; @@ -4724,132 +4433,12 @@ udp_kstat2_fini(stackid, us->us_kstat); us->us_kstat = NULL; - bzero(&us->us_statistics, sizeof (us->us_statistics)); mutex_destroy(&us->us_epriv_port_lock); ldi_ident_release(us->us_ldi_ident); kmem_free(us, sizeof (*us)); } -static void * -udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) -{ - kstat_t *ksp; - - udp_stat_t template = { - { "udp_sock_fallback", KSTAT_DATA_UINT64 }, - { "udp_out_opt", KSTAT_DATA_UINT64 }, - { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, - { "udp_out_err_output", KSTAT_DATA_UINT64 }, - { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, -#ifdef DEBUG - { "udp_data_conn", KSTAT_DATA_UINT64 }, - { "udp_data_notconn", KSTAT_DATA_UINT64 }, - { "udp_out_lastdst", KSTAT_DATA_UINT64 }, - { "udp_out_diffdst", KSTAT_DATA_UINT64 }, - { "udp_out_ipv6", KSTAT_DATA_UINT64 }, - { "udp_out_mapped", KSTAT_DATA_UINT64 }, - { "udp_out_ipv4", KSTAT_DATA_UINT64 }, -#endif - }; - - ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", - KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL, stackid); - - if (ksp == NULL) - return (NULL); - - bcopy(&template, us_statisticsp, sizeof (template)); - ksp->ks_data = (void *)us_statisticsp; - ksp->ks_private = (void *)(uintptr_t)stackid; - - kstat_install(ksp); - return (ksp); -} - -static void -udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) -{ - if (ksp != NULL) { - ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); - kstat_delete_netstack(ksp, stackid); - } -} - -static void * -udp_kstat_init(netstackid_t stackid) -{ - kstat_t *ksp; - - udp_named_kstat_t template = { - { "inDatagrams", KSTAT_DATA_UINT64, 0 }, - { "inErrors", KSTAT_DATA_UINT32, 0 }, - { "outDatagrams", 
KSTAT_DATA_UINT64, 0 }, - { "entrySize", KSTAT_DATA_INT32, 0 }, - { "entry6Size", KSTAT_DATA_INT32, 0 }, - { "outErrors", KSTAT_DATA_UINT32, 0 }, - }; - - ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", - KSTAT_TYPE_NAMED, - NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); - - if (ksp == NULL || ksp->ks_data == NULL) - return (NULL); - - template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); - template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); - - bcopy(&template, ksp->ks_data, sizeof (template)); - ksp->ks_update = udp_kstat_update; - ksp->ks_private = (void *)(uintptr_t)stackid; - - kstat_install(ksp); - return (ksp); -} - -static void -udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) -{ - if (ksp != NULL) { - ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); - kstat_delete_netstack(ksp, stackid); - } -} - -static int -udp_kstat_update(kstat_t *kp, int rw) -{ - udp_named_kstat_t *udpkp; - netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; - netstack_t *ns; - udp_stack_t *us; - - if ((kp == NULL) || (kp->ks_data == NULL)) - return (EIO); - - if (rw == KSTAT_WRITE) - return (EACCES); - - ns = netstack_find_by_stackid(stackid); - if (ns == NULL) - return (-1); - us = ns->netstack_udp; - if (us == NULL) { - netstack_rele(ns); - return (-1); - } - udpkp = (udp_named_kstat_t *)kp->ks_data; - - udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; - udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; - udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; - udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; - netstack_rele(ns); - return (0); -} - static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size) { @@ -4897,6 +4486,25 @@ } /* + * When a CPU is added, we need to allocate the per CPU stats struct. 
+ */ +void +udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid) +{ + int i; + + if (cpu_seqid < us->us_sc_cnt) + return; + for (i = us->us_sc_cnt; i <= cpu_seqid; i++) { + ASSERT(us->us_sc[i] == NULL); + us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), + KM_SLEEP); + } + membar_producer(); + us->us_sc_cnt = cpu_seqid + 1; +} + +/* * Below routines for UDP socket module. */ @@ -6297,7 +5905,7 @@ /* Connected? */ if (msg->msg_name == NULL) { if (udp->udp_state != TS_DATA_XFER) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (EDESTADDRREQ); } if (msg->msg_controllen != 0) { @@ -6312,13 +5920,13 @@ return (error); } if (udp->udp_state == TS_DATA_XFER) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (EISCONN); } error = proto_verify_ip_addr(connp->conn_family, (struct sockaddr *)msg->msg_name, msg->msg_namelen); if (error != 0) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (error); } switch (connp->conn_family) { @@ -6341,7 +5949,7 @@ * since it is bound to a mapped address. 
*/ if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (EADDRNOTAVAIL); } if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) @@ -6349,7 +5957,7 @@ ipversion = IPV6_VERSION; } else { if (connp->conn_ipv6_v6only) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (EADDRNOTAVAIL); } @@ -6362,7 +5970,7 @@ */ if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (EADDRNOTAVAIL); } @@ -6382,7 +5990,7 @@ if (msg->msg_controllen == 0) { ixa = conn_get_ixa(connp, B_FALSE); if (ixa == NULL) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (ENOMEM); } } else { @@ -6402,7 +6010,7 @@ &sin2->sin6_addr) && sin6->sin6_family == sin2->sin6_family) { mutex_exit(&connp->conn_lock); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); if (ixa != NULL) ixa_refrele(ixa); return (error); @@ -6445,7 +6053,7 @@ if (msg->msg_controllen == 0) { ixa = conn_get_ixa(connp, B_FALSE); if (ixa == NULL) { - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); return (ENOMEM); } } else { @@ -6463,7 +6071,7 @@ if (sin->sin_port == sin2->sin_port && sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { mutex_exit(&connp->conn_lock); - BUMP_MIB(&us->us_udp_mib, udpOutErrors); + UDPS_BUMP_MIB(us, udpOutErrors); if (ixa != NULL) ixa_refrele(ixa); return (error);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/inet/udp/udp_stats.c Mon Jul 19 17:27:45 2010 -0700 @@ -0,0 +1,578 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/types.h> +#include <sys/tihdr.h> +#include <sys/policy.h> +#include <sys/tsol/tnet.h> + +#include <inet/common.h> +#include <inet/kstatcom.h> +#include <inet/snmpcom.h> +#include <inet/mib2.h> +#include <inet/optcom.h> +#include <inet/snmpcom.h> +#include <inet/kstatcom.h> +#include <inet/udp_impl.h> + +static int udp_kstat_update(kstat_t *, int); +static int udp_kstat2_update(kstat_t *, int); +static void udp_sum_mib(udp_stack_t *, mib2_udp_t *); +static void udp_clr_stats(udp_stat_t *); +static void udp_add_stats(udp_stat_counter_t *, udp_stat_t *); +static void udp_add_mib(mib2_udp_t *, mib2_udp_t *); +/* + * return SNMP stuff in buffer in mpdata. We don't hold any lock and report + * information that can be changing beneath us. 
+ */ +mblk_t * +udp_snmp_get(queue_t *q, mblk_t *mpctl) +{ + mblk_t *mpdata; + mblk_t *mp_conn_ctl; + mblk_t *mp_attr_ctl; + mblk_t *mp6_conn_ctl; + mblk_t *mp6_attr_ctl; + mblk_t *mp_conn_tail; + mblk_t *mp_attr_tail; + mblk_t *mp6_conn_tail; + mblk_t *mp6_attr_tail; + struct opthdr *optp; + mib2_udpEntry_t ude; + mib2_udp6Entry_t ude6; + mib2_transportMLPEntry_t mlp; + int state; + zoneid_t zoneid; + int i; + connf_t *connfp; + conn_t *connp = Q_TO_CONN(q); + int v4_conn_idx; + int v6_conn_idx; + boolean_t needattr; + udp_t *udp; + ip_stack_t *ipst = connp->conn_netstack->netstack_ip; + udp_stack_t *us = connp->conn_netstack->netstack_udp; + mblk_t *mp2ctl; + mib2_udp_t udp_mib; + + /* + * make a copy of the original message + */ + mp2ctl = copymsg(mpctl); + + mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; + if (mpctl == NULL || + (mpdata = mpctl->b_cont) == NULL || + (mp_conn_ctl = copymsg(mpctl)) == NULL || + (mp_attr_ctl = copymsg(mpctl)) == NULL || + (mp6_conn_ctl = copymsg(mpctl)) == NULL || + (mp6_attr_ctl = copymsg(mpctl)) == NULL) { + freemsg(mp_conn_ctl); + freemsg(mp_attr_ctl); + freemsg(mp6_conn_ctl); + freemsg(mpctl); + freemsg(mp2ctl); + return (0); + } + + zoneid = connp->conn_zoneid; + + bzero(&udp_mib, sizeof (udp_mib)); + /* fixed length structure for IPv4 and IPv6 counters */ + SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); + SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); + + udp_sum_mib(us, &udp_mib); + + /* + * Synchronize 32- and 64-bit counters. Note that udpInDatagrams and + * udpOutDatagrams are not updated anywhere in UDP. The new 64 bits + * counters are used. Hence the old counters' values in us_sc_mib + * are always 0. 
+ */ + SYNC32_MIB(&udp_mib, udpInDatagrams, udpHCInDatagrams); + SYNC32_MIB(&udp_mib, udpOutDatagrams, udpHCOutDatagrams); + + optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; + optp->level = MIB2_UDP; + optp->name = 0; + (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); + optp->len = msgdsize(mpdata); + qreply(q, mpctl); + + mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; + v4_conn_idx = v6_conn_idx = 0; + + for (i = 0; i < CONN_G_HASH_SIZE; i++) { + connfp = &ipst->ips_ipcl_globalhash_fanout[i]; + connp = NULL; + + while ((connp = ipcl_get_next_conn(connfp, connp, + IPCL_UDPCONN))) { + udp = connp->conn_udp; + if (zoneid != connp->conn_zoneid) + continue; + + /* + * Note that the port numbers are sent in + * host byte order + */ + + if (udp->udp_state == TS_UNBND) + state = MIB2_UDP_unbound; + else if (udp->udp_state == TS_IDLE) + state = MIB2_UDP_idle; + else if (udp->udp_state == TS_DATA_XFER) + state = MIB2_UDP_connected; + else + state = MIB2_UDP_unknown; + + needattr = B_FALSE; + bzero(&mlp, sizeof (mlp)); + if (connp->conn_mlp_type != mlptSingle) { + if (connp->conn_mlp_type == mlptShared || + connp->conn_mlp_type == mlptBoth) + mlp.tme_flags |= MIB2_TMEF_SHARED; + if (connp->conn_mlp_type == mlptPrivate || + connp->conn_mlp_type == mlptBoth) + mlp.tme_flags |= MIB2_TMEF_PRIVATE; + needattr = B_TRUE; + } + if (connp->conn_anon_mlp) { + mlp.tme_flags |= MIB2_TMEF_ANONMLP; + needattr = B_TRUE; + } + switch (connp->conn_mac_mode) { + case CONN_MAC_DEFAULT: + break; + case CONN_MAC_AWARE: + mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; + needattr = B_TRUE; + break; + case CONN_MAC_IMPLICIT: + mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT; + needattr = B_TRUE; + break; + } + mutex_enter(&connp->conn_lock); + if (udp->udp_state == TS_DATA_XFER && + connp->conn_ixa->ixa_tsl != NULL) { + ts_label_t *tsl; + + tsl = connp->conn_ixa->ixa_tsl; + mlp.tme_flags |= MIB2_TMEF_IS_LABELED; + mlp.tme_doi = label2doi(tsl); + 
mlp.tme_label = *label2bslabel(tsl); + needattr = B_TRUE; + } + mutex_exit(&connp->conn_lock); + + /* + * Create an IPv4 table entry for IPv4 entries and also + * any IPv6 entries which are bound to in6addr_any + * (i.e. anything a IPv4 peer could connect/send to). + */ + if (connp->conn_ipversion == IPV4_VERSION || + (udp->udp_state <= TS_IDLE && + IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) { + ude.udpEntryInfo.ue_state = state; + /* + * If in6addr_any this will set it to + * INADDR_ANY + */ + ude.udpLocalAddress = connp->conn_laddr_v4; + ude.udpLocalPort = ntohs(connp->conn_lport); + if (udp->udp_state == TS_DATA_XFER) { + /* + * Can potentially get here for + * v6 socket if another process + * (say, ping) has just done a + * sendto(), changing the state + * from the TS_IDLE above to + * TS_DATA_XFER by the time we hit + * this part of the code. + */ + ude.udpEntryInfo.ue_RemoteAddress = + connp->conn_faddr_v4; + ude.udpEntryInfo.ue_RemotePort = + ntohs(connp->conn_fport); + } else { + ude.udpEntryInfo.ue_RemoteAddress = 0; + ude.udpEntryInfo.ue_RemotePort = 0; + } + + /* + * We make the assumption that all udp_t + * structs will be created within an address + * region no larger than 32-bits. + */ + ude.udpInstance = (uint32_t)(uintptr_t)udp; + ude.udpCreationProcess = + (connp->conn_cpid < 0) ? 
+ MIB2_UNKNOWN_PROCESS : + connp->conn_cpid; + ude.udpCreationTime = connp->conn_open_time; + + (void) snmp_append_data2(mp_conn_ctl->b_cont, + &mp_conn_tail, (char *)&ude, sizeof (ude)); + mlp.tme_connidx = v4_conn_idx++; + if (needattr) + (void) snmp_append_data2( + mp_attr_ctl->b_cont, &mp_attr_tail, + (char *)&mlp, sizeof (mlp)); + } + if (connp->conn_ipversion == IPV6_VERSION) { + ude6.udp6EntryInfo.ue_state = state; + ude6.udp6LocalAddress = connp->conn_laddr_v6; + ude6.udp6LocalPort = ntohs(connp->conn_lport); + mutex_enter(&connp->conn_lock); + if (connp->conn_ixa->ixa_flags & + IXAF_SCOPEID_SET) { + ude6.udp6IfIndex = + connp->conn_ixa->ixa_scopeid; + } else { + ude6.udp6IfIndex = connp->conn_bound_if; + } + mutex_exit(&connp->conn_lock); + if (udp->udp_state == TS_DATA_XFER) { + ude6.udp6EntryInfo.ue_RemoteAddress = + connp->conn_faddr_v6; + ude6.udp6EntryInfo.ue_RemotePort = + ntohs(connp->conn_fport); + } else { + ude6.udp6EntryInfo.ue_RemoteAddress = + sin6_null.sin6_addr; + ude6.udp6EntryInfo.ue_RemotePort = 0; + } + /* + * We make the assumption that all udp_t + * structs will be created within an address + * region no larger than 32-bits. + */ + ude6.udp6Instance = (uint32_t)(uintptr_t)udp; + ude6.udp6CreationProcess = + (connp->conn_cpid < 0) ? + MIB2_UNKNOWN_PROCESS : + connp->conn_cpid; + ude6.udp6CreationTime = connp->conn_open_time; + + (void) snmp_append_data2(mp6_conn_ctl->b_cont, + &mp6_conn_tail, (char *)&ude6, + sizeof (ude6)); + mlp.tme_connidx = v6_conn_idx++; + if (needattr) + (void) snmp_append_data2( + mp6_attr_ctl->b_cont, + &mp6_attr_tail, (char *)&mlp, + sizeof (mlp)); + } + } + } + + /* IPv4 UDP endpoints */ + optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ + sizeof (struct T_optmgmt_ack)]; + optp->level = MIB2_UDP; + optp->name = MIB2_UDP_ENTRY; + optp->len = msgdsize(mp_conn_ctl->b_cont); + qreply(q, mp_conn_ctl); + + /* table of MLP attributes... 
 */ + optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ + sizeof (struct T_optmgmt_ack)]; + optp->level = MIB2_UDP; + optp->name = EXPER_XPORT_MLP; + optp->len = msgdsize(mp_attr_ctl->b_cont); + if (optp->len == 0) + freemsg(mp_attr_ctl); + else + qreply(q, mp_attr_ctl); + + /* IPv6 UDP endpoints */ + optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ + sizeof (struct T_optmgmt_ack)]; + optp->level = MIB2_UDP6; + optp->name = MIB2_UDP6_ENTRY; + optp->len = msgdsize(mp6_conn_ctl->b_cont); + qreply(q, mp6_conn_ctl); + + /* table of MLP attributes... */ + optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ + sizeof (struct T_optmgmt_ack)]; + optp->level = MIB2_UDP6; + optp->name = EXPER_XPORT_MLP; + optp->len = msgdsize(mp6_attr_ctl->b_cont); + if (optp->len == 0) + freemsg(mp6_attr_ctl); + else + qreply(q, mp6_attr_ctl); + + return (mp2ctl); +} + +/* + * Return 0 if invalid set request, 1 otherwise, including non-udp requests. + * NOTE: Per MIB-II, UDP has no writable data. + * TODO: If this ever actually tries to set anything, it needs to be + * to do the appropriate locking. + */ +/* ARGSUSED */ +int +udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, + uchar_t *ptr, int len) +{ + switch (level) { + case MIB2_UDP: + return (0); + default: + return (1); + } +} + +void +udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) +{ + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); + } +} + +/* + * To add stats from one mib2_udp_t to another. Static fields are not added. + * The caller should set them up properly. 
 */ +static void +udp_add_mib(mib2_udp_t *from, mib2_udp_t *to) +{ + to->udpHCInDatagrams += from->udpHCInDatagrams; + to->udpInErrors += from->udpInErrors; + to->udpHCOutDatagrams += from->udpHCOutDatagrams; + to->udpOutErrors += from->udpOutErrors; +} + + +void * +udp_kstat2_init(netstackid_t stackid) +{ + kstat_t *ksp; + + udp_stat_t template = { + { "udp_sock_fallback", KSTAT_DATA_UINT64 }, + { "udp_out_opt", KSTAT_DATA_UINT64 }, + { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, + { "udp_out_err_output", KSTAT_DATA_UINT64 }, + { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, +#ifdef DEBUG + { "udp_data_conn", KSTAT_DATA_UINT64 }, + { "udp_data_notconn", KSTAT_DATA_UINT64 }, + { "udp_out_lastdst", KSTAT_DATA_UINT64 }, + { "udp_out_diffdst", KSTAT_DATA_UINT64 }, + { "udp_out_ipv6", KSTAT_DATA_UINT64 }, + { "udp_out_mapped", KSTAT_DATA_UINT64 }, + { "udp_out_ipv4", KSTAT_DATA_UINT64 }, +#endif + }; + + ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", + KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), + 0, stackid); + + if (ksp == NULL) + return (NULL); + + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = udp_kstat2_update; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +void +udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) +{ + if (ksp != NULL) { + ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); + kstat_delete_netstack(ksp, stackid); + } +} + +/* + * To copy counters from the per CPU udp_stat_counter_t to the stack + * udp_stat_t. 
+ */ +static void +udp_add_stats(udp_stat_counter_t *from, udp_stat_t *to) +{ + to->udp_sock_fallback.value.ui64 += from->udp_sock_fallback; + to->udp_out_opt.value.ui64 += from->udp_out_opt; + to->udp_out_err_notconn.value.ui64 += from->udp_out_err_notconn; + to->udp_out_err_output.value.ui64 += from->udp_out_err_output; + to->udp_out_err_tudr.value.ui64 += from->udp_out_err_tudr; +#ifdef DEBUG + to->udp_data_conn.value.ui64 += from->udp_data_conn; + to->udp_data_notconn.value.ui64 += from->udp_data_notconn; + to->udp_out_lastdst.value.ui64 += from->udp_out_lastdst; + to->udp_out_diffdst.value.ui64 += from->udp_out_diffdst; + to->udp_out_ipv6.value.ui64 += from->udp_out_ipv6; + to->udp_out_mapped.value.ui64 += from->udp_out_mapped; + to->udp_out_ipv4.value.ui64 += from->udp_out_ipv4; +#endif +} + +/* + * To set all udp_stat_t counters to 0. + */ +static void +udp_clr_stats(udp_stat_t *stats) +{ + stats->udp_sock_fallback.value.ui64 = 0; + stats->udp_out_opt.value.ui64 = 0; + stats->udp_out_err_notconn.value.ui64 = 0; + stats->udp_out_err_output.value.ui64 = 0; + stats->udp_out_err_tudr.value.ui64 = 0; +#ifdef DEBUG + stats->udp_data_conn.value.ui64 = 0; + stats->udp_data_notconn.value.ui64 = 0; + stats->udp_out_lastdst.value.ui64 = 0; + stats->udp_out_diffdst.value.ui64 = 0; + stats->udp_out_ipv6.value.ui64 = 0; + stats->udp_out_mapped.value.ui64 = 0; + stats->udp_out_ipv4.value.ui64 = 0; +#endif +} + +int +udp_kstat2_update(kstat_t *kp, int rw) +{ + udp_stat_t *stats; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + udp_stack_t *us; + int i; + int cnt; + + if (rw == KSTAT_WRITE) + return (EACCES); + + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + us = ns->netstack_udp; + if (us == NULL) { + netstack_rele(ns); + return (-1); + } + stats = (udp_stat_t *)kp->ks_data; + udp_clr_stats(stats); + + cnt = us->us_sc_cnt; + for (i = 0; i < cnt; i++) + udp_add_stats(&us->us_sc[i]->udp_sc_stats, stats); + + 
netstack_rele(ns); + return (0); +} + +void * +udp_kstat_init(netstackid_t stackid) +{ + kstat_t *ksp; + + udp_named_kstat_t template = { + { "inDatagrams", KSTAT_DATA_UINT64, 0 }, + { "inErrors", KSTAT_DATA_UINT32, 0 }, + { "outDatagrams", KSTAT_DATA_UINT64, 0 }, + { "entrySize", KSTAT_DATA_INT32, 0 }, + { "entry6Size", KSTAT_DATA_INT32, 0 }, + { "outErrors", KSTAT_DATA_UINT32, 0 }, + }; + + ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", + KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); + + if (ksp == NULL) + return (NULL); + + template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); + template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); + + bcopy(&template, ksp->ks_data, sizeof (template)); + ksp->ks_update = udp_kstat_update; + ksp->ks_private = (void *)(uintptr_t)stackid; + + kstat_install(ksp); + return (ksp); +} + +/* + * To sum up all MIB2 stats for a udp_stack_t from all per CPU stats. The + * caller should initialize the target mib2_udp_t properly as this function + * just adds up all the per CPU stats. 
+ */ +static void +udp_sum_mib(udp_stack_t *us, mib2_udp_t *udp_mib) +{ + int i; + int cnt; + + cnt = us->us_sc_cnt; + for (i = 0; i < cnt; i++) + udp_add_mib(&us->us_sc[i]->udp_sc_mib, udp_mib); +} + +static int +udp_kstat_update(kstat_t *kp, int rw) +{ + udp_named_kstat_t *udpkp; + netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; + netstack_t *ns; + udp_stack_t *us; + mib2_udp_t udp_mib; + + if (rw == KSTAT_WRITE) + return (EACCES); + + ns = netstack_find_by_stackid(stackid); + if (ns == NULL) + return (-1); + us = ns->netstack_udp; + if (us == NULL) { + netstack_rele(ns); + return (-1); + } + udpkp = (udp_named_kstat_t *)kp->ks_data; + + bzero(&udp_mib, sizeof (udp_mib)); + udp_sum_mib(us, &udp_mib); + + udpkp->inDatagrams.value.ui64 = udp_mib.udpHCInDatagrams; + udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; + udpkp->outDatagrams.value.ui64 = udp_mib.udpHCOutDatagrams; + udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; + netstack_rele(ns); + return (0); +}
--- a/usr/src/uts/common/inet/udp_impl.h Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/common/inet/udp_impl.h Mon Jul 19 17:27:45 2010 -0700 @@ -86,8 +86,34 @@ kstat_named_t udp_out_mapped; kstat_named_t udp_out_ipv4; #endif +} udp_stat_t; -} udp_stat_t; +/* + * This struct contains only the counter part of udp_stat_t. It is used + * in udp_stats_cpu_t instead of udp_stat_t to save memory space. + */ +typedef struct { + uint64_t udp_sock_fallback; + uint64_t udp_out_opt; + uint64_t udp_out_err_notconn; + uint64_t udp_out_err_output; + uint64_t udp_out_err_tudr; +#ifdef DEBUG + uint64_t udp_data_conn; + uint64_t udp_data_notconn; + uint64_t udp_out_lastdst; + uint64_t udp_out_diffdst; + uint64_t udp_out_ipv6; + uint64_t udp_out_mapped; + uint64_t udp_out_ipv4; +#endif +} udp_stat_counter_t; + +/* Per CPU stats: UDP MIB2 and UDP kstat. */ +typedef struct { + mib2_udp_t udp_sc_mib; + udp_stat_counter_t udp_sc_stats; +} udp_stats_cpu_t; #define UDP_NUM_EPRIV_PORTS 64 @@ -118,9 +144,6 @@ kstat_t *us_mibkp; /* kstats exporting mib data */ kstat_t *us_kstat; - udp_stat_t us_statistics; - - mib2_udp_t us_udp_mib; /* SNMP fixed size info */ /* * The smallest anonymous port in the priviledged port range which UDP @@ -129,6 +152,9 @@ in_port_t us_min_anonpriv_port; ldi_ident_t us_ldi_ident; + + udp_stats_cpu_t **us_sc; + int us_sc_cnt; }; typedef struct udp_stack udp_stack_t; @@ -194,9 +220,12 @@ #define us_pmtu_discovery us_propinfo_tbl[11].prop_cur_bval #define us_sendto_ignerr us_propinfo_tbl[12].prop_cur_bval -#define UDP_STAT(us, x) ((us)->us_statistics.x.value.ui64++) +#define UDPS_BUMP_MIB(us, x) \ + BUMP_MIB(&(us)->us_sc[CPU->cpu_seqid]->udp_sc_mib, x) + +#define UDP_STAT(us, x) ((us)->us_sc[CPU->cpu_seqid]->udp_sc_stats.x++) #define UDP_STAT_UPDATE(us, x, n) \ - ((us)->us_statistics.x.value.ui64 += (n)) + ((us)->us_sc[CPU->cpu_seqid]->udp_sc_stats.x += (n)) #ifdef DEBUG #define UDP_DBGSTAT(us, x) UDP_STAT(us, x) #else @@ -215,6 +244,13 @@ socklen_t 
addrlen); extern void udp_wput(queue_t *, mblk_t *); +extern void *udp_kstat_init(netstackid_t stackid); +extern void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); +extern void *udp_kstat2_init(netstackid_t stackid); +extern void udp_kstat2_fini(netstackid_t, kstat_t *); + +extern void udp_stack_cpu_add(udp_stack_t *, processorid_t); + /* * Object to represent database of options to search passed to * {sock,tpi}optcom_req() interface routine to take care of option
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64 Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 Mon Jul 19 17:27:45 2010 -0700 @@ -215,14 +215,18 @@ sctp_asconf_dispatch_tbl sctp_conn_cache sctp_conn_hash_size +sctp_do_reclaim sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_min_assoc_listener sctp_opt_arr sctp_opt_arr_size +sctp_pa_early_abort +sctp_pp_early_abort sctp_propinfo_tbl sctp_propinfo_count -sctp_recvq_tq_task_max +sctp_recvq_tq_list_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max sctp_recvq_tq_thr_min
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64 Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 Mon Jul 19 17:27:45 2010 -0700 @@ -213,14 +213,18 @@ sctp_asconf_dispatch_tbl sctp_conn_cache sctp_conn_hash_size +sctp_do_reclaim sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_min_assoc_listener sctp_opt_arr sctp_opt_arr_size +sctp_pa_early_abort +sctp_pp_early_abort sctp_propinfo_tbl sctp_propinfo_count -sctp_recvq_tq_task_max +sctp_recvq_tq_list_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max sctp_recvq_tq_thr_min
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64 Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64 Mon Jul 19 17:27:45 2010 -0700 @@ -215,14 +215,18 @@ sctp_asconf_dispatch_tbl sctp_conn_cache sctp_conn_hash_size +sctp_do_reclaim sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_min_assoc_listener sctp_opt_arr sctp_opt_arr_size +sctp_pa_early_abort +sctp_pp_early_abort sctp_propinfo_tbl sctp_propinfo_count -sctp_recvq_tq_task_max +sctp_recvq_tq_list_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max sctp_recvq_tq_thr_min
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64 Tue Jul 20 14:28:29 2010 +0800 +++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64 Mon Jul 19 17:27:45 2010 -0700 @@ -213,14 +213,18 @@ sctp_asconf_dispatch_tbl sctp_conn_cache sctp_conn_hash_size +sctp_do_reclaim sctp_kmem_faddr_cache sctp_kmem_ftsn_set_cache sctp_kmem_set_cache +sctp_min_assoc_listener sctp_opt_arr sctp_opt_arr_size +sctp_pa_early_abort +sctp_pp_early_abort sctp_propinfo_tbl sctp_propinfo_count -sctp_recvq_tq_task_max +sctp_recvq_tq_list_max sctp_recvq_tq_task_min sctp_recvq_tq_thr_max sctp_recvq_tq_thr_min