Mercurial > illumos > illumos-gate
changeset 13135:9efd3d43accd
PSARC/2010/325 Different MTU for unicast and multicast
6836162 Interface to report different multicast and unicast MTUs from network stack
author | Erik Nordmark <Erik.Nordmark@Sun.COM> |
---|---|
date | Mon, 16 Aug 2010 15:30:54 -0700 |
parents | 61fe7fb74c94 |
children | 8f28cf08bb11 |
files | usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/igmp.c usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip6.c usr/src/uts/common/inet/ip/ip6_if.c usr/src/uts/common/inet/ip/ip_dce.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip/ip_input.c usr/src/uts/common/inet/ip/ip_mroute.c usr/src/uts/common/inet/ip/ip_output.c usr/src/uts/common/inet/ip/ipmp.c usr/src/uts/common/io/dld/dld_proto.c usr/src/uts/common/io/dld/dld_str.c usr/src/uts/common/io/ib/clients/ibd/ibd.c usr/src/uts/common/io/mac/mac.c usr/src/uts/common/io/mac/mac_client.c usr/src/uts/common/io/mac/mac_provider.c usr/src/uts/common/sys/dlpi.h usr/src/uts/common/sys/mac.h usr/src/uts/common/sys/mac_impl.h usr/src/uts/common/sys/mac_provider.h |
diffstat | 21 files changed, 231 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip.h Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip.h Mon Aug 16 15:30:54 2010 -0700 @@ -1506,6 +1506,7 @@ * ig_cast_ill ipsq or ipmp_lock ipsq and ipmp_lock * ig_arpent ipsq ipsq * ig_mtu ipsq ipsq + * ig_mc_mtu ipsq ipsq */ typedef struct ipmp_illgrp_s { list_t ig_if; /* list of all interfaces */ @@ -1515,7 +1516,8 @@ struct ill_s *ig_ipmp_ill; /* backpointer to IPMP meta-interface */ struct ill_s *ig_cast_ill; /* nominated ill for multi/broadcast */ list_t ig_arpent; /* list of ARP entries */ - uint_t ig_mtu; /* ig_ipmp_ill->ill_max_mtu */ + uint_t ig_mtu; /* ig_ipmp_ill->ill_mtu */ + uint_t ig_mc_mtu; /* ig_ipmp_ill->ill_mc_mtu */ } ipmp_illgrp_t; /* @@ -1611,6 +1613,7 @@ uint_t ill_max_frag; /* Max IDU from DLPI. */ uint_t ill_current_frag; /* Current IDU from DLPI. */ uint_t ill_mtu; /* User-specified MTU; SIOCSLIFMTU */ + uint_t ill_mc_mtu; /* MTU for multi/broadcast */ uint_t ill_metric; /* BSD if metric, for compatibility. */ char *ill_name; /* Our name. */ uint_t ill_ipif_dup_count; /* Number of duplicate addresses. */ @@ -1905,6 +1908,7 @@ * ill_max_hops ipsq Not atomic * * ill_mtu ill_lock None + * ill_mc_mtu ill_lock None * * ill_user_mtu ipsq + ill_lock ill_lock * ill_reachable_time ipsq + ill_lock ill_lock @@ -3189,6 +3193,7 @@ extern mblk_t *ip_carve_mp(mblk_t **, ssize_t); extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); extern mblk_t *ip_dlnotify_alloc(uint_t, uint_t); +extern mblk_t *ip_dlnotify_alloc2(uint_t, uint_t, uint_t); extern char *ip_dot_addr(ipaddr_t, char *); extern const char *mac_colon_addr(const uint8_t *, size_t, char *, size_t); extern void ip_lwput(queue_t *, mblk_t *);
--- a/usr/src/uts/common/inet/ip/igmp.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/igmp.c Mon Aug 16 15:30:54 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1861,7 +1860,7 @@ * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill. * The report will contain one group record * for each element of reclist. If this causes packet length to - * exceed ill->ill_mtu, multiple reports are sent. + * exceed ill->ill_mc_mtu, multiple reports are sent. * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(), * and those buffers are freed here. */ @@ -1897,7 +1896,7 @@ for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { rsize = sizeof (grphdra_t) + (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t)); - if (size + rsize > ill->ill_mtu) { + if (size + rsize > ill->ill_mc_mtu) { if (rp == cur_reclist) { /* * If the first mrec we looked at is too big @@ -1908,7 +1907,7 @@ * other types). */ int srcspace, srcsperpkt; - srcspace = ill->ill_mtu - (size + + srcspace = ill->ill_mc_mtu - (size + sizeof (grphdra_t)); /* @@ -2498,7 +2497,7 @@ /* * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The * report will contain one multicast address record for each element of - * reclist. If this causes packet length to exceed ill->ill_mtu, + * reclist. If this causes packet length to exceed ill->ill_mc_mtu, * multiple reports are sent. reclist is assumed to be made up of * buffers allocated by mcast_bldmrec(), and those buffers are freed here. */ @@ -2542,7 +2541,7 @@ rp = rp->mrec_next, numrec++) { rsize = sizeof (mld2mar_t) + (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); - if (size + rsize > ill->ill_mtu) { + if (size + rsize > ill->ill_mc_mtu) { if (rp == cur_reclist) { /* * If the first mrec we looked at is too big @@ -2553,7 +2552,7 @@ * other types). */ int srcspace, srcsperpkt; - srcspace = ill->ill_mtu - + srcspace = ill->ill_mc_mtu - (size + sizeof (mld2mar_t)); /*
--- a/usr/src/uts/common/inet/ip/ip.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip.c Mon Aug 16 15:30:54 2010 -0700 @@ -3352,7 +3352,7 @@ * If uinfo is set, then we fill in the best available information * we have for the destination. This is based on (in priority order) any * metrics and path MTU stored in a dce_t, route metrics, and finally the - * ill_mtu. + * ill_mtu/ill_mc_mtu. * * Tsol note: If we have a source route then dst_addr != firsthop. But we * always do the label check on dst_addr. @@ -3681,9 +3681,14 @@ uint_t ip_get_base_mtu(ill_t *ill, ire_t *ire) { - uint_t mtu = ill->ill_mtu; + uint_t mtu; uint_t iremtu = ire->ire_metrics.iulp_mtu; + if (ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST)) + mtu = ill->ill_mc_mtu; + else + mtu = ill->ill_mtu; + if (iremtu != 0 && iremtu < mtu) mtu = iremtu; @@ -3796,17 +3801,32 @@ * an ill. We'd use the above IP_MAXPACKET in that case just * to tell the transport something larger than zero. */ - if (nce->nce_common->ncec_ill->ill_mtu < pmtu) - pmtu = nce->nce_common->ncec_ill->ill_mtu; - if (nce->nce_common->ncec_ill != nce->nce_ill && - nce->nce_ill->ill_mtu < pmtu) { - /* - * for interfaces in an IPMP group, the mtu of - * the nce_ill (under_ill) could be different - * from the mtu of the ncec_ill, so we take the - * min of the two. - */ - pmtu = nce->nce_ill->ill_mtu; + if (ire->ire_type & (IRE_MULTICAST|IRE_BROADCAST)) { + if (nce->nce_common->ncec_ill->ill_mc_mtu < pmtu) + pmtu = nce->nce_common->ncec_ill->ill_mc_mtu; + if (nce->nce_common->ncec_ill != nce->nce_ill && + nce->nce_ill->ill_mc_mtu < pmtu) { + /* + * for interfaces in an IPMP group, the mtu of + * the nce_ill (under_ill) could be different + * from the mtu of the ncec_ill, so we take the + * min of the two. + */ + pmtu = nce->nce_ill->ill_mc_mtu; + } + } else { + if (nce->nce_common->ncec_ill->ill_mtu < pmtu) + pmtu = nce->nce_common->ncec_ill->ill_mtu; + if (nce->nce_common->ncec_ill != nce->nce_ill && + nce->nce_ill->ill_mtu < pmtu) { + /* + * for interfaces in an IPMP group, the mtu of + * the nce_ill (under_ill) could be different + * from the mtu of the ncec_ill, so we take the + * min of the two. + */ + pmtu = nce->nce_ill->ill_mtu; + } } } @@ -4684,6 +4704,22 @@ return (mp); } +mblk_t * +ip_dlnotify_alloc2(uint_t notification, uint_t data1, uint_t data2) +{ + dl_notify_ind_t *notifyp; + mblk_t *mp; + + if ((mp = ip_dlpi_alloc(DL_NOTIFY_IND_SIZE, DL_NOTIFY_IND)) == NULL) + return (NULL); + + notifyp = (dl_notify_ind_t *)mp->b_rptr; + notifyp->dl_notification = notification; + notifyp->dl_data1 = data1; + notifyp->dl_data2 = data2; + return (mp); +} + /* * Debug formatting routine. Returns a character string representation of the * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address @@ -8449,7 +8485,7 @@ case DL_NOTIFY_IND: { dl_notify_ind_t *notify = (dl_notify_ind_t *)mp->b_rptr; - uint_t orig_mtu; + uint_t orig_mtu, orig_mc_mtu; switch (notify->dl_notification) { case DL_NOTE_PHYS_ADDR: @@ -8470,6 +8506,7 @@ break; case DL_NOTE_SDU_SIZE: + case DL_NOTE_SDU_SIZE2: /* * The dce and fragmentation code can cope with * this changing while packets are being sent. @@ -8479,11 +8516,23 @@ * Change the MTU size of the interface. */ mutex_enter(&ill->ill_lock); - ill->ill_current_frag = (uint_t)notify->dl_data; + orig_mtu = ill->ill_mtu; + orig_mc_mtu = ill->ill_mc_mtu; + switch (notify->dl_notification) { + case DL_NOTE_SDU_SIZE: + ill->ill_current_frag = + (uint_t)notify->dl_data; + ill->ill_mc_mtu = (uint_t)notify->dl_data; + break; + case DL_NOTE_SDU_SIZE2: + ill->ill_current_frag = + (uint_t)notify->dl_data1; + ill->ill_mc_mtu = (uint_t)notify->dl_data2; + break; + } if (ill->ill_current_frag > ill->ill_max_frag) ill->ill_max_frag = ill->ill_current_frag; - orig_mtu = ill->ill_mtu; if (!(ill->ill_flags & ILLF_FIXEDMTU)) { ill->ill_mtu = ill->ill_current_frag; @@ -8495,20 +8544,32 @@ ill->ill_user_mtu < ill->ill_mtu) ill->ill_mtu = ill->ill_user_mtu; + if (ill->ill_user_mtu != 0 && + ill->ill_user_mtu < ill->ill_mc_mtu) + ill->ill_mc_mtu = ill->ill_user_mtu; + if (ill->ill_isv6) { if (ill->ill_mtu < IPV6_MIN_MTU) ill->ill_mtu = IPV6_MIN_MTU; + if (ill->ill_mc_mtu < IPV6_MIN_MTU) + ill->ill_mc_mtu = IPV6_MIN_MTU; } else { if (ill->ill_mtu < IP_MIN_MTU) ill->ill_mtu = IP_MIN_MTU; + if (ill->ill_mc_mtu < IP_MIN_MTU) + ill->ill_mc_mtu = IP_MIN_MTU; } - } + } else if (ill->ill_mc_mtu > ill->ill_mtu) { + ill->ill_mc_mtu = ill->ill_mtu; + } + mutex_exit(&ill->ill_lock); /* * Make sure all dce_generation checks find out - * that ill_mtu has changed. - */ - if (orig_mtu != ill->ill_mtu) { + * that ill_mtu/ill_mc_mtu has changed. + */ + if (orig_mtu != ill->ill_mtu || + orig_mc_mtu != ill->ill_mc_mtu) { dce_increment_all_generations(ill->ill_isv6, ill->ill_ipst); }
--- a/usr/src/uts/common/inet/ip/ip6.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip6.c Mon Aug 16 15:30:54 2010 -0700 @@ -709,6 +709,8 @@ mutex_enter(&dce->dce_lock); if (dce->dce_flags & DCEF_PMTU) old_max_frag = dce->dce_pmtu; + else if (IN6_IS_ADDR_MULTICAST(&final_dst)) + old_max_frag = ill->ill_mc_mtu; else old_max_frag = ill->ill_mtu; @@ -1954,7 +1956,7 @@ * If uinfo is set, then we fill in the best available information * we have for the destination. This is based on (in priority order) any * metrics and path MTU stored in a dce_t, route metrics, and finally the - * ill_mtu. + * ill_mtu/ill_mc_mtu. * * Tsol note: If we have a source route then dst_addr != firsthop. But we * always do the label check on dst_addr.
--- a/usr/src/uts/common/inet/ip/ip6_if.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip6_if.c Mon Aug 16 15:30:54 2010 -0700 @@ -2299,7 +2299,7 @@ (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH | DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG | DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS | - DL_NOTE_REPLUMB | DL_NOTE_ALLOWED_IPS); + DL_NOTE_REPLUMB | DL_NOTE_ALLOWED_IPS | DL_NOTE_SDU_SIZE2); phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) + sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
--- a/usr/src/uts/common/inet/ip/ip_dce.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip_dce.c Mon Aug 16 15:30:54 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -664,7 +663,7 @@ /* * Increment the generation number on all dces that have a path MTU and - * the default DCE. Used when ill_mtu changes. + * the default DCE. Used when ill_mtu or ill_mc_mtu changes. */ void dce_increment_all_generations(boolean_t isv6, ip_stack_t *ipst)
--- a/usr/src/uts/common/inet/ip/ip_if.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip_if.c Mon Aug 16 15:30:54 2010 -0700 @@ -3717,6 +3717,7 @@ goto done; ill->ill_current_frag = ill->ill_max_frag; ill->ill_mtu = ill->ill_max_frag; /* Initial value */ + ill->ill_mc_mtu = ill->ill_mtu; /* * ipif_loopback_name can't be pointed at directly because its used * by both the ipv4 and ipv6 interfaces. When the ill is removed @@ -4189,6 +4190,7 @@ ill->ill_max_frag = MAX(min_mtu, dlia->dl_max_sdu); ill->ill_current_frag = ill->ill_max_frag; ill->ill_mtu = ill->ill_max_frag; + ill->ill_mc_mtu = ill->ill_mtu; /* Overridden by DL_NOTE_SDU_SIZE2 */ ill->ill_type = ipm->ip_m_type; @@ -10816,6 +10818,10 @@ mutex_exit(&ill->ill_lock); return (EINVAL); } + /* Avoid increasing ill_mc_mtu */ + if (ill->ill_mc_mtu > mtu) + ill->ill_mc_mtu = mtu; + /* * The dce and fragmentation code can handle changes to ill_mtu * concurrent with sending/fragmenting packets. @@ -10826,7 +10832,7 @@ /* * Make sure all dce_generation checks find out - * that ill_mtu has changed. + * that ill_mtu/ill_mc_mtu has changed. */ dce_increment_all_generations(ill->ill_isv6, ill->ill_ipst); @@ -11584,12 +11590,13 @@ * here. */ ill->ill_mtu = MIN(ill->ill_current_frag, ill->ill_user_mtu); + ill->ill_mc_mtu = MIN(ill->ill_mc_mtu, ill->ill_user_mtu); } mutex_exit(&ill->ill_lock); /* * Make sure all dce_generation checks find out - * that ill_mtu has changed. + * that ill_mtu/ill_mc_mtu has changed. */ if (!(ill->ill_flags & ILLF_FIXEDMTU) && (lir->lir_maxmtu != 0)) dce_increment_all_generations(ill->ill_isv6, ill->ill_ipst);
--- a/usr/src/uts/common/inet/ip/ip_input.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip_input.c Mon Aug 16 15:30:54 2010 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1475,7 +1475,7 @@ goto done; } - ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mtu, 0); + ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); nce_refrele(nce); done: /* Restore */
--- a/usr/src/uts/common/inet/ip/ip_mroute.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip_mroute.c Mon Aug 16 15:30:54 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -3185,7 +3184,8 @@ * statistics for input errors will be increased on the wrong * ill but that isn't a big deal. */ - ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mtu, 0); + ip_forward_xmit_v4(nce, ill, mp, ipha, &iras, ill->ill_mc_mtu, + 0); ASSERT(!(iras.ira_flags & IRAF_IPSEC_SECURE)); nce_refrele(nce);
--- a/usr/src/uts/common/inet/ip/ip_output.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ip_output.c Mon Aug 16 15:30:54 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -328,7 +327,8 @@ * An initial ixa_fragsize was set in ip_set_destination * and we update it if any routing changes above. * A change to ill_mtu with ifconfig will increase all dce_generation - * so that we will detect that with the generation check. + * so that we will detect that with the generation check. Ditto for + * ill_mc_mtu. */ /*
--- a/usr/src/uts/common/inet/ip/ipmp.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/inet/ip/ipmp.c Mon Aug 16 15:30:54 2010 -0700 @@ -79,7 +79,7 @@ static ill_t *ipmp_illgrp_min_ill(ipmp_illgrp_t *); static ill_t *ipmp_illgrp_max_ill(ipmp_illgrp_t *); static void ipmp_illgrp_set_cast(ipmp_illgrp_t *, ill_t *); -static void ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t); +static void ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t, uint_t); static boolean_t ipmp_ill_activate(ill_t *); static void ipmp_ill_deactivate(ill_t *); static void ipmp_ill_ire_mark_testhidden(ire_t *, char *); @@ -556,7 +556,7 @@ illg->ig_ipmp_ill = ill; ill->ill_grp = illg; - ipmp_illgrp_set_mtu(illg, mtu); + ipmp_illgrp_set_mtu(illg, mtu, mtu); return (illg); } @@ -995,7 +995,7 @@ * Caller must be inside the IPSQ unless this is initialization. */ static void -ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu) +ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu, uint_t mc_mtu) { ill_t *ill = illg->ig_ipmp_ill; mblk_t *mp; @@ -1005,8 +1005,9 @@ /* * If allocation fails, we have bigger problems than MTU. */ - if ((mp = ip_dlnotify_alloc(DL_NOTE_SDU_SIZE, mtu)) != NULL) { + if ((mp = ip_dlnotify_alloc2(DL_NOTE_SDU_SIZE2, mtu, mc_mtu)) != NULL) { illg->ig_mtu = mtu; + illg->ig_mc_mtu = mc_mtu; put(ill->ill_rq, mp); } } @@ -1021,6 +1022,7 @@ ill_t *ill; ill_t *ipmp_ill = illg->ig_ipmp_ill; uint_t mtu = 0; + uint_t mc_mtu = 0; ASSERT(IAM_WRITER_ILL(ipmp_ill)); @@ -1035,6 +1037,8 @@ mutex_enter(&ill->ill_lock); if (mtu == 0 || ill->ill_mtu < mtu) mtu = ill->ill_mtu; + if (mc_mtu == 0 || ill->ill_mc_mtu < mc_mtu) + mc_mtu = ill->ill_mc_mtu; mutex_exit(&ill->ill_lock); } @@ -1042,9 +1046,9 @@ * MTU must be at least the minimum MTU. */ mtu = MAX(mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU); - - if (illg->ig_mtu != mtu) - ipmp_illgrp_set_mtu(illg, mtu); + mc_mtu = MAX(mc_mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU); + if (illg->ig_mtu != mtu || illg->ig_mc_mtu != mc_mtu) + ipmp_illgrp_set_mtu(illg, mtu, mc_mtu); } /* @@ -1174,7 +1178,7 @@ ipmp_ill->ill_flags |= ILLF_COS_ENABLED; mutex_exit(&ipmp_ill->ill_lock); } - ipmp_illgrp_set_mtu(illg, ill->ill_mtu); + ipmp_illgrp_set_mtu(illg, ill->ill_mtu, ill->ill_mc_mtu); } else { ASSERT(ipmp_ill->ill_phys_addr_length == ill->ill_phys_addr_length); @@ -1185,8 +1189,11 @@ ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED; mutex_exit(&ipmp_ill->ill_lock); } - if (illg->ig_mtu > ill->ill_mtu) - ipmp_illgrp_set_mtu(illg, ill->ill_mtu); + if (illg->ig_mtu > ill->ill_mtu || + illg->ig_mc_mtu > ill->ill_mc_mtu) { + ipmp_illgrp_set_mtu(illg, ill->ill_mtu, + ill->ill_mc_mtu); + } } rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
--- a/usr/src/uts/common/io/dld/dld_proto.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/dld/dld_proto.c Mon Aug 16 15:30:54 2010 -0700 @@ -1128,6 +1128,7 @@ DL_NOTE_FASTPATH_FLUSH | DL_NOTE_SPEED | DL_NOTE_SDU_SIZE| + DL_NOTE_SDU_SIZE2| DL_NOTE_ALLOWED_IPS; if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
--- a/usr/src/uts/common/io/dld/dld_str.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/dld/dld_str.c Mon Aug 16 15:30:54 2010 -0700 @@ -1394,12 +1394,12 @@ * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE */ static void -str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu) +str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu) { mblk_t *mp; dl_notify_ind_t *dlip; - if (!(dsp->ds_notifications & DL_NOTE_SDU_SIZE)) + if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2))) return; if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), @@ -1409,8 +1409,14 @@ bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); dlip = (dl_notify_ind_t *)mp->b_rptr; dlip->dl_primitive = DL_NOTIFY_IND; - dlip->dl_notification = DL_NOTE_SDU_SIZE; - dlip->dl_data = max_sdu; + if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) { + dlip->dl_notification = DL_NOTE_SDU_SIZE2; + dlip->dl_data1 = max_sdu; + dlip->dl_data2 = multicast_sdu; + } else { + dlip->dl_notification = DL_NOTE_SDU_SIZE; + dlip->dl_data = max_sdu; + } qreply(dsp->ds_wq, mp); } @@ -1865,8 +1871,9 @@ case MAC_NOTE_SDU_SIZE: { uint_t max_sdu; - mac_sdu_get(dsp->ds_mh, NULL, &max_sdu); - str_notify_sdu_size(dsp, max_sdu); + uint_t multicast_sdu; + mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu); + str_notify_sdu_size(dsp, max_sdu, multicast_sdu); break; }
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c Mon Aug 16 15:30:54 2010 -0700 @@ -155,7 +155,7 @@ * Changing the linkmode requires some bookkeeping in the driver. The * capabilities need to be re-reported to the mac layer. This is done by * calling mac_capab_update(). The maxsdu is updated by calling - * mac_maxsdu_update(). + * mac_maxsdu_update2(). * The private properties retain their values across the change of linkmode. * NOTE: * - The port driver does not support any property apart from mtu. @@ -2392,6 +2392,7 @@ macp->m_src_addr = (uint8_t *)&state->id_macaddr; macp->m_callbacks = &ibd_m_callbacks; macp->m_min_sdu = 0; + macp->m_multicast_sdu = IBD_DEF_MAX_SDU; if (state->id_type == IBD_PORT_DRIVER) { macp->m_max_sdu = IBD_DEF_RC_MAX_SDU; } else if (state->id_enable_rc) { @@ -4592,14 +4593,16 @@ } state->id_enable_rc = 1; /* inform MAC framework of new MTU */ - err = mac_maxsdu_update(state->id_mh, - state->rc_mtu - IPOIB_HDRSIZE); + err = mac_maxsdu_update2(state->id_mh, + state->rc_mtu - IPOIB_HDRSIZE, + state->id_mtu - IPOIB_HDRSIZE); } else { if (!state->id_enable_rc) { return (0); } state->id_enable_rc = 0; - err = mac_maxsdu_update(state->id_mh, + err = mac_maxsdu_update2(state->id_mh, + state->id_mtu - IPOIB_HDRSIZE, state->id_mtu - IPOIB_HDRSIZE); } (void) ibd_record_capab(state); @@ -6007,8 +6010,9 @@ state->id_mgid.gid_prefix, state->id_mgid.gid_guid); if (!state->id_enable_rc) { - (void) mac_maxsdu_update(state->id_mh, state->id_mtu - - IPOIB_HDRSIZE); + (void) mac_maxsdu_update2(state->id_mh, + state->id_mtu - IPOIB_HDRSIZE, + state->id_mtu - IPOIB_HDRSIZE); } mac_unicst_update(state->id_mh, (uint8_t *)&state->id_macaddr); @@ -8272,6 +8276,7 @@ macp->m_src_addr = (uint8_t *)&state->id_macaddr; macp->m_callbacks = &ibd_m_callbacks; macp->m_min_sdu = 0; + macp->m_multicast_sdu = IBD_DEF_MAX_SDU; if (state->id_enable_rc) { macp->m_max_sdu = IBD_DEF_RC_MAX_SDU; } else {
--- a/usr/src/uts/common/io/mac/mac.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/mac/mac.c Mon Aug 16 15:30:54 2010 -0700 @@ -3140,7 +3140,7 @@ uint32_t sdu; ASSERT(valsize >= sizeof (uint32_t)); - mac_sdu_get(mh, NULL, &sdu); + mac_sdu_get2(mh, NULL, &sdu, NULL); bcopy(&sdu, val, sizeof (sdu)); return (0); @@ -3398,7 +3398,7 @@ case MAC_PROP_MTU: { uint32_t sdu; - mac_sdu_get(mh, NULL, &sdu); + mac_sdu_get2(mh, NULL, &sdu, NULL); if (range != NULL && !(state.pr_flags & MAC_PROP_INFO_RANGE)) {
--- a/usr/src/uts/common/io/mac/mac_client.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/mac/mac_client.c Mon Aug 16 15:30:54 2010 -0700 @@ -783,6 +783,20 @@ *max_sdu = mip->mi_sdu_max; } +void +mac_sdu_get2(mac_handle_t mh, uint_t *min_sdu, uint_t *max_sdu, + uint_t *multicast_sdu) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if (min_sdu != NULL) + *min_sdu = mip->mi_sdu_min; + if (max_sdu != NULL) + *max_sdu = mip->mi_sdu_max; + if (multicast_sdu != NULL) + *multicast_sdu = mip->mi_sdu_multicast; +} + /* * Update the MAC unicast address of the specified client's flows. Currently * only one unicast MAC unicast address is allowed per client.
--- a/usr/src/uts/common/io/mac/mac_provider.c Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/io/mac/mac_provider.c Mon Aug 16 15:30:54 2010 -0700 @@ -212,8 +212,14 @@ mip->mi_info.mi_nativemedia = mtype->mt_nativetype; if (mregp->m_max_sdu <= mregp->m_min_sdu) goto fail; + if (mregp->m_multicast_sdu == 0) + mregp->m_multicast_sdu = mregp->m_max_sdu; + if (mregp->m_multicast_sdu < mregp->m_min_sdu || + mregp->m_multicast_sdu > mregp->m_max_sdu) + goto fail; mip->mi_sdu_min = mregp->m_min_sdu; mip->mi_sdu_max = mregp->m_max_sdu; + mip->mi_sdu_multicast = mregp->m_multicast_sdu; mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length; /* * If the media supports a broadcast address, cache a pointer to it @@ -934,6 +940,13 @@ i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG); } +/* + * Used by normal drivers to update the max sdu size. + * We need to handle the case of a smaller mi_sdu_multicast + * since this is called by mac_set_mtu() even for drivers that + * have differing unicast and multicast mtu and we don't want to + * increase the multicast mtu by accident in that case. + */ int mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max) { @@ -942,6 +955,31 @@ if (sdu_max == 0 || sdu_max < mip->mi_sdu_min) return (EINVAL); mip->mi_sdu_max = sdu_max; + if (mip->mi_sdu_multicast > mip->mi_sdu_max) + mip->mi_sdu_multicast = mip->mi_sdu_max; + + /* Send a MAC_NOTE_SDU_SIZE notification. */ + i_mac_notify(mip, MAC_NOTE_SDU_SIZE); + return (0); +} + +/* + * Version of the above function that is used by drivers that have a different + * max sdu size for multicast/broadcast vs. unicast. + */ +int +mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast) +{ + mac_impl_t *mip = (mac_impl_t *)mh; + + if (sdu_max == 0 || sdu_max < mip->mi_sdu_min) + return (EINVAL); + if (sdu_multicast == 0) + sdu_multicast = sdu_max; + if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min) + return (EINVAL); + mip->mi_sdu_max = sdu_max; + mip->mi_sdu_multicast = sdu_multicast; /* Send a MAC_NOTE_SDU_SIZE notification. */ i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
--- a/usr/src/uts/common/sys/dlpi.h Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/sys/dlpi.h Mon Aug 16 15:30:54 2010 -0700 @@ -407,6 +407,7 @@ #define DL_NOTE_CAPAB_RENEG 0x0400 /* Initiate capability renegotiation */ #define DL_NOTE_REPLUMB 0x0800 /* Inform the link to replumb */ #define DL_NOTE_ALLOWED_IPS 0x1000 /* "allowed-ips" notification */ +#define DL_NOTE_SDU_SIZE2 0x2000 /* New unicast and multicast size */ /* * DLPI notification codes for DL_NOTIFY_CONF primitives. @@ -991,7 +992,13 @@ typedef struct { t_uscalar_t dl_primitive; /* set to DL_NOTIFY_IND */ uint32_t dl_notification; /* Which notification? */ - uint32_t dl_data; /* notification specific */ + union { + uint32_t dlu_data32; /* notification specific */ + uint16_t dlu_data16[2]; /* For DL_NOTE_SDU_SIZE2 */ + } dl_dlu; +#define dl_data dl_dlu.dlu_data32 +#define dl_data1 dl_dlu.dlu_data16[0] /* Unicast MTU */ +#define dl_data2 dl_dlu.dlu_data16[1] /* Multicast MTU */ t_uscalar_t dl_addr_length; /* length of complete DLSAP addr */ t_uscalar_t dl_addr_offset; /* offset from start of M_PROTO */ } dl_notify_ind_t;
--- a/usr/src/uts/common/sys/mac.h Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/sys/mac.h Mon Aug 16 15:30:54 2010 -0700 @@ -584,7 +584,11 @@ extern minor_t mac_minor_hold(boolean_t); extern void mac_minor_rele(minor_t); extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *); +extern void mac_sdu_get2(mac_handle_t, uint_t *, uint_t *, + uint_t *); extern int mac_maxsdu_update(mac_handle_t, uint_t); +extern int mac_maxsdu_update2(mac_handle_t, uint_t, + uint_t); extern uint_t mac_addr_len(mac_handle_t); extern int mac_type(mac_handle_t); extern int mac_nativetype(mac_handle_t);
--- a/usr/src/uts/common/sys/mac_impl.h Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/sys/mac_impl.h Mon Aug 16 15:30:54 2010 -0700 @@ -494,6 +494,7 @@ uint32_t mi_margin; /* mi_rw_lock */ uint_t mi_sdu_min; /* mi_rw_lock */ uint_t mi_sdu_max; /* mi_rw_lock */ + uint_t mi_sdu_multicast; /* mi_rw_lock */ /* * Cache of factory MAC addresses provided by the driver. If
--- a/usr/src/uts/common/sys/mac_provider.h Mon Aug 16 15:11:00 2010 -0700 +++ b/usr/src/uts/common/sys/mac_provider.h Mon Aug 16 15:30:54 2010 -0700 @@ -450,6 +450,7 @@ char **m_priv_props; uint32_t m_margin; uint32_t m_v12n; /* Virtualization level */ + uint_t m_multicast_sdu; } mac_register_t; /* @@ -457,7 +458,11 @@ */ extern mac_protect_t *mac_protect_get(mac_handle_t); extern void mac_sdu_get(mac_handle_t, uint_t *, uint_t *); +extern void mac_sdu_get2(mac_handle_t, uint_t *, uint_t *, + uint_t *); extern int mac_maxsdu_update(mac_handle_t, uint_t); +extern int mac_maxsdu_update2(mac_handle_t, uint_t, + uint_t); extern mac_register_t *mac_alloc(uint_t); extern void mac_free(mac_register_t *);