Mercurial > illumos > illumos-gate
changeset 4041:530c0817b983
PSARC 2007/198 IP Filter ipmp_hook_emulation
6535824 pfhooks and patch for IPMP+IP Filter doesn't work together
author | nordmark |
---|---|
date | Mon, 16 Apr 2007 11:46:20 -0700 |
parents | ab47869b3932 |
children | ed569729e015 |
files | usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip_ftable.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip/ip_netinfo.c usr/src/uts/common/inet/ip_if.h usr/src/uts/common/inet/ip_stack.h |
diffstat | 7 files changed, 531 insertions(+), 122 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip.h Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip.h Mon Apr 16 11:46:20 2007 -0700 @@ -1521,6 +1521,9 @@ kmutex_t phyint_lock; struct ipsq_s *phyint_ipsq; /* back pointer to ipsq */ struct phyint *phyint_ipsq_next; /* phyint list on this ipsq */ + /* Once Clearview IPMP is added the follow two fields can be removed */ + uint_t phyint_group_ifindex; /* index assigned to group */ + uint_t phyint_hook_ifindex; /* index used with neti/hook */ } phyint_t; #define CACHE_ALIGN_SIZE 64 @@ -3008,7 +3011,7 @@ if ((_ilp != NULL) && \ (((ill_t *)(_ilp))->ill_phyint != NULL)) \ info.hpe_ifp = (phy_if_t)((ill_t *) \ - (_ilp))->ill_phyint->phyint_ifindex; \ + (_ilp))->ill_phyint->phyint_hook_ifindex; \ else \ info.hpe_ifp = 0; \ \ @@ -3016,7 +3019,7 @@ if ((_olp != NULL) && \ (((ill_t *)(_olp))->ill_phyint != NULL)) \ info.hpe_ofp = (phy_if_t)((ill_t *) \ - (_olp))->ill_phyint->phyint_ifindex; \ + (_olp))->ill_phyint->phyint_hook_ifindex; \ else \ info.hpe_ofp = 0; \ info.hpe_hdr = _iph; \ @@ -3050,7 +3053,7 @@ if ((_ilp != NULL) && \ (((ill_t *)(_ilp))->ill_phyint != NULL)) \ info.hpe_ifp = (phy_if_t)((ill_t *) \ - (_ilp))->ill_phyint->phyint_ifindex; \ + (_ilp))->ill_phyint->phyint_hook_ifindex; \ else \ info.hpe_ifp = 0; \ \ @@ -3058,7 +3061,7 @@ if ((_olp != NULL) && \ (((ill_t *)(_olp))->ill_phyint != NULL)) \ info.hpe_ofp = (phy_if_t)((ill_t *) \ - (_olp))->ill_phyint->phyint_ifindex; \ + (_olp))->ill_phyint->phyint_hook_ifindex; \ else \ info.hpe_ofp = 0; \ info.hpe_hdr = _iph; \ @@ -3137,12 +3140,15 @@ struct mac_header_info_s; +extern boolean_t ip_assign_ifindex(uint_t *, ip_stack_t *); extern const char *dlpi_prim_str(int); extern const char *dlpi_err_str(int); extern void ill_frag_timer(void *); extern ill_t *ill_first(int, int, ill_walk_context_t *, ip_stack_t *); extern ill_t *ill_next(ill_walk_context_t *, ill_t *); extern void ill_frag_timer_start(ill_t *); +extern void ill_nic_info_dispatch(ill_t *); +extern void 
ill_nic_info_plumb(ill_t *, boolean_t); extern mblk_t *ip_carve_mp(mblk_t **, ssize_t); extern mblk_t *ip_dlpi_alloc(size_t, t_uscalar_t); extern char *ip_dot_addr(ipaddr_t, char *); @@ -3304,6 +3310,9 @@ uint_t); extern mblk_t *ip_unbind(queue_t *, mblk_t *); +extern phyint_t *phyint_lookup_group(char *, boolean_t, ip_stack_t *); +extern phyint_t *phyint_lookup_group_ifindex(uint_t, ip_stack_t *); + extern void tnet_init(void); extern void tnet_fini(void);
--- a/usr/src/uts/common/inet/ip/ip.c Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip.c Mon Apr 16 11:46:20 2007 -0700 @@ -751,6 +751,8 @@ caddr_t cp, cred_t *cr); static int ip_int_set(queue_t *, mblk_t *, char *, caddr_t, cred_t *); +static int ipmp_hook_emulation_set(queue_t *, mblk_t *, char *, caddr_t, + cred_t *); static squeue_func_t ip_squeue_switch(int); static void *ip_kstat_init(netstackid_t, ip_stack_t *); @@ -934,6 +936,9 @@ "ip_cgtp_filter" }, { ip_param_generic_get, ip_int_set, (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" }, +#define IPNDP_IPMP_HOOK_OFFSET 18 + { ip_param_generic_get, ipmp_hook_emulation_set, NULL, + "ipmp_hook_emulation" }, }; /* @@ -5361,7 +5366,6 @@ ipsq_t *ipsq; ipif_t *ipif; queue_t *q = ill->ill_rq; - hook_nic_event_t *info; ip_stack_t *ipst = ill->ill_ipst; clock_t timeout; @@ -5496,21 +5500,9 @@ if (ill->ill_credp != NULL) crfree(ill->ill_credp); - /* - * Unhook the nic event message from the ill and enqueue it into the nic - * event taskq. 
- */ - if ((info = ill->ill_nic_event_info) != NULL) { - if (ddi_taskq_dispatch(eventq_queue_nic, - ip_ne_queue_func, - (void *)info, DDI_SLEEP) == DDI_FAILURE) { - ip2dbg(("ip_ioctl_finish:ddi_taskq_dispatch failed\n")); - if (info->hne_data != NULL) - kmem_free(info->hne_data, info->hne_datalen); - kmem_free(info, sizeof (hook_nic_event_t)); - } - ill->ill_nic_event_info = NULL; - } + mutex_enter(&ill->ill_lock); + ill_nic_info_dispatch(ill); + mutex_exit(&ill->ill_lock); /* * Now we are done with the module close pieces that @@ -6046,6 +6038,10 @@ "ip_cgtp_filter") == 0); ipst->ips_ndp_arr[IPNDP_CGTP_FILTER_OFFSET].ip_ndp_data = (caddr_t)&ip_cgtp_filter; + ASSERT(strcmp(ipst->ips_ndp_arr[IPNDP_IPMP_HOOK_OFFSET].ip_ndp_name, + "ipmp_hook_emulation") == 0); + ipst->ips_ndp_arr[IPNDP_IPMP_HOOK_OFFSET].ip_ndp_data = + (caddr_t)&ipst->ips_ipmp_hook_emulation; (void) ip_param_register(&ipst->ips_ip_g_nd, ipst->ips_param_arr, A_CNT(lcl_param_arr), @@ -15992,7 +15988,7 @@ info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); if (info != NULL) { - info->hne_nic = ill->ill_phyint->phyint_ifindex; + info->hne_nic = ill->ill_phyint->phyint_hook_ifindex; info->hne_lif = 0; info->hne_event = NE_UP; info->hne_data = NULL; @@ -29427,6 +29423,128 @@ return (0); } +/* + * Handle changes to ipmp_hook_emulation ndd variable. + * Need to update phyint_hook_ifindex. + * Also generate a nic plumb event should a new ifidex be assigned to a group. + */ +static void +ipmp_hook_emulation_changed(ip_stack_t *ipst) +{ + phyint_t *phyi; + phyint_t *phyi_tmp; + char *groupname; + int namelen; + ill_t *ill; + boolean_t new_group; + + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + /* + * Group indicies are stored in the phyint - a common structure + * to both IPv4 and IPv6. 
+ */ + phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index); + for (; phyi != NULL; + phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, + phyi, AVL_AFTER)) { + /* Ignore the ones that do not have a group */ + if (phyi->phyint_groupname_len == 0) + continue; + + /* + * Look for other phyint in group. + * Clear name/namelen so the lookup doesn't find ourselves. + */ + namelen = phyi->phyint_groupname_len; + groupname = phyi->phyint_groupname; + phyi->phyint_groupname_len = 0; + phyi->phyint_groupname = NULL; + + phyi_tmp = phyint_lookup_group(groupname, B_FALSE, ipst); + /* Restore */ + phyi->phyint_groupname_len = namelen; + phyi->phyint_groupname = groupname; + + new_group = B_FALSE; + if (ipst->ips_ipmp_hook_emulation) { + /* + * If the group already exists and has already + * been assigned a group ifindex, we use the existing + * group_ifindex, otherwise we pick a new group_ifindex + * here. + */ + if (phyi_tmp != NULL && + phyi_tmp->phyint_group_ifindex != 0) { + phyi->phyint_group_ifindex = + phyi_tmp->phyint_group_ifindex; + } else { + /* XXX We need a recovery strategy here. */ + if (!ip_assign_ifindex( + &phyi->phyint_group_ifindex, ipst)) + cmn_err(CE_PANIC, + "ip_assign_ifindex() failed"); + new_group = B_TRUE; + } + } else { + phyi->phyint_group_ifindex = 0; + } + if (ipst->ips_ipmp_hook_emulation) + phyi->phyint_hook_ifindex = phyi->phyint_group_ifindex; + else + phyi->phyint_hook_ifindex = phyi->phyint_ifindex; + + /* + * For IP Filter to find out the relationship between + * names and interface indicies, we need to generate + * a NE_PLUMB event when a new group can appear. + * We always generate events when a new interface appears + * (even when ipmp_hook_emulation is set) so there + * is no need to generate NE_PLUMB events when + * ipmp_hook_emulation is turned off. + * And since it isn't critical for IP Filter to get + * the NE_UNPLUMB events we skip those here. 
+ */ + if (new_group) { + /* + * First phyint in group - generate group PLUMB event. + * Since we are not running inside the ipsq we do + * the dispatch immediately. + */ + if (phyi->phyint_illv4 != NULL) + ill = phyi->phyint_illv4; + else + ill = phyi->phyint_illv6; + + if (ill != NULL) { + mutex_enter(&ill->ill_lock); + ill_nic_info_plumb(ill, B_TRUE); + ill_nic_info_dispatch(ill); + mutex_exit(&ill->ill_lock); + } + } + } + rw_exit(&ipst->ips_ill_g_lock); +} + +/* ARGSUSED */ +static int +ipmp_hook_emulation_set(queue_t *q, mblk_t *mp, char *value, + caddr_t addr, cred_t *cr) +{ + int *v = (int *)addr; + long new_value; + ip_stack_t *ipst = CONNQ_TO_IPST(q); + + if (ddi_strtol(value, NULL, 10, &new_value) != 0) + return (EINVAL); + + if (*v != new_value) { + *v = new_value; + ipmp_hook_emulation_changed(ipst); + } + return (0); +} + static void * ip_kstat2_init(netstackid_t stackid, ip_stat_t *ip_statisticsp) {
--- a/usr/src/uts/common/inet/ip/ip_ftable.c Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_ftable.c Mon Apr 16 11:46:20 2007 -0700 @@ -1305,27 +1305,35 @@ ipif_t *supplied_ipif; ill_t *ill; + match_flags = (MATCH_IRE_DSTONLY | MATCH_IRE_DEFAULT | + MATCH_IRE_RECURSIVE| MATCH_IRE_RJ_BHOLE| + MATCH_IRE_SECATTR); + /* * If supplied ifindex is non-null, the only valid - * nexthop is one off of the interface corresponding + * nexthop is one off of the interface or group corresponding * to the specified ifindex. */ - ill = ill_lookup_on_ifindex(ifindex, B_FALSE, NULL, NULL, NULL, NULL, ipst); if (ill != NULL) { - supplied_ipif = ipif_get_next_ipif(NULL, ill); + match_flags |= MATCH_IRE_ILL; } else { - ip1dbg(("ipfil_sendpkt: Could not find" - " route to dst\n")); - value = ECOMM; - freemsg(mp); - goto discard; + /* Fallback to group names if hook_emulation set */ + if (ipst->ips_ipmp_hook_emulation) { + ill = ill_group_lookup_on_ifindex(ifindex, + B_FALSE, ipst); + } + if (ill == NULL) { + ip1dbg(("ipfil_sendpkt: Could not find" + " route to dst\n")); + value = ECOMM; + freemsg(mp); + goto discard; + } + match_flags |= MATCH_IRE_ILL_GROUP; } - - match_flags = (MATCH_IRE_DSTONLY | MATCH_IRE_DEFAULT | - MATCH_IRE_IPIF | MATCH_IRE_RECURSIVE| MATCH_IRE_RJ_BHOLE| - MATCH_IRE_SECATTR); + supplied_ipif = ipif_get_next_ipif(NULL, ill); ire = ire_route_lookup(dst, 0, 0, 0, supplied_ipif, &sire, zoneid, MBLK_GETLABEL(mp), match_flags, ipst);
--- a/usr/src/uts/common/inet/ip/ip_if.c Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_if.c Mon Apr 16 11:46:20 2007 -0700 @@ -2785,12 +2785,12 @@ rx_ring->rr_min_pkt_cnt = normal_pkt_cnt * rr_min_pkt_cnt_ratio; - rx_ring->rr_ring_state = ILL_RING_INUSE; - mutex_exit(&ill->ill_lock); + rx_ring->rr_ring_state = ILL_RING_INUSE; + mutex_exit(&ill->ill_lock); DTRACE_PROBE2(ill__ring__add, (void *), ill, (int), ip_rx_index); - return ((mac_resource_handle_t)rx_ring); + return ((mac_resource_handle_t)rx_ring); } } @@ -4484,6 +4484,7 @@ } } + /* Generate NE_UNPLUMB event for ill_name. */ info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); if (info != NULL) { info->hne_nic = ill->ill_phyint->phyint_ifindex; @@ -4956,33 +4957,38 @@ /* * Has ifindex been plumbed already. + * Compares both phyint_ifindex and phyint_group_ifindex. */ static boolean_t phyint_exists(uint_t index, ip_stack_t *ipst) { phyint_t *phyi; + ASSERT(index != 0); ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); /* * Indexes are stored in the phyint - a common structure * to both IPv4 and IPv6. */ - phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, - (void *) &index, NULL); - return (phyi != NULL); -} - -/* - * Assign a unique interface index for the phyint. 
- */ -static boolean_t -phyint_assign_ifindex(phyint_t *phyi, ip_stack_t *ipst) + phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index); + for (; phyi != NULL; + phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, + phyi, AVL_AFTER)) { + if (phyi->phyint_ifindex == index || + phyi->phyint_group_ifindex == index) + return (B_TRUE); + } + return (B_FALSE); +} + +/* Pick a unique ifindex */ +boolean_t +ip_assign_ifindex(uint_t *indexp, ip_stack_t *ipst) { uint_t starting_index; - ASSERT(phyi->phyint_ifindex == 0); if (!ipst->ips_ill_index_wrap) { - phyi->phyint_ifindex = ipst->ips_ill_index++; + *indexp = ipst->ips_ill_index++; if (ipst->ips_ill_index == 0) { /* Reached the uint_t limit Next time wrap */ ipst->ips_ill_index_wrap = B_TRUE; @@ -5000,7 +5006,7 @@ if (ipst->ips_ill_index != 0 && !phyint_exists(ipst->ips_ill_index, ipst)) { /* found unused index - use it */ - phyi->phyint_ifindex = ipst->ips_ill_index; + *indexp = ipst->ips_ill_index; return (B_TRUE); } } @@ -5012,6 +5018,16 @@ } /* + * Assign a unique interface index for the phyint. + */ +static boolean_t +phyint_assign_ifindex(phyint_t *phyi, ip_stack_t *ipst) +{ + ASSERT(phyi->phyint_ifindex == 0); + return (ip_assign_ifindex(&phyi->phyint_ifindex, ipst)); +} + +/* * Return a pointer to the ill which matches the supplied name. Note that * the ill name length includes the null termination character. (May be * called as writer.) @@ -8059,7 +8075,6 @@ ipsq_current_finish(ipsq_t *ipsq) { ipif_t *ipif = ipsq->ipsq_current_ipif; - hook_nic_event_t *info; ASSERT(IAM_WRITER_IPSQ(ipsq)); @@ -8070,22 +8085,9 @@ if (ipsq->ipsq_current_ioctl != SIOCLIFREMOVEIF) { mutex_enter(&ipif->ipif_ill->ill_lock); ipif->ipif_state_flags &= ~IPIF_CHANGING; - /* - * Unhook the nic event message from the ill and enqueue it - * into the nic event taskq. 
- */ - if ((info = ipif->ipif_ill->ill_nic_event_info) != NULL) { - if (ddi_taskq_dispatch(eventq_queue_nic, - ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) { - ip2dbg(("ipsq_current_finish: " - "ddi_taskq_dispatch failed\n")); - if (info->hne_data != NULL) - kmem_free(info->hne_data, - info->hne_datalen); - kmem_free(info, sizeof (hook_nic_event_t)); - } - ipif->ipif_ill->ill_nic_event_info = NULL; - } + + /* Send any queued event */ + ill_nic_info_dispatch(ipif->ipif_ill); mutex_exit(&ipif->ipif_ill->ill_lock); } @@ -11510,7 +11512,7 @@ ip_stack_t *ipst = ill->ill_ipst; info->hne_nic = - ipif->ipif_ill->ill_phyint->phyint_ifindex; + ipif->ipif_ill->ill_phyint->phyint_hook_ifindex; info->hne_lif = MAP_IPIF_ID(ipif->ipif_id); info->hne_event = NE_ADDRESS_CHANGE; info->hne_family = ipif->ipif_isv6 ? @@ -14499,7 +14501,7 @@ * messages can land up, since the ipsq_refs is zero. * i.e. this ipsq is unnamed and no phyint or phyint group * is associated with this ipsq. (Lookups are based on ill_name - * or phyint_group_name) + * or phyint_groupname) */ ASSERT(ipsq->ipsq_refs == 0); ASSERT(ipsq->ipsq_xopq_mphead == NULL && ipsq->ipsq_mphead == NULL); @@ -16397,10 +16399,13 @@ * Return the first phyint matching the groupname. There could * be more than one when there are ill groups. * - * Needs work: called only from ip_sioctl_groupname - */ -static phyint_t * -phyint_lookup_group(char *groupname, ip_stack_t *ipst) + * If 'usable' is set, then we exclude ones that are marked with any of + * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE). + * Needs work: called only from ip_sioctl_groupname and from the ipmp/netinfo + * emulation of ipmp. + */ +phyint_t * +phyint_lookup_group(char *groupname, boolean_t usable, ip_stack_t *ipst) { phyint_t *phyi; @@ -16415,6 +16420,14 @@ phyi, AVL_AFTER)) { if (phyi->phyint_groupname_len == 0) continue; + /* + * Skip the ones that should not be used since the callers + * sometime use this for sending packets. 
+ */ + if (usable && (phyi->phyint_flags & + (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE))) + continue; + ASSERT(phyi->phyint_groupname != NULL); if (mi_strcmp(groupname, phyi->phyint_groupname) == 0) return (phyi); @@ -16423,6 +16436,49 @@ } +/* + * Return the first usable phyint matching the group index. By 'usable' + * we exclude ones that are marked ununsable with any of + * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE). + * + * Used only for the ipmp/netinfo emulation of ipmp. + */ +phyint_t * +phyint_lookup_group_ifindex(uint_t group_ifindex, ip_stack_t *ipst) +{ + phyint_t *phyi; + + ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock)); + + if (!ipst->ips_ipmp_hook_emulation) + return (NULL); + + /* + * Group indicies are stored in the phyint - a common structure + * to both IPv4 and IPv6. + */ + phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index); + for (; phyi != NULL; + phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index, + phyi, AVL_AFTER)) { + /* Ignore the ones that do not have a group */ + if (phyi->phyint_groupname_len == 0) + continue; + + ASSERT(phyi->phyint_group_ifindex != 0); + /* + * Skip the ones that should not be used since the callers + * sometime use this for sending packets. + */ + if (phyi->phyint_flags & + (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE)) + continue; + if (phyi->phyint_group_ifindex == group_ifindex) + return (phyi); + } + return (NULL); +} + /* * MT notes on creation and deletion of IPMP groups @@ -16589,6 +16645,10 @@ mi_free(phyi->phyint_groupname); phyi->phyint_groupname = NULL; phyi->phyint_groupname_len = 0; + + /* Restore the ifindex used to be the per interface one */ + phyi->phyint_group_ifindex = 0; + phyi->phyint_hook_ifindex = phyi->phyint_ifindex; mutex_exit(&phyi->phyint_lock); RELEASE_ILL_LOCKS(ill_v4, ill_v6); rw_exit(&ipst->ips_ill_g_lock); @@ -16641,7 +16701,7 @@ * packets across the group because of potential link-level * header differences. 
*/ - phyi_tmp = phyint_lookup_group(groupname, ipst); + phyi_tmp = phyint_lookup_group(groupname, B_FALSE, ipst); if (phyi_tmp != NULL) { if ((ill_v4 != NULL && phyi_tmp->phyint_illv4 != NULL) && @@ -16736,6 +16796,37 @@ phyi->phyint_groupname = tmp; bcopy(groupname, phyi->phyint_groupname, namelen + 1); phyi->phyint_groupname_len = namelen + 1; + + if (ipst->ips_ipmp_hook_emulation) { + /* + * If the group already exists we use the existing + * group_ifindex, otherwise we pick a new index here. + */ + if (phyi_tmp != NULL) { + phyi->phyint_group_ifindex = + phyi_tmp->phyint_group_ifindex; + } else { + /* XXX We need a recovery strategy here. */ + if (!ip_assign_ifindex( + &phyi->phyint_group_ifindex, ipst)) + cmn_err(CE_PANIC, + "ip_assign_ifindex() failed"); + } + } + /* + * Select whether the netinfo and hook use the per-interface + * or per-group ifindex. + */ + if (ipst->ips_ipmp_hook_emulation) + phyi->phyint_hook_ifindex = phyi->phyint_group_ifindex; + else + phyi->phyint_hook_ifindex = phyi->phyint_ifindex; + + if (ipst->ips_ipmp_hook_emulation && + phyi_tmp != NULL) { + /* First phyint in group - group PLUMB event */ + ill_nic_info_plumb(ill, B_TRUE); + } mutex_exit(&phyi->phyint_lock); RELEASE_ILL_LOCKS(ill_v4, ill_v6); rw_exit(&ipst->ips_ill_g_lock); @@ -18414,7 +18505,7 @@ if (info != NULL) { ip_stack_t *ipst = ill->ill_ipst; - info->hne_nic = ill->ill_phyint->phyint_ifindex; + info->hne_nic = ill->ill_phyint->phyint_hook_ifindex; info->hne_lif = 0; info->hne_event = NE_DOWN; info->hne_data = NULL; @@ -22891,6 +22982,9 @@ if (!phyint_assign_ifindex(phyi, ipst)) cmn_err(CE_PANIC, "phyint_assign_ifindex() failed"); + /* No IPMP group yet, thus the hook uses the ifindex */ + phyi->phyint_hook_ifindex = phyi->phyint_ifindex; + avl_insert(&ipst->ips_phyint_g_list->phyint_list_avl_by_name, (void *)phyi, where); @@ -22940,45 +23034,103 @@ */ if (ill->ill_name_length <= 2 || ill->ill_name[0] != 'l' || ill->ill_name[1] != 'o') { - hook_nic_event_t *info; - if 
((info = ill->ill_nic_event_info) != NULL) { - ip2dbg(("ill_phyint_reinit: unexpected nic event %d " - "attached for %s\n", info->hne_event, - ill->ill_name)); + /* + * Generate nic plumb event for ill_name even if + * ipmp_hook_emulation is set. That avoids generating events + * for the ill_names should ipmp_hook_emulation be turned on + * later. + */ + ill_nic_info_plumb(ill, B_FALSE); + } + RELEASE_ILL_LOCKS(ill, ill_other); + mutex_exit(&phyi->phyint_lock); +} + +/* + * Allocate a NE_PLUMB nic info event and store in the ill. + * If 'group' is set we do it for the group name, otherwise the ill name. + * It will be sent when we leave the ipsq. + */ +void +ill_nic_info_plumb(ill_t *ill, boolean_t group) +{ + phyint_t *phyi = ill->ill_phyint; + ip_stack_t *ipst = ill->ill_ipst; + hook_nic_event_t *info; + char *name; + int namelen; + + ASSERT(MUTEX_HELD(&ill->ill_lock)); + + if ((info = ill->ill_nic_event_info) != NULL) { + ip2dbg(("ill_nic_info_plumb: unexpected nic event %d " + "attached for %s\n", info->hne_event, + ill->ill_name)); + if (info->hne_data != NULL) + kmem_free(info->hne_data, info->hne_datalen); + kmem_free(info, sizeof (hook_nic_event_t)); + ill->ill_nic_event_info = NULL; + } + + info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); + if (info == NULL) { + ip2dbg(("ill_nic_info_plumb: could not attach PLUMB nic " + "event information for %s (ENOMEM)\n", + ill->ill_name)); + return; + } + + if (group) { + ASSERT(phyi->phyint_groupname_len != 0); + namelen = phyi->phyint_groupname_len; + name = phyi->phyint_groupname; + } else { + namelen = ill->ill_name_length; + name = ill->ill_name; + } + + info->hne_nic = phyi->phyint_hook_ifindex; + info->hne_lif = 0; + info->hne_event = NE_PLUMB; + info->hne_family = ill->ill_isv6 ? 
+ ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; + + info->hne_data = kmem_alloc(namelen, KM_NOSLEEP); + if (info->hne_data != NULL) { + info->hne_datalen = namelen; + bcopy(name, info->hne_data, info->hne_datalen); + } else { + ip2dbg(("ill_nic_info_plumb: could not attach " + "name information for PLUMB nic event " + "of %s (ENOMEM)\n", name)); + kmem_free(info, sizeof (hook_nic_event_t)); + info = NULL; + } + ill->ill_nic_event_info = info; +} + +/* + * Unhook the nic event message from the ill and enqueue it + * into the nic event taskq. + */ +void +ill_nic_info_dispatch(ill_t *ill) +{ + hook_nic_event_t *info; + + ASSERT(MUTEX_HELD(&ill->ill_lock)); + + if ((info = ill->ill_nic_event_info) != NULL) { + if (ddi_taskq_dispatch(eventq_queue_nic, + ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) { + ip2dbg(("ill_nic_info_dispatch: " + "ddi_taskq_dispatch failed\n")); if (info->hne_data != NULL) kmem_free(info->hne_data, info->hne_datalen); kmem_free(info, sizeof (hook_nic_event_t)); } - - info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP); - if (info != NULL) { - info->hne_nic = ill->ill_phyint->phyint_ifindex; - info->hne_lif = 0; - info->hne_event = NE_PLUMB; - info->hne_family = ill->ill_isv6 ? 
- ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data; - info->hne_data = kmem_alloc(ill->ill_name_length, - KM_NOSLEEP); - if (info->hne_data != NULL) { - info->hne_datalen = ill->ill_name_length; - bcopy(ill->ill_name, info->hne_data, - info->hne_datalen); - } else { - ip2dbg(("ill_phyint_reinit: could not attach " - "ill_name information for PLUMB nic event " - "of %s (ENOMEM)\n", ill->ill_name)); - kmem_free(info, sizeof (hook_nic_event_t)); - } - } else - ip2dbg(("ill_phyint_reinit: could not attach PLUMB nic " - "event information for %s (ENOMEM)\n", - ill->ill_name)); - - ill->ill_nic_event_info = info; - } - - RELEASE_ILL_LOCKS(ill, ill_other); - mutex_exit(&phyi->phyint_lock); + ill->ill_nic_event_info = NULL; + } } /* @@ -24478,6 +24630,11 @@ * Return a pointer to an ipif_t given a combination of (ill_idx,ipif_id) * If a pointer to an ipif_t is returned then the caller will need to do * an ill_refrele(). + * + * If there is no real interface which matches the ifindex, then it looks + * for a group that has a matching index. In the case of a group match the + * lifidx must be zero. We don't need emulate the logical interfaces + * since IP Filter's use of netinfo doesn't use that. */ ipif_t * ipif_getby_indexes(uint_t ifindex, uint_t lifidx, boolean_t isv6, @@ -24489,8 +24646,17 @@ ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, ipst); - if (ill == NULL) - return (NULL); + if (ill == NULL) { + /* Fallback to group names only if hook_emulation set */ + if (!ipst->ips_ipmp_hook_emulation) + return (NULL); + + if (lifidx != 0) + return (NULL); + ill = ill_group_lookup_on_ifindex(ifindex, isv6, ipst); + if (ill == NULL) + return (NULL); + } mutex_enter(&ill->ill_lock); if (ill->ill_state_flags & ILL_CONDEMNED) {
--- a/usr/src/uts/common/inet/ip/ip_netinfo.c Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip/ip_netinfo.c Mon Apr 16 11:46:20 2007 -0700 @@ -475,16 +475,26 @@ char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst) { ill_t *ill; + char *name; ASSERT(buffer != NULL); ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL, NULL, NULL, ipst); - if (ill == NULL) - return (1); - - if (ill->ill_name != NULL) { - (void) strlcpy(buffer, ill->ill_name, buflen); + if (ill != NULL) { + name = ill->ill_name; + } else { + /* Fallback to group names only if hook_emulation is set */ + if (ipst->ips_ipmp_hook_emulation) { + ill = ill_group_lookup_on_ifindex((uint_t)phy_ifdata, + isv6, ipst); + } + if (ill == NULL) + return (1); + name = ill->ill_phyint->phyint_groupname; + } + if (name != NULL) { + (void) strlcpy(buffer, name, buflen); ill_refrele(ill); return (0); } else { @@ -516,6 +526,9 @@ /* * Shared implementation to determine the MTU of a network interface + * + * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set. + * But IP Filter only uses a zero ifdata. */ /* ARGSUSED */ static int @@ -541,7 +554,16 @@ if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL, NULL, NULL, ipst)) == NULL) { - return (0); + /* + * Fallback to group names only if hook_emulation + * is set + */ + if (ipst->ips_ipmp_hook_emulation) { + ill = ill_group_lookup_on_ifindex( + (uint_t)phy_ifdata, isv6, ipst); + } + if (ill == NULL) + return (0); } mtu = ill->ill_max_frag; ill_refrele(ill); @@ -562,6 +584,9 @@ /* * Get next interface from the current list of IPv4 physical network interfaces + * + * Note: this does not handle the case when ipmp_hook_emulation is set. + * But IP Filter does not use this function. 
*/ static phy_if_t ip_phygetnext(phy_if_t phy_ifdata, netstack_t *ns) @@ -614,10 +639,14 @@ ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL, NULL, NULL, NULL, ipst); + /* Fallback to group names only if hook_emulation is set */ + if (ill == NULL && ipst->ips_ipmp_hook_emulation) { + ill = ill_group_lookup_on_name((char *)name, isv6, ipst); + } if (ill == NULL) return (0); - phy = ill->ill_phyint->phyint_ifindex; + phy = ill->ill_phyint->phyint_hook_ifindex; ill_refrele(ill); @@ -649,6 +678,9 @@ /* * Shared implementation to get next interface from the current list of * logical network interfaces + * + * Note: this does not handle the case when ipmp_hook_emulation is set. + * But IP Filter does not use this function. */ static lif_if_t ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6, @@ -960,11 +992,13 @@ return (0); ill = ire_to_ill(ire); - if (ill == NULL) + if (ill == NULL) { + ire_refrele(ire); return (0); + } ASSERT(ill != NULL); - phy_if = (phy_if_t)ill->ill_phyint->phyint_ifindex; + phy_if = (phy_if_t)ill->ill_phyint->phyint_hook_ifindex; ire_refrele(ire); return (phy_if); @@ -1089,6 +1123,9 @@ /* * Shared implementation to determine the network addresses for an interface + * + * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set. + * But IP Filter only uses a zero ifdata. 
*/ /* ARGSUSED */ static int @@ -1236,8 +1273,15 @@ packet = &inject->inj_data; ASSERT(packet->ni_packet != NULL); - if ((ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical, - B_FALSE, NULL, NULL, NULL, NULL, ipst)) == NULL) { + ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical, + B_FALSE, NULL, NULL, NULL, NULL, ipst); + + /* Fallback to group names only if hook_emulation is set */ + if (ill == NULL && ipst->ips_ipmp_hook_emulation) { + ill = ill_group_lookup_on_ifindex((uint_t)packet->ni_physical, + B_FALSE, ipst); + } + if (ill == NULL) { kmem_free(inject, sizeof (*inject)); return; } @@ -1302,3 +1346,63 @@ kmem_free(info->hne_data, info->hne_datalen); kmem_free(arg, sizeof (hook_nic_event_t)); } + +/* + * Temporary function to support IPMP emulation for IP Filter. + * Lookup an ill based on the ifindex assigned to the group. + * Skips unusable ones i.e. where any of these flags are set: + * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE) + */ +ill_t * +ill_group_lookup_on_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst) +{ + ill_t *ill; + phyint_t *phyi; + + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + phyi = phyint_lookup_group_ifindex(index, ipst); + if (phyi != NULL) { + ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4; + if (ill != NULL) { + mutex_enter(&ill->ill_lock); + if (ILL_CAN_LOOKUP(ill)) { + ill_refhold_locked(ill); + mutex_exit(&ill->ill_lock); + rw_exit(&ipst->ips_ill_g_lock); + return (ill); + } + } + } + rw_exit(&ipst->ips_ill_g_lock); + return (NULL); +} + +/* + * Temporary function to support IPMP emulation for IP Filter. + * Lookup an ill based on the group name. + * Skips unusable ones i.e. 
where any of these flags are set: + * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE) + */ +ill_t * +ill_group_lookup_on_name(char *name, boolean_t isv6, ip_stack_t *ipst) +{ + ill_t *ill; + phyint_t *phyi; + + rw_enter(&ipst->ips_ill_g_lock, RW_READER); + phyi = phyint_lookup_group(name, B_TRUE, ipst); + if (phyi != NULL) { + ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4; + if (ill != NULL) { + mutex_enter(&ill->ill_lock); + if (ILL_CAN_LOOKUP(ill)) { + ill_refhold_locked(ill); + mutex_exit(&ill->ill_lock); + rw_exit(&ipst->ips_ill_g_lock); + return (ill); + } + } + } + rw_exit(&ipst->ips_ill_g_lock); + return (NULL); +}
--- a/usr/src/uts/common/inet/ip_if.h Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip_if.h Mon Apr 16 11:46:20 2007 -0700 @@ -160,6 +160,8 @@ extern void ill_dlpi_done(ill_t *, t_uscalar_t); extern void ill_dlpi_send(ill_t *, mblk_t *); extern mblk_t *ill_dlur_gen(uchar_t *, uint_t, t_uscalar_t, t_scalar_t); +extern ill_t *ill_group_lookup_on_ifindex(uint_t, boolean_t, ip_stack_t *); +extern ill_t *ill_group_lookup_on_name(char *, boolean_t, ip_stack_t *); /* NOTE: Keep unmodified ill_lookup_on_ifindex for ipp for now */ extern ill_t *ill_lookup_on_ifindex_global_instance(uint_t, boolean_t, queue_t *, mblk_t *, ipsq_func_t, int *);
--- a/usr/src/uts/common/inet/ip_stack.h Mon Apr 16 11:16:17 2007 -0700 +++ b/usr/src/uts/common/inet/ip_stack.h Mon Apr 16 11:46:20 2007 -0700 @@ -297,6 +297,8 @@ int ips_ip_g_forward; int ips_ipv6_forward; + int ips_ipmp_hook_emulation; /* ndd variable */ + time_t ips_ip_g_frag_timeout; clock_t ips_ip_g_frag_timo_ms;