changeset 4041:530c0817b983

PSARC 2007/198 IP Filter ipmp_hook_emulation 6535824 pfhooks and patch for IPMP+IP FIlter doesn't work together
author nordmark
date Mon, 16 Apr 2007 11:46:20 -0700
parents ab47869b3932
children ed569729e015
files usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip_ftable.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip/ip_netinfo.c usr/src/uts/common/inet/ip_if.h usr/src/uts/common/inet/ip_stack.h
diffstat 7 files changed, 531 insertions(+), 122 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip.h	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip.h	Mon Apr 16 11:46:20 2007 -0700
@@ -1521,6 +1521,9 @@
 	kmutex_t	phyint_lock;
 	struct ipsq_s	*phyint_ipsq;		/* back pointer to ipsq */
 	struct phyint	*phyint_ipsq_next;	/* phyint list on this ipsq */
+	/* Once Clearview IPMP is added the next two fields can be removed */
+	uint_t		phyint_group_ifindex;	/* index assigned to group */
+	uint_t		phyint_hook_ifindex;	/* index used with neti/hook */
 } phyint_t;
 
 #define	CACHE_ALIGN_SIZE 64
@@ -3008,7 +3011,7 @@
 		if ((_ilp != NULL) &&					\
 		    (((ill_t *)(_ilp))->ill_phyint != NULL))		\
 			info.hpe_ifp = (phy_if_t)((ill_t *)		\
-			    (_ilp))->ill_phyint->phyint_ifindex;	\
+			    (_ilp))->ill_phyint->phyint_hook_ifindex;	\
 		else							\
 			info.hpe_ifp = 0;				\
 									\
@@ -3016,7 +3019,7 @@
 		if ((_olp != NULL) &&					\
 		    (((ill_t *)(_olp))->ill_phyint != NULL))		\
 			info.hpe_ofp = (phy_if_t)((ill_t *)		\
-			    (_olp))->ill_phyint->phyint_ifindex;	\
+			    (_olp))->ill_phyint->phyint_hook_ifindex;	\
 		else							\
 			info.hpe_ofp = 0;				\
 		info.hpe_hdr = _iph;					\
@@ -3050,7 +3053,7 @@
 		if ((_ilp != NULL) &&					\
 		    (((ill_t *)(_ilp))->ill_phyint != NULL))		\
 			info.hpe_ifp = (phy_if_t)((ill_t *)		\
-			    (_ilp))->ill_phyint->phyint_ifindex;	\
+			    (_ilp))->ill_phyint->phyint_hook_ifindex;	\
 		else							\
 			info.hpe_ifp = 0;				\
 									\
@@ -3058,7 +3061,7 @@
 		if ((_olp != NULL) &&					\
 		    (((ill_t *)(_olp))->ill_phyint != NULL))		\
 			info.hpe_ofp = (phy_if_t)((ill_t *)		\
-			    (_olp))->ill_phyint->phyint_ifindex;	\
+			    (_olp))->ill_phyint->phyint_hook_ifindex;	\
 		else							\
 			info.hpe_ofp = 0;				\
 		info.hpe_hdr = _iph;					\
@@ -3137,12 +3140,15 @@
 
 struct	mac_header_info_s;
 
+extern boolean_t ip_assign_ifindex(uint_t *, ip_stack_t *);
 extern const char *dlpi_prim_str(int);
 extern const char *dlpi_err_str(int);
 extern void	ill_frag_timer(void *);
 extern ill_t	*ill_first(int, int, ill_walk_context_t *, ip_stack_t *);
 extern ill_t	*ill_next(ill_walk_context_t *, ill_t *);
 extern void	ill_frag_timer_start(ill_t *);
+extern void	ill_nic_info_dispatch(ill_t *);
+extern void	ill_nic_info_plumb(ill_t *, boolean_t);
 extern mblk_t	*ip_carve_mp(mblk_t **, ssize_t);
 extern mblk_t	*ip_dlpi_alloc(size_t, t_uscalar_t);
 extern char	*ip_dot_addr(ipaddr_t, char *);
@@ -3304,6 +3310,9 @@
 			uint_t);
 extern mblk_t	*ip_unbind(queue_t *, mblk_t *);
 
+extern phyint_t *phyint_lookup_group(char *, boolean_t, ip_stack_t *);
+extern phyint_t *phyint_lookup_group_ifindex(uint_t, ip_stack_t *);
+
 extern void tnet_init(void);
 extern void tnet_fini(void);
 
--- a/usr/src/uts/common/inet/ip/ip.c	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip.c	Mon Apr 16 11:46:20 2007 -0700
@@ -751,6 +751,8 @@
     caddr_t cp, cred_t *cr);
 static int	ip_int_set(queue_t *, mblk_t *, char *, caddr_t,
     cred_t *);
+static int	ipmp_hook_emulation_set(queue_t *, mblk_t *, char *, caddr_t,
+    cred_t *);
 static squeue_func_t ip_squeue_switch(int);
 
 static void	*ip_kstat_init(netstackid_t, ip_stack_t *);
@@ -934,6 +936,9 @@
 	    "ip_cgtp_filter" },
 	{ ip_param_generic_get, ip_int_set,
 	    (caddr_t)&ip_soft_rings_cnt, "ip_soft_rings_cnt" },
+#define	IPNDP_IPMP_HOOK_OFFSET	18
+	{  ip_param_generic_get, ipmp_hook_emulation_set, NULL,
+	    "ipmp_hook_emulation" },
 };
 
 /*
@@ -5361,7 +5366,6 @@
 	ipsq_t	*ipsq;
 	ipif_t	*ipif;
 	queue_t	*q = ill->ill_rq;
-	hook_nic_event_t *info;
 	ip_stack_t	*ipst = ill->ill_ipst;
 	clock_t timeout;
 
@@ -5496,21 +5500,9 @@
 	if (ill->ill_credp != NULL)
 		crfree(ill->ill_credp);
 
-	/*
-	 * Unhook the nic event message from the ill and enqueue it into the nic
-	 * event taskq.
-	 */
-	if ((info = ill->ill_nic_event_info) != NULL) {
-		if (ddi_taskq_dispatch(eventq_queue_nic,
-		    ip_ne_queue_func,
-		    (void *)info, DDI_SLEEP) == DDI_FAILURE) {
-			ip2dbg(("ip_ioctl_finish:ddi_taskq_dispatch failed\n"));
-			if (info->hne_data != NULL)
-				kmem_free(info->hne_data, info->hne_datalen);
-			kmem_free(info, sizeof (hook_nic_event_t));
-		}
-		ill->ill_nic_event_info = NULL;
-	}
+	mutex_enter(&ill->ill_lock);
+	ill_nic_info_dispatch(ill);
+	mutex_exit(&ill->ill_lock);
 
 	/*
 	 * Now we are done with the module close pieces that
@@ -6046,6 +6038,10 @@
 		"ip_cgtp_filter") == 0);
 	ipst->ips_ndp_arr[IPNDP_CGTP_FILTER_OFFSET].ip_ndp_data =
 	    (caddr_t)&ip_cgtp_filter;
+	ASSERT(strcmp(ipst->ips_ndp_arr[IPNDP_IPMP_HOOK_OFFSET].ip_ndp_name,
+		"ipmp_hook_emulation") == 0);
+	ipst->ips_ndp_arr[IPNDP_IPMP_HOOK_OFFSET].ip_ndp_data =
+	    (caddr_t)&ipst->ips_ipmp_hook_emulation;
 
 	(void) ip_param_register(&ipst->ips_ip_g_nd,
 	    ipst->ips_param_arr, A_CNT(lcl_param_arr),
@@ -15992,7 +15988,7 @@
 
 		info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP);
 		if (info != NULL) {
-			info->hne_nic = ill->ill_phyint->phyint_ifindex;
+			info->hne_nic = ill->ill_phyint->phyint_hook_ifindex;
 			info->hne_lif = 0;
 			info->hne_event = NE_UP;
 			info->hne_data = NULL;
@@ -29427,6 +29423,128 @@
 	return (0);
 }
 
+/*
+ * Handle changes to ipmp_hook_emulation ndd variable.
+ * Need to update phyint_hook_ifindex.
+ * Also generate a nic plumb event should a new ifindex be assigned to a group.
+ */
+static void
+ipmp_hook_emulation_changed(ip_stack_t *ipst)
+{
+	phyint_t *phyi;
+	phyint_t *phyi_tmp;
+	char *groupname;
+	int namelen;
+	ill_t	*ill;
+	boolean_t new_group;
+
+	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+	/*
+	 * Group indices are stored in the phyint - a common structure
+	 * to both IPv4 and IPv6.
+	 */
+	phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index);
+	for (; phyi != NULL;
+	    phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
+	    phyi, AVL_AFTER)) {
+		/* Ignore the ones that do not have a group */
+		if (phyi->phyint_groupname_len == 0)
+			continue;
+
+		/*
+		 * Look for another phyint in the same group.
+		 * Clear name/namelen so the lookup doesn't find ourselves.
+		 */
+		namelen = phyi->phyint_groupname_len;
+		groupname = phyi->phyint_groupname;
+		phyi->phyint_groupname_len = 0;
+		phyi->phyint_groupname = NULL;
+
+		phyi_tmp = phyint_lookup_group(groupname, B_FALSE, ipst);
+		/* Restore */
+		phyi->phyint_groupname_len = namelen;
+		phyi->phyint_groupname = groupname;
+
+		new_group = B_FALSE;
+		if (ipst->ips_ipmp_hook_emulation) {
+			/*
+			 * If the group already exists and has already
+			 * been assigned a group ifindex, we use the existing
+			 * group_ifindex, otherwise we pick a new group_ifindex
+			 * here.
+			 */
+			if (phyi_tmp != NULL &&
+			    phyi_tmp->phyint_group_ifindex != 0) {
+				phyi->phyint_group_ifindex =
+				    phyi_tmp->phyint_group_ifindex;
+			} else {
+				/* XXX We need a recovery strategy here. */
+				if (!ip_assign_ifindex(
+				    &phyi->phyint_group_ifindex, ipst))
+					cmn_err(CE_PANIC,
+					    "ip_assign_ifindex() failed");
+				new_group = B_TRUE;
+			}
+		} else {
+			phyi->phyint_group_ifindex = 0;
+		}
+		if (ipst->ips_ipmp_hook_emulation)
+			phyi->phyint_hook_ifindex = phyi->phyint_group_ifindex;
+		else
+			phyi->phyint_hook_ifindex = phyi->phyint_ifindex;
+
+		/*
+		 * For IP Filter to find out the relationship between
+		 * names and interface indices, we need to generate
+		 * a NE_PLUMB event when a new group can appear.
+		 * We always generate events when a new interface appears
+		 * (even when ipmp_hook_emulation is set) so there
+		 * is no need to generate NE_PLUMB events when
+		 * ipmp_hook_emulation is turned off.
+		 * And since it isn't critical for IP Filter to get
+		 * the NE_UNPLUMB events we skip those here.
+		 */
+		if (new_group) {
+			/*
+			 * First phyint in group - generate group PLUMB event.
+			 * Since we are not running inside the ipsq we do
+			 * the dispatch immediately.
+			 */
+			if (phyi->phyint_illv4 != NULL)
+				ill = phyi->phyint_illv4;
+			else
+				ill = phyi->phyint_illv6;
+
+			if (ill != NULL) {
+				mutex_enter(&ill->ill_lock);
+				ill_nic_info_plumb(ill, B_TRUE);
+				ill_nic_info_dispatch(ill);
+				mutex_exit(&ill->ill_lock);
+			}
+		}
+	}
+	rw_exit(&ipst->ips_ill_g_lock);
+}
+
+/* ARGSUSED */
+static int
+ipmp_hook_emulation_set(queue_t *q, mblk_t *mp, char *value,
+    caddr_t addr, cred_t *cr)
+{
+	int *v = (int *)addr;
+	long new_value;
+	ip_stack_t	*ipst = CONNQ_TO_IPST(q);
+
+	if (ddi_strtol(value, NULL, 10, &new_value) != 0)
+		return (EINVAL);
+
+	if (*v != new_value) {		/* act only on an actual change */
+		*v = new_value;
+		ipmp_hook_emulation_changed(ipst);
+	}
+	return (0);
+}
+
 static void *
 ip_kstat2_init(netstackid_t stackid, ip_stat_t *ip_statisticsp)
 {
--- a/usr/src/uts/common/inet/ip/ip_ftable.c	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_ftable.c	Mon Apr 16 11:46:20 2007 -0700
@@ -1305,27 +1305,35 @@
 		ipif_t *supplied_ipif;
 		ill_t *ill;
 
+		match_flags = (MATCH_IRE_DSTONLY | MATCH_IRE_DEFAULT |
+		    MATCH_IRE_RECURSIVE| MATCH_IRE_RJ_BHOLE|
+		    MATCH_IRE_SECATTR);
+
 		/*
 		 * If supplied ifindex is non-null, the only valid
-		 * nexthop is one off of the interface corresponding
+		 * nexthop is one off of the interface or group corresponding
 		 * to the specified ifindex.
 		 */
-
 		ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
 		    NULL, NULL, NULL, NULL, ipst);
 		if (ill != NULL) {
-			supplied_ipif = ipif_get_next_ipif(NULL, ill);
+			match_flags |= MATCH_IRE_ILL;
 		} else {
-			ip1dbg(("ipfil_sendpkt: Could not find"
-			    " route to dst\n"));
-			value = ECOMM;
-			freemsg(mp);
-			goto discard;
+			/* Fallback to group names if hook_emulation set */
+			if (ipst->ips_ipmp_hook_emulation) {
+				ill = ill_group_lookup_on_ifindex(ifindex,
+				    B_FALSE, ipst);
+			}
+			if (ill == NULL) {
+				ip1dbg(("ipfil_sendpkt: Could not find"
+				    " route to dst\n"));
+				value = ECOMM;
+				freemsg(mp);
+				goto discard;
+			}
+			match_flags |= MATCH_IRE_ILL_GROUP;
 		}
-
-		match_flags = (MATCH_IRE_DSTONLY | MATCH_IRE_DEFAULT |
-		    MATCH_IRE_IPIF | MATCH_IRE_RECURSIVE| MATCH_IRE_RJ_BHOLE|
-		    MATCH_IRE_SECATTR);
+		supplied_ipif = ipif_get_next_ipif(NULL, ill);
 
 		ire = ire_route_lookup(dst, 0, 0, 0, supplied_ipif,
 		    &sire, zoneid, MBLK_GETLABEL(mp), match_flags, ipst);
--- a/usr/src/uts/common/inet/ip/ip_if.c	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_if.c	Mon Apr 16 11:46:20 2007 -0700
@@ -2785,12 +2785,12 @@
 			rx_ring->rr_min_pkt_cnt =
 			    normal_pkt_cnt * rr_min_pkt_cnt_ratio;
 
-	rx_ring->rr_ring_state = ILL_RING_INUSE;
-	mutex_exit(&ill->ill_lock);
+			rx_ring->rr_ring_state = ILL_RING_INUSE;
+			mutex_exit(&ill->ill_lock);
 
 			DTRACE_PROBE2(ill__ring__add, (void *), ill,
 			    (int), ip_rx_index);
-	return ((mac_resource_handle_t)rx_ring);
+			return ((mac_resource_handle_t)rx_ring);
 		}
 	}
 
@@ -4484,6 +4484,7 @@
 		}
 	}
 
+	/* Generate NE_UNPLUMB event for ill_name. */
 	info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP);
 	if (info != NULL) {
 		info->hne_nic = ill->ill_phyint->phyint_ifindex;
@@ -4956,33 +4957,38 @@
 
 /*
  * Has ifindex been plumbed already.
+ * Compares both phyint_ifindex and phyint_group_ifindex.
  */
 static boolean_t
 phyint_exists(uint_t index, ip_stack_t *ipst)
 {
 	phyint_t *phyi;
 
+	ASSERT(index != 0);
 	ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock));
 	/*
 	 * Indexes are stored in the phyint - a common structure
 	 * to both IPv4 and IPv6.
 	 */
-	phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
-	    (void *) &index, NULL);
-	return (phyi != NULL);
-}
-
-/*
- * Assign a unique interface index for the phyint.
- */
-static boolean_t
-phyint_assign_ifindex(phyint_t *phyi, ip_stack_t *ipst)
+	phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index);
+	for (; phyi != NULL;
+	    phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
+	    phyi, AVL_AFTER)) {
+		if (phyi->phyint_ifindex == index ||
+		    phyi->phyint_group_ifindex == index)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/* Pick a unique ifindex */
+boolean_t
+ip_assign_ifindex(uint_t *indexp, ip_stack_t *ipst)
 {
 	uint_t starting_index;
 
-	ASSERT(phyi->phyint_ifindex == 0);
 	if (!ipst->ips_ill_index_wrap) {
-		phyi->phyint_ifindex = ipst->ips_ill_index++;
+		*indexp = ipst->ips_ill_index++;
 		if (ipst->ips_ill_index == 0) {
 			/* Reached the uint_t limit Next time wrap  */
 			ipst->ips_ill_index_wrap = B_TRUE;
@@ -5000,7 +5006,7 @@
 		if (ipst->ips_ill_index != 0 &&
 		    !phyint_exists(ipst->ips_ill_index, ipst)) {
 			/* found unused index - use it */
-			phyi->phyint_ifindex = ipst->ips_ill_index;
+			*indexp = ipst->ips_ill_index;
 			return (B_TRUE);
 		}
 	}
@@ -5012,6 +5018,16 @@
 }
 
 /*
+ * Assign a unique interface index for the phyint.
+ */
+static boolean_t
+phyint_assign_ifindex(phyint_t *phyi, ip_stack_t *ipst)
+{
+	ASSERT(phyi->phyint_ifindex == 0);
+	return (ip_assign_ifindex(&phyi->phyint_ifindex, ipst));
+}
+
+/*
  * Return a pointer to the ill which matches the supplied name.  Note that
  * the ill name length includes the null termination character.  (May be
  * called as writer.)
@@ -8059,7 +8075,6 @@
 ipsq_current_finish(ipsq_t *ipsq)
 {
 	ipif_t *ipif = ipsq->ipsq_current_ipif;
-	hook_nic_event_t *info;
 
 	ASSERT(IAM_WRITER_IPSQ(ipsq));
 
@@ -8070,22 +8085,9 @@
 	if (ipsq->ipsq_current_ioctl != SIOCLIFREMOVEIF) {
 		mutex_enter(&ipif->ipif_ill->ill_lock);
 		ipif->ipif_state_flags &= ~IPIF_CHANGING;
-		/*
-		 * Unhook the nic event message from the ill and enqueue it
-		 * into the nic event taskq.
-		 */
-		if ((info = ipif->ipif_ill->ill_nic_event_info) != NULL) {
-			if (ddi_taskq_dispatch(eventq_queue_nic,
-			    ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) {
-				ip2dbg(("ipsq_current_finish: "
-				    "ddi_taskq_dispatch failed\n"));
-				if (info->hne_data != NULL)
-					kmem_free(info->hne_data,
-					    info->hne_datalen);
-				kmem_free(info, sizeof (hook_nic_event_t));
-			}
-			ipif->ipif_ill->ill_nic_event_info = NULL;
-		}
+
+		/* Send any queued event */
+		ill_nic_info_dispatch(ipif->ipif_ill);
 		mutex_exit(&ipif->ipif_ill->ill_lock);
 	}
 
@@ -11510,7 +11512,7 @@
 			ip_stack_t	*ipst = ill->ill_ipst;
 
 			info->hne_nic =
-			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
+			    ipif->ipif_ill->ill_phyint->phyint_hook_ifindex;
 			info->hne_lif = MAP_IPIF_ID(ipif->ipif_id);
 			info->hne_event = NE_ADDRESS_CHANGE;
 			info->hne_family = ipif->ipif_isv6 ?
@@ -14499,7 +14501,7 @@
 	 * messages can land up, since the ipsq_refs is zero.
 	 * i.e. this ipsq is unnamed and no phyint or phyint group
 	 * is associated with this ipsq. (Lookups are based on ill_name
-	 * or phyint_group_name)
+	 * or phyint_groupname)
 	 */
 	ASSERT(ipsq->ipsq_refs == 0);
 	ASSERT(ipsq->ipsq_xopq_mphead == NULL && ipsq->ipsq_mphead == NULL);
@@ -16397,10 +16399,13 @@
  * Return the first phyint matching the groupname. There could
  * be more than one when there are ill groups.
  *
- * Needs work: called only from ip_sioctl_groupname
- */
-static phyint_t *
-phyint_lookup_group(char *groupname, ip_stack_t *ipst)
+ * If 'usable' is set, then we exclude ones that are marked with any of
+ * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE).
+ * Needs work: called only from ip_sioctl_groupname and from the ipmp/netinfo
+ * emulation of ipmp.
+ */
+phyint_t *
+phyint_lookup_group(char *groupname, boolean_t usable, ip_stack_t *ipst)
 {
 	phyint_t *phyi;
 
@@ -16415,6 +16420,14 @@
 	    phyi, AVL_AFTER)) {
 		if (phyi->phyint_groupname_len == 0)
 			continue;
+		/*
+		 * Skip the ones that should not be used since the callers
+		 * sometimes use this for sending packets.
+		 */
+		if (usable && (phyi->phyint_flags &
+		    (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE)))
+			continue;
+
 		ASSERT(phyi->phyint_groupname != NULL);
 		if (mi_strcmp(groupname, phyi->phyint_groupname) == 0)
 			return (phyi);
@@ -16423,6 +16436,49 @@
 }
 
 
+/*
+ * Return the first usable phyint matching the group index, or NULL if
+ * ipmp_hook_emulation is off or none matches. 'Usable' excludes any marked
+ * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE).
+ *
+ * Used only for the ipmp/netinfo emulation of ipmp.
+ */
+phyint_t *
+phyint_lookup_group_ifindex(uint_t group_ifindex, ip_stack_t *ipst)
+{
+	phyint_t *phyi;
+
+	ASSERT(RW_LOCK_HELD(&ipst->ips_ill_g_lock));
+
+	if (!ipst->ips_ipmp_hook_emulation)
+		return (NULL);
+
+	/*
+	 * Group indices are stored in the phyint - a common structure
+	 * to both IPv4 and IPv6.
+	 */
+	phyi = avl_first(&ipst->ips_phyint_g_list->phyint_list_avl_by_index);
+	for (; phyi != NULL;
+	    phyi = avl_walk(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
+	    phyi, AVL_AFTER)) {
+		/* Ignore the ones that do not have a group */
+		if (phyi->phyint_groupname_len == 0)
+			continue;
+
+		ASSERT(phyi->phyint_group_ifindex != 0);
+		/*
+		 * Skip the ones that should not be used since the callers
+		 * sometimes use this for sending packets.
+		 */
+		if (phyi->phyint_flags &
+		    (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE))
+			continue;
+		if (phyi->phyint_group_ifindex == group_ifindex)
+			return (phyi);
+	}
+	return (NULL);
+}
+
 
 /*
  * MT notes on creation and deletion of IPMP groups
@@ -16589,6 +16645,10 @@
 		mi_free(phyi->phyint_groupname);
 		phyi->phyint_groupname = NULL;
 		phyi->phyint_groupname_len = 0;
+
+		/* Restore the hook ifindex to be the per-interface one */
+		phyi->phyint_group_ifindex = 0;
+		phyi->phyint_hook_ifindex = phyi->phyint_ifindex;
 		mutex_exit(&phyi->phyint_lock);
 		RELEASE_ILL_LOCKS(ill_v4, ill_v6);
 		rw_exit(&ipst->ips_ill_g_lock);
@@ -16641,7 +16701,7 @@
 		 * packets across the group because of potential link-level
 		 * header differences.
 		 */
-		phyi_tmp = phyint_lookup_group(groupname, ipst);
+		phyi_tmp = phyint_lookup_group(groupname, B_FALSE, ipst);
 		if (phyi_tmp != NULL) {
 			if ((ill_v4 != NULL &&
 			    phyi_tmp->phyint_illv4 != NULL) &&
@@ -16736,6 +16796,37 @@
 		phyi->phyint_groupname = tmp;
 		bcopy(groupname, phyi->phyint_groupname, namelen + 1);
 		phyi->phyint_groupname_len = namelen + 1;
+
+		if (ipst->ips_ipmp_hook_emulation) {
+			/*
+			 * If the group already exists we use the existing
+			 * group_ifindex, otherwise we pick a new index here.
+			 */
+			if (phyi_tmp != NULL) {
+				phyi->phyint_group_ifindex =
+				    phyi_tmp->phyint_group_ifindex;
+			} else {
+				/* XXX We need a recovery strategy here. */
+				if (!ip_assign_ifindex(
+				    &phyi->phyint_group_ifindex, ipst))
+					cmn_err(CE_PANIC,
+					    "ip_assign_ifindex() failed");
+			}
+		}
+		/*
+		 * Select whether the netinfo and hook use the per-interface
+		 * or per-group ifindex.
+		 */
+		if (ipst->ips_ipmp_hook_emulation)
+			phyi->phyint_hook_ifindex = phyi->phyint_group_ifindex;
+		else
+			phyi->phyint_hook_ifindex = phyi->phyint_ifindex;
+
+		if (ipst->ips_ipmp_hook_emulation &&
+		    phyi_tmp == NULL) {
+			/* First phyint in group - group PLUMB event */
+			ill_nic_info_plumb(ill, B_TRUE);
+		}
 		mutex_exit(&phyi->phyint_lock);
 		RELEASE_ILL_LOCKS(ill_v4, ill_v6);
 		rw_exit(&ipst->ips_ill_g_lock);
@@ -18414,7 +18505,7 @@
 	if (info != NULL) {
 		ip_stack_t	*ipst = ill->ill_ipst;
 
-		info->hne_nic = ill->ill_phyint->phyint_ifindex;
+		info->hne_nic = ill->ill_phyint->phyint_hook_ifindex;
 		info->hne_lif = 0;
 		info->hne_event = NE_DOWN;
 		info->hne_data = NULL;
@@ -22891,6 +22982,9 @@
 		if (!phyint_assign_ifindex(phyi, ipst))
 			cmn_err(CE_PANIC, "phyint_assign_ifindex() failed");
 
+		/* No IPMP group yet, thus the hook uses the ifindex */
+		phyi->phyint_hook_ifindex = phyi->phyint_ifindex;
+
 		avl_insert(&ipst->ips_phyint_g_list->phyint_list_avl_by_name,
 		    (void *)phyi, where);
 
@@ -22940,45 +23034,103 @@
 	 */
 	if (ill->ill_name_length <= 2 ||
 	    ill->ill_name[0] != 'l' || ill->ill_name[1] != 'o') {
-		hook_nic_event_t *info;
-		if ((info = ill->ill_nic_event_info) != NULL) {
-			ip2dbg(("ill_phyint_reinit: unexpected nic event %d "
-			    "attached for %s\n", info->hne_event,
-			    ill->ill_name));
+		/*
+		 * Generate nic plumb event for ill_name even if
+		 * ipmp_hook_emulation is set. That avoids generating events
+		 * for the ill_names should ipmp_hook_emulation be turned on
+		 * later.
+		 */
+		ill_nic_info_plumb(ill, B_FALSE);
+	}
+	RELEASE_ILL_LOCKS(ill, ill_other);
+	mutex_exit(&phyi->phyint_lock);
+}
+
+/*
+ * Attach a NE_PLUMB nic event to the ill (replacing any already attached),
+ * carrying the group name if 'group' is set, otherwise the ill name.
+ * Caller holds ill_lock; the event is sent by ill_nic_info_dispatch().
+ */
+void
+ill_nic_info_plumb(ill_t *ill, boolean_t group)
+{
+	phyint_t	*phyi = ill->ill_phyint;
+	ip_stack_t	*ipst = ill->ill_ipst;
+	hook_nic_event_t *info;
+	char		*name;
+	int		namelen;
+
+	ASSERT(MUTEX_HELD(&ill->ill_lock));
+
+	if ((info = ill->ill_nic_event_info) != NULL) {
+		ip2dbg(("ill_nic_info_plumb: unexpected nic event %d "
+		    "attached for %s\n", info->hne_event,
+		    ill->ill_name));
+		if (info->hne_data != NULL)
+			kmem_free(info->hne_data, info->hne_datalen);
+		kmem_free(info, sizeof (hook_nic_event_t));
+		ill->ill_nic_event_info = NULL;
+	}
+
+	info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP);
+	if (info == NULL) {
+		ip2dbg(("ill_nic_info_plumb: could not attach PLUMB nic "
+		    "event information for %s (ENOMEM)\n",
+		    ill->ill_name));
+		return;
+	}
+
+	if (group) {
+		ASSERT(phyi->phyint_groupname_len != 0);
+		namelen = phyi->phyint_groupname_len;
+		name = phyi->phyint_groupname;
+	} else {
+		namelen = ill->ill_name_length;
+		name = ill->ill_name;
+	}
+
+	info->hne_nic = phyi->phyint_hook_ifindex;
+	info->hne_lif = 0;
+	info->hne_event = NE_PLUMB;
+	info->hne_family = ill->ill_isv6 ?
+	    ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data;
+
+	info->hne_data = kmem_alloc(namelen, KM_NOSLEEP);
+	if (info->hne_data != NULL) {
+		info->hne_datalen = namelen;
+		bcopy(name, info->hne_data, info->hne_datalen);
+	} else {
+		ip2dbg(("ill_nic_info_plumb: could not attach "
+		    "name information for PLUMB nic event "
+		    "of %s (ENOMEM)\n", name));
+		kmem_free(info, sizeof (hook_nic_event_t));
+		info = NULL;
+	}
+	ill->ill_nic_event_info = info;
+}
+
+/*
+ * Unhook the nic event message from the ill and enqueue it
+ * into the nic event taskq.
+ */
+void
+ill_nic_info_dispatch(ill_t *ill)
+{
+	hook_nic_event_t *info;
+
+	ASSERT(MUTEX_HELD(&ill->ill_lock));
+
+	if ((info = ill->ill_nic_event_info) != NULL) {
+		if (ddi_taskq_dispatch(eventq_queue_nic,
+		    ip_ne_queue_func, info, DDI_SLEEP) == DDI_FAILURE) {
+			ip2dbg(("ill_nic_info_dispatch: "
+			    "ddi_taskq_dispatch failed\n"));
 			if (info->hne_data != NULL)
 				kmem_free(info->hne_data, info->hne_datalen);
 			kmem_free(info, sizeof (hook_nic_event_t));
 		}
-
-		info = kmem_alloc(sizeof (hook_nic_event_t), KM_NOSLEEP);
-		if (info != NULL) {
-			info->hne_nic = ill->ill_phyint->phyint_ifindex;
-			info->hne_lif = 0;
-			info->hne_event = NE_PLUMB;
-			info->hne_family = ill->ill_isv6 ?
-			    ipst->ips_ipv6_net_data : ipst->ips_ipv4_net_data;
-			info->hne_data = kmem_alloc(ill->ill_name_length,
-			    KM_NOSLEEP);
-			if (info->hne_data != NULL) {
-				info->hne_datalen = ill->ill_name_length;
-				bcopy(ill->ill_name, info->hne_data,
-				    info->hne_datalen);
-			} else {
-				ip2dbg(("ill_phyint_reinit: could not attach "
-				    "ill_name information for PLUMB nic event "
-				    "of %s (ENOMEM)\n", ill->ill_name));
-				kmem_free(info, sizeof (hook_nic_event_t));
-			}
-		} else
-			ip2dbg(("ill_phyint_reinit: could not attach PLUMB nic "
-			    "event information for %s (ENOMEM)\n",
-			    ill->ill_name));
-
-		ill->ill_nic_event_info = info;
-	}
-
-	RELEASE_ILL_LOCKS(ill, ill_other);
-	mutex_exit(&phyi->phyint_lock);
+		ill->ill_nic_event_info = NULL;
+	}
 }
 
 /*
@@ -24478,6 +24630,11 @@
  * Return a pointer to an ipif_t given a combination of (ill_idx,ipif_id)
  * If a pointer to an ipif_t is returned then the caller will need to do
  * an ill_refrele().
+ *
+ * If there is no real interface which matches the ifindex, then it looks
+ * for a group that has a matching index. In the case of a group match the
+ * lifidx must be zero. We don't need to emulate the logical interfaces
+ * since IP Filter's use of netinfo doesn't use them.
  */
 ipif_t *
 ipif_getby_indexes(uint_t ifindex, uint_t lifidx, boolean_t isv6,
@@ -24489,8 +24646,17 @@
 	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
 	    ipst);
 
-	if (ill == NULL)
-		return (NULL);
+	if (ill == NULL) {
+		/* Fallback to group names only if hook_emulation set */
+		if (!ipst->ips_ipmp_hook_emulation)
+			return (NULL);
+
+		if (lifidx != 0)
+			return (NULL);
+		ill = ill_group_lookup_on_ifindex(ifindex, isv6, ipst);
+		if (ill == NULL)
+			return (NULL);
+	}
 
 	mutex_enter(&ill->ill_lock);
 	if (ill->ill_state_flags & ILL_CONDEMNED) {
--- a/usr/src/uts/common/inet/ip/ip_netinfo.c	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_netinfo.c	Mon Apr 16 11:46:20 2007 -0700
@@ -475,16 +475,26 @@
     char *buffer, const size_t buflen, boolean_t isv6, ip_stack_t *ipst)
 {
 	ill_t *ill;
+	char *name;
 
 	ASSERT(buffer != NULL);
 
 	ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6, NULL, NULL,
 	    NULL, NULL, ipst);
-	if (ill == NULL)
-		return (1);
-
-	if (ill->ill_name != NULL) {
-		(void) strlcpy(buffer, ill->ill_name, buflen);
+	if (ill != NULL) {
+		name = ill->ill_name;
+	} else {
+		/* Fallback to group names only if hook_emulation is set */
+		if (ipst->ips_ipmp_hook_emulation) {
+			ill = ill_group_lookup_on_ifindex((uint_t)phy_ifdata,
+			    isv6, ipst);
+		}
+		if (ill == NULL)
+			return (1);
+		name = ill->ill_phyint->phyint_groupname;
+	}
+	if (name != NULL) {
+		(void) strlcpy(buffer, name, buflen);
 		ill_refrele(ill);
 		return (0);
 	} else {
@@ -516,6 +526,9 @@
 
 /*
  * Shared implementation to determine the MTU of a network interface
+ *
+ * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
+ * But IP Filter only uses a zero ifdata.
  */
 /* ARGSUSED */
 static int
@@ -541,7 +554,16 @@
 
 		if ((ill = ill_lookup_on_ifindex((uint_t)phy_ifdata, isv6,
 		    NULL, NULL, NULL, NULL, ipst)) == NULL) {
-			return (0);
+			/*
+			 * Fallback to group names only if hook_emulation
+			 * is set
+			 */
+			if (ipst->ips_ipmp_hook_emulation) {
+				ill = ill_group_lookup_on_ifindex(
+				    (uint_t)phy_ifdata, isv6, ipst);
+			}
+			if (ill == NULL)
+				return (0);
 		}
 		mtu = ill->ill_max_frag;
 		ill_refrele(ill);
@@ -562,6 +584,9 @@
 
 /*
  * Get next interface from the current list of IPv4 physical network interfaces
+ *
+ * Note: this does not handle the case when ipmp_hook_emulation is set.
+ * But IP Filter does not use this function.
  */
 static phy_if_t
 ip_phygetnext(phy_if_t phy_ifdata, netstack_t *ns)
@@ -614,10 +639,14 @@
 	ill = ill_lookup_on_name((char *)name, B_FALSE, isv6, NULL, NULL,
 	    NULL, NULL, NULL, ipst);
 
+	/* Fallback to group names only if hook_emulation is set */
+	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
+		ill = ill_group_lookup_on_name((char *)name, isv6, ipst);
+	}
 	if (ill == NULL)
 		return (0);
 
-	phy = ill->ill_phyint->phyint_ifindex;
+	phy = ill->ill_phyint->phyint_hook_ifindex;
 
 	ill_refrele(ill);
 
@@ -649,6 +678,9 @@
 /*
  * Shared implementation to get next interface from the current list of
  * logical network interfaces
+ *
+ * Note: this does not handle the case when ipmp_hook_emulation is set.
+ * But IP Filter does not use this function.
  */
 static lif_if_t
 ip_lifgetnext_impl(phy_if_t phy_ifdata, lif_if_t ifdata, boolean_t isv6,
@@ -960,11 +992,13 @@
 		return (0);
 
 	ill = ire_to_ill(ire);
-	if (ill == NULL)
+	if (ill == NULL) {
+		ire_refrele(ire);
 		return (0);
+	}
 
 	ASSERT(ill != NULL);
-	phy_if = (phy_if_t)ill->ill_phyint->phyint_ifindex;
+	phy_if = (phy_if_t)ill->ill_phyint->phyint_hook_ifindex;
 	ire_refrele(ire);
 
 	return (phy_if);
@@ -1089,6 +1123,9 @@
 
 /*
  * Shared implementation to determine the network addresses for an interface
+ *
+ * Note: this does not handle a non-zero ifdata when ipmp_hook_emulation is set.
+ * But IP Filter only uses a zero ifdata.
  */
 /* ARGSUSED */
 static int
@@ -1236,8 +1273,15 @@
 	packet = &inject->inj_data;
 	ASSERT(packet->ni_packet != NULL);
 
-	if ((ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
-	    B_FALSE, NULL, NULL, NULL, NULL, ipst)) == NULL) {
+	ill = ill_lookup_on_ifindex((uint_t)packet->ni_physical,
+	    B_FALSE, NULL, NULL, NULL, NULL, ipst);
+
+	/* Fallback to group names only if hook_emulation is set */
+	if (ill == NULL && ipst->ips_ipmp_hook_emulation) {
+		ill = ill_group_lookup_on_ifindex((uint_t)packet->ni_physical,
+		    B_FALSE, ipst);
+	}
+	if (ill == NULL) {
 		kmem_free(inject, sizeof (*inject));
 		return;
 	}
@@ -1302,3 +1346,73 @@
 		kmem_free(info->hne_data, info->hne_datalen);
 	kmem_free(arg, sizeof (hook_nic_event_t));
 }
+
+/*
+ * Temporary function to support IPMP emulation for IP Filter.
+ * Lookup an ill based on the ifindex assigned to the group.
+ * Skips unusable ones i.e. where any of these flags are set:
+ * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE)
+ *
+ * Returns the ill with a reference held (the caller must ill_refrele()
+ * it), or NULL if no usable, lookup-able ill exists for the group.
+ */
+ill_t *
+ill_group_lookup_on_ifindex(uint_t index, boolean_t isv6, ip_stack_t *ipst)
+{
+	ill_t	*ill;
+	phyint_t *phyi;
+
+	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+	phyi = phyint_lookup_group_ifindex(index, ipst);
+	if (phyi != NULL) {
+		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
+		if (ill != NULL) {
+			mutex_enter(&ill->ill_lock);
+			if (ILL_CAN_LOOKUP(ill)) {
+				ill_refhold_locked(ill);
+				mutex_exit(&ill->ill_lock);
+				rw_exit(&ipst->ips_ill_g_lock);
+				return (ill);
+			}
+			/* Cannot use this ill; don't leak ill_lock. */
+			mutex_exit(&ill->ill_lock);
+		}
+	}
+	rw_exit(&ipst->ips_ill_g_lock);
+	return (NULL);
+}
+
+/*
+ * Temporary function to support IPMP emulation for IP Filter.
+ * Lookup an ill based on the group name.
+ * Skips unusable ones i.e. where any of these flags are set:
+ * (PHYI_FAILED|PHYI_STANDBY|PHYI_OFFLINE|PHYI_INACTIVE)
+ *
+ * Returns the ill with a reference held (the caller must ill_refrele()
+ * it), or NULL if no usable, lookup-able ill exists for the group.
+ */
+ill_t *
+ill_group_lookup_on_name(char *name, boolean_t isv6, ip_stack_t *ipst)
+{
+	ill_t	*ill;
+	phyint_t *phyi;
+
+	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+	phyi = phyint_lookup_group(name, B_TRUE, ipst);
+	if (phyi != NULL) {
+		ill = isv6 ? phyi->phyint_illv6: phyi->phyint_illv4;
+		if (ill != NULL) {
+			mutex_enter(&ill->ill_lock);
+			if (ILL_CAN_LOOKUP(ill)) {
+				ill_refhold_locked(ill);
+				mutex_exit(&ill->ill_lock);
+				rw_exit(&ipst->ips_ill_g_lock);
+				return (ill);
+			}
+			/* Cannot use this ill; don't leak ill_lock. */
+			mutex_exit(&ill->ill_lock);
+		}
+	}
+	rw_exit(&ipst->ips_ill_g_lock);
+	return (NULL);
+}
--- a/usr/src/uts/common/inet/ip_if.h	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip_if.h	Mon Apr 16 11:46:20 2007 -0700
@@ -160,6 +160,8 @@
 extern	void	ill_dlpi_done(ill_t *, t_uscalar_t);
 extern	void	ill_dlpi_send(ill_t *, mblk_t *);
 extern	mblk_t	*ill_dlur_gen(uchar_t *, uint_t, t_uscalar_t, t_scalar_t);
+extern  ill_t	*ill_group_lookup_on_ifindex(uint_t, boolean_t, ip_stack_t *);
+extern	ill_t	*ill_group_lookup_on_name(char *, boolean_t, ip_stack_t *);
 /* NOTE: Keep unmodified ill_lookup_on_ifindex for ipp for now */
 extern  ill_t	*ill_lookup_on_ifindex_global_instance(uint_t, boolean_t,
     queue_t *, mblk_t *, ipsq_func_t, int *);
--- a/usr/src/uts/common/inet/ip_stack.h	Mon Apr 16 11:16:17 2007 -0700
+++ b/usr/src/uts/common/inet/ip_stack.h	Mon Apr 16 11:46:20 2007 -0700
@@ -297,6 +297,8 @@
 	int		ips_ip_g_forward;
 	int		ips_ipv6_forward;
 
+	int		ips_ipmp_hook_emulation; /* ndd variable */
+
 	time_t		ips_ip_g_frag_timeout;
 	clock_t		ips_ip_g_frag_timo_ms;