changeset 10649:ab3ce9d83b84

6847238 cannot halt/reboot an exclusive zone with IPMP interfaces 6870300 IPMP standby activation can be simplified 6880826 in.mpathd FDT change messages should not be errors
author meem <Peter.Memishian@Sun.COM>
date Fri, 25 Sep 2009 15:00:11 -0400
parents fd73c9d70fde
children 5195e7d7a5f4
files usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h usr/src/cmd/svc/shell/net_include.sh usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip_if.c usr/src/uts/common/inet/ip_if.h usr/src/uts/intel/ip/ip.global-objs.debug64 usr/src/uts/intel/ip/ip.global-objs.obj64 usr/src/uts/sparc/ip/ip.global-objs.debug64 usr/src/uts/sparc/ip/ip.global-objs.obj64
diffstat 12 files changed, 330 insertions(+), 252 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c	Fri Sep 25 15:00:11 2009 -0400
@@ -1415,10 +1415,13 @@
 
 	/*
 	 * If IFF_STANDBY has changed, indicate that the interface has changed
-	 * types.
+	 * types and refresh IFF_INACTIVE if need be.
 	 */
-	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY)
+	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) {
 		phyint_changed(pi);
+		if (pii->pii_flags & IFF_STANDBY)
+			phyint_standby_refresh_inactive(pi);
+	}
 
 	/* Has just the IFF_RUNNING flag changed state ? */
 	if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c	Fri Sep 25 15:00:11 2009 -0400
@@ -910,10 +910,10 @@
 			pg->pg_fdt = pg->pg_probeint * (NUM_PROBE_FAILS + 2);
 			last_fdt_bumpup_time = gethrtime();
 			if (pg != phyint_anongroup) {
-				logerr("Cannot meet requested failure detection"
-				    " time of %d ms on (%s %s) new failure"
-				    " detection time for group \"%s\" is %d"
-				    " ms\n", user_failure_detection_time,
+				logtrace("Cannot meet requested failure"
+				    " detection time of %d ms on (%s %s) new"
+				    " failure detection time for group \"%s\""
+				    " is %d ms\n", user_failure_detection_time,
 				    AF_STR(pii->pii_af), pii->pii_name,
 				    pg->pg_name, pg->pg_fdt);
 			}
@@ -931,10 +931,10 @@
 			    user_failure_detection_time);
 			pg->pg_probeint = pg->pg_fdt / (NUM_PROBE_FAILS + 2);
 			if (pg != phyint_anongroup) {
-				logerr("Improved failure detection time %d ms "
-				    "on (%s %s) for group \"%s\"\n", pg->pg_fdt,
-				    AF_STR(pii->pii_af), pii->pii_name,
-				    pg->pg_name);
+				logtrace("Improved failure detection time %d ms"
+				    " on (%s %s) for group \"%s\"\n",
+				    pg->pg_fdt, AF_STR(pii->pii_af),
+				    pii->pii_name, pg->pg_name);
 			}
 			if (user_failure_detection_time == pg->pg_fdt) {
 				/* Avoid any truncation or rounding errors */
@@ -1356,7 +1356,7 @@
 		return;
 
 	for (pi2 = pi->pi_group->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
-		if (pi == pi2 || pi2->pi_state != PI_RUNNING ||
+		if (pi == pi2 || !phyint_is_functioning(pi2) ||
 		    !(pi2->pi_flags & IFF_INACTIVE))
 			continue;
 
@@ -1412,12 +1412,11 @@
 			if (!(pi2->pi_flags & IFF_STANDBY))
 				nnonstandby++;
 
-			if (pi2->pi_state == PI_RUNNING) {
-				if (!(pi2->pi_flags & IFF_INACTIVE)) {
-					nactive++;
-					if (pi2->pi_flags & IFF_STANDBY)
-						actstandbypi = pi2;
-				}
+			if (phyint_is_functioning(pi2) &&
+			    !(pi2->pi_flags & IFF_INACTIVE)) {
+				nactive++;
+				if (pi2->pi_flags & IFF_STANDBY)
+					actstandbypi = pi2;
 			}
 		}
 	}
@@ -1447,6 +1446,47 @@
 }
 
 /*
+ * Recompute IFF_INACTIVE for `pi' so that its group trends toward having at
+ * least one active interface, and as many active interfaces as it has
+ * non-standby interfaces.
+ */
+void
+phyint_standby_refresh_inactive(struct phyint *pi)
+{
+	struct phyint *member;
+	uint_t active_cnt = 0, nonstandby_cnt = 0;
+
+	/*
+	 * Members of the anonymous group each behave as a group of one, so
+	 * they are always active -- standby or not.
+	 */
+	if (pi->pi_group == phyint_anongroup) {
+		(void) change_pif_flags(pi, 0, IFF_INACTIVE);
+		return;
+	}
+
+	/* A phyint that isn't functioning is not a candidate; leave it be. */
+	if (!phyint_is_functioning(pi))
+		return;
+
+	member = pi->pi_group->pg_phyint;
+	while (member != NULL) {
+		if ((member->pi_flags & IFF_STANDBY) == 0)
+			nonstandby_cnt++;
+
+		if (phyint_is_functioning(member) &&
+		    (member->pi_flags & IFF_INACTIVE) == 0)
+			active_cnt++;
+		member = member->pi_pgnext;
+	}
+
+	if (active_cnt > nonstandby_cnt)
+		(void) change_pif_flags(pi, IFF_INACTIVE, 0);
+	else if (active_cnt == 0 || active_cnt < nonstandby_cnt)
+		(void) change_pif_flags(pi, 0, IFF_INACTIVE);
+}
+
+/*
  * See if a previously failed interface has started working again.
  */
 void
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c	Fri Sep 25 15:00:11 2009 -0400
@@ -409,19 +409,6 @@
 	 */
 	phyint_insert(pi, pg);
 
-	/*
-	 * If the interface is offline, we set the state to PI_OFFLINE.
-	 * Otherwise, optimistically consider this interface running.  Later
-	 * (in process_link_state_changes()), we will adjust this to match the
-	 * current state of the link.  Further, if test addresses are
-	 * subsequently assigned, we will transition to PI_NOTARGETS and then
-	 * to either PI_RUNNING or PI_FAILED depending on the probe results.
-	 */
-	if (flags & IFF_OFFLINE)
-		phyint_chstate(pi, PI_OFFLINE);
-	else
-		phyint_transition_to_running(pi); /* calls phyint_chstate() */
-
 	return (pi);
 }
 
@@ -774,12 +761,38 @@
 		return (NULL);
 	}
 
+	/*
+	 * NOTE: the change_pif_flags() implementation requires a phyint
+	 * instance before it can function, so a number of tasks that would
+	 * otherwise be done in phyint_create() are deferred to here.
+	 */
 	if (pi_created) {
 		/*
+		 * If the interface is offline, set the state to PI_OFFLINE.
+		 * Otherwise, optimistically consider this interface running.
+		 * Later (in process_link_state_changes()), we will adjust
+		 * this to match the current state of the link.  Further, if
+		 * test addresses are subsequently assigned, we will
+		 * transition to PI_NOTARGETS and then to either PI_RUNNING or
+		 * PI_FAILED depending on the probe results.
+		 */
+		if (pi->pi_flags & IFF_OFFLINE) {
+			phyint_chstate(pi, PI_OFFLINE);
+		} else {
+			/* calls phyint_chstate() */
+			phyint_transition_to_running(pi);
+		}
+
+		/*
+		 * If this is a standby phyint, determine whether it should be
+		 * IFF_INACTIVE.
+		 */
+		if (pi->pi_flags & IFF_STANDBY)
+			phyint_standby_refresh_inactive(pi);
+
+		/*
 		 * If this phyint does not have a unique hardware address in its
-		 * group, offline it.  (The change_pif_flags() implementation
-		 * requires that we defer this until after the phyint_instance
-		 * is created.)
+		 * group, offline it.
 		 */
 		if (phyint_lookup_hwaddr(pi, _B_TRUE) != NULL) {
 			pi->pi_hwaddrdup = _B_TRUE;
@@ -1313,6 +1326,7 @@
 static void
 phyint_delete(struct phyint *pi)
 {
+	boolean_t active;
 	struct phyint *pi2;
 	struct phyint_group *pg = pi->pi_group;
 
@@ -1369,6 +1383,27 @@
 		assert(pi2->pi_hwaddrdup);
 		(void) phyint_undo_offline(pi2);
 	}
+
+	/*
+	 * If the interface was in a named group and was either an active
+	 * standby or the last active interface, try to activate another
+	 * interface to compensate.
+	 */
+	if (pg != phyint_anongroup) {
+		active = _B_FALSE;
+		for (pi2 = pg->pg_phyint; pi2 != NULL; pi2 = pi2->pi_pgnext) {
+			if (phyint_is_functioning(pi2) &&
+			    !(pi2->pi_flags & IFF_INACTIVE)) {
+				active = _B_TRUE;
+				break;
+			}
+		}
+
+		if (!active ||
+		    (pi->pi_flags & (IFF_STANDBY|IFF_INACTIVE)) == IFF_STANDBY)
+			phyint_activate_another(pi);
+	}
+
 	phyint_link_close(pi);
 	free(pi);
 }
@@ -2525,7 +2560,6 @@
 			pii->pii_probes[i].pr_target = NULL;
 		}
 	}
-
 }
 
 /*
@@ -2642,7 +2676,7 @@
 /*
  * Check whether a phyint is functioning.
  */
-static boolean_t
+boolean_t
 phyint_is_functioning(struct phyint *pi)
 {
 	if (pi->pi_state == PI_RUNNING)
@@ -2653,7 +2687,7 @@
 /*
  * Check whether a phyint is usable.
  */
-static boolean_t
+boolean_t
 phyint_is_usable(struct phyint *pi)
 {
 	if (logint_upcount(pi) == 0)
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h	Fri Sep 25 15:00:11 2009 -0400
@@ -426,11 +426,13 @@
 extern void phyint_group_insert(struct phyint_group *pg);
 extern void phyint_group_delete(struct phyint_group *pg);
 extern void phyint_group_refresh_state(struct phyint_group *pg);
+extern void phyint_standby_refresh_inactive(struct phyint *pi);
 extern void phyint_check_for_repair(struct phyint *pi);
 extern void phyint_transition_to_running(struct phyint *pi);
 extern void phyint_activate_another(struct phyint *pi);
 extern int phyint_offline(struct phyint *pi, unsigned int);
 extern int phyint_undo_offline(struct phyint *pi);
+extern boolean_t phyint_is_functioning(struct phyint *pi);
 
 extern void logint_init_from_k(struct phyint_instance *pii, char *li_name);
 extern void logint_delete(struct logint *li);
--- a/usr/src/cmd/svc/shell/net_include.sh	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/cmd/svc/shell/net_include.sh	Fri Sep 25 15:00:11 2009 -0400
@@ -141,41 +141,6 @@
 }
 
 #
-# get_inactive_ifname groupname
-#
-# Return the name of an inactive interface in `groupname', if one exists.
-#
-get_inactive_ifname()
-{
-	ORIGIFS="$IFS"
-	/sbin/ipmpstat -gP -o groupname,interfaces |
-	while IFS=: read groupname ifnames; do
-		#
-		# Skip other IPMP groups.
-	        #
-		[ "$groupname" != "$1" ] && continue
-
-		#
-		# Standby interfaces are always enclosed in ()'s, so look
-		# for the first interface name starting with a "(", and
-		# strip those off.
-		#
-		IFS=" "
-		for ifname in $ifnames; do
-			case "$ifname" in
-			'('*)	IFS="()"
-				echo $ifname
-				IFS="$ORIGIFS"
-				return
-				;;
-			*)	;;
-			esac
-		done
-	done
-	IFS="$ORIGIFS"
-}
-
-#
 # get_groupifname groupname
 #
 # Return the IPMP meta-interface name for the group, if it exists.
@@ -304,37 +269,6 @@
 }
 
 #
-# get_standby_for_type interface type list
-#
-# Look through the set of hostname files associated with the same physical
-# interface as "interface", and print the standby value ("standby",
-# "-standby", or nothing).  Only hostname files associated with the
-# physical interface or logical interface zero can set this flag.
-#
-get_standby_for_type()
-{
-	physical=`get_physical $1`
-	type=$2
-
-	#
-	# The last setting of "standby" or "-standby" is the one that
-	# counts, which is the reason for the second while loop.
-	#
-	shift 2
-	for ifname in "$@"; do
-		if if_comp "$physical" $ifname; then 
-			get_hostname_ipmpinfo $ifname $type standby -standby
-		fi
-	done | while :; do
-		read keyword || {
-		    	echo "$iftype"
-			break
-		}
-		iftype="$keyword"
-	done
-}
-
-#
 # get_group interface
 #
 # If there is both an inet and inet6 version of an interface, the group
@@ -349,21 +283,6 @@
 }
 
 #
-# is_standby interface
-#
-# If there is both an inet and inet6 version of an interface, the
-# "standby" or "-standby" flag could be set in either set of hostname
-# files.  Since inet6 is configured after inet, if there's a setting in
-# both files, inet6 wins.
-#
-is_standby()
-{
-	standby=`get_standby_for_type $1 inet6 $inet6_list`
-	[ -z "$standby" ] && standby=`get_standby_for_type $1 inet $inet_list`
-	[ "$standby" = "standby" ]
-}
-
-#
 # doDHCPhostname interface
 # Pass to this function the name of an interface.  It will return
 # true if one should enable the use of DHCP client-side host name
@@ -544,23 +463,21 @@
 
 		#
 		# The hostname files are processed twice.  In the first
-		# pass, we are looking for all commands that apply
-		# to the non-additional interface address.  These may be
-		# scattered over several files.  We won't know
-		# whether the address represents a failover address
-		# or not until we've read all the files associated with the
-		# interface.
+		# pass, we are looking for all commands that apply to the
+		# non-additional interface address.  These may be
+		# scattered over several files.  We won't know whether the
+		# address represents a failover address or not until we've
+		# read all the files associated with the interface.
 		#
 		# In the first pass through the hostname files, all
-		# additional logical interface commands are removed.
-		# The remaining commands are concatenated together and
-		# passed to ifparse to determine whether the 
-		# non-additional logical interface address is a failover
-		# address.  If it as a failover address, the
-		# address may not be the first item on the line,
-		# so we can't just substitute "addif" for "set".
-		# We prepend an "addif $zaddr" command, and let
-		# the embedded "set" command set the address later.	
+		# additional logical interface commands are removed.  The
+		# remaining commands are concatenated together and passed
+		# to ifparse to determine whether the non-additional
+		# logical interface address is a failover address.  If it
+		# is a failover address, the address may not be the first
+		# item on the line, so we can't just substitute "addif"
+		# for "set".  We prepend an "addif $zaddr" command, and
+		# let the embedded "set" command set the address later.
 		#
 		/sbin/ifparse -f $type `
 			for item in $list; do
@@ -591,23 +508,10 @@
 			done
 		fi
 
-		#
-		# Check if this was an active interface in the group.  If so,
-		# activate another IP interface (if possible)
-		#
-		is_standby $ifname || inactive=`get_inactive_ifname $group`
-		[ -n "$inactive" ] && /sbin/ifconfig $inactive $type -standby
-
 		in_list physical_comp $ifname $processed || { 
 			processed="$processed $ifname"
-			echo " $ifname (moved to $grifname\c"	   > /dev/msglog
-			if [ -n "$inactive" ]; then
-				echo " and cleared 'standby' on\c" > /dev/msglog
-				echo " $inactive to compensate\c"  > /dev/msglog
-			fi
-			echo ")\c"				   > /dev/msglog
+			echo " $ifname (moved to $grifname)\c" > /dev/msglog
 		}
-		inactive=""
 	done
 	echo "." >/dev/msglog
 }
--- a/usr/src/uts/common/inet/ip/ip.c	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/common/inet/ip/ip.c	Fri Sep 25 15:00:11 2009 -0400
@@ -5707,8 +5707,10 @@
 	printf("ip_stack_shutdown(%p, stack %d)\n", (void *)ipst, stackid);
 #endif
 
-	/* Get rid of loopback interfaces and their IREs */
-	ip_loopback_cleanup(ipst);
+	/*
+	 * Perform cleanup for special interfaces (loopback and IPMP).
+	 */
+	ip_interface_cleanup(ipst);
 
 	/*
 	 * The *_hook_shutdown()s start the process of notifying any
--- a/usr/src/uts/common/inet/ip/ip_if.c	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/common/inet/ip/ip_if.c	Fri Sep 25 15:00:11 2009 -0400
@@ -20042,108 +20042,205 @@
 	}
 }
 
-major_t IP_MAJ;
-#define	IP	"ip"
-
-#define	UDP6DEV		"/devices/pseudo/udp6@0:udp6"
-#define	UDPDEV		"/devices/pseudo/udp@0:udp"
-
-/*
- * Issue REMOVEIF ioctls to have the loopback interfaces
- * go away.  Other interfaces are either I_LINKed or I_PLINKed;
- * the former going away when the user-level processes in the zone
- * are killed  * and the latter are cleaned up by the stream head
- * str_stack_shutdown callback that undoes all I_PLINKs.
- */
-void
-ip_loopback_cleanup(ip_stack_t *ipst)
-{
-	int error;
-	ldi_handle_t	lh = NULL;
-	ldi_ident_t	li = NULL;
-	int		rval;
-	cred_t		*cr;
+/*
+ * Issue STREAMS ioctl `cmd' on `lh' via I_STR with a 15-second timeout; `buf'
+ * is the in/out payload of `bufsize' bytes, issued with credentials `cr'.
+ * Returns zero on success or the errno from ldi_ioctl() on failure.
+ */
+static int
+ip_ioctl(ldi_handle_t lh, int cmd, void *buf, uint_t bufsize, cred_t *cr)
+{
+	int rval;
 	struct strioctl iocb;
-	struct lifreq	lifreq;
-
-	IP_MAJ = ddi_name_to_major(IP);
-
-#ifdef NS_DEBUG
-	(void) printf("ip_loopback_cleanup() stackid %d\n",
-	    ipst->ips_netstack->netstack_stackid);
-#endif
-
-	bzero(&lifreq, sizeof (lifreq));
-	(void) strcpy(lifreq.lifr_name, ipif_loopback_name);
-
-	error = ldi_ident_from_major(IP_MAJ, &li);
-	if (error) {
-#ifdef DEBUG
-		printf("ip_loopback_cleanup: lyr ident get failed error %d\n",
-		    error);
-#endif
-		return;
-	}
-
-	cr = zone_get_kcred(netstackid_to_zoneid(
-	    ipst->ips_netstack->netstack_stackid));
+
+	iocb.ic_cmd = cmd;
+	iocb.ic_timout = 15;
+	iocb.ic_len = bufsize;
+	iocb.ic_dp = buf;
+
+	return (ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, cr, &rval));
+}
+
+/*
+ * Issue an SIOCGLIFCONF for address family `af'.  On success, zero is returned
+ * and lifcp->lifc_buf is a kmem-allocated buffer of `*bufsizep' bytes.
+ */
+static int
+ip_lifconf_ioctl(ldi_handle_t lh, int af, struct lifconf *lifcp,
+    uint_t *bufsizep, cred_t *cr)
+{
+	struct lifnum count;
+	int rc;
+
+	bzero(&count, sizeof (count));
+	count.lifn_flags = LIFC_UNDER_IPMP;
+	count.lifn_family = af;
+
+	rc = ip_ioctl(lh, SIOCGLIFNUM, &count, sizeof (count), cr);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Leave headroom for interfaces that may get configured in between
+	 * the SIOCGLIFNUM above and the SIOCGLIFCONF below.
+	 */
+	count.lifn_count += 4;
+	bzero(lifcp, sizeof (*lifcp));
+	*bufsizep = count.lifn_count * sizeof (struct lifreq);
+	lifcp->lifc_len = *bufsizep;
+	lifcp->lifc_family = af;
+	lifcp->lifc_flags = LIFC_UNDER_IPMP;
+	lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
+
+	rc = ip_ioctl(lh, SIOCGLIFCONF, lifcp, sizeof (*lifcp), cr);
+	if (rc != 0)
+		kmem_free(lifcp->lifc_buf, *bufsizep);
+
+	return (rc);
+}
+
+/*
+ * Remove the loopback interface using `lh'; helper for ip_interface_cleanup().
+ */
+static void
+ip_loopback_removeif(ldi_handle_t lh, boolean_t isv6, cred_t *cr)
+{
+	struct lifreq req;
+	int rc;
+
+	bzero(&req, sizeof (req));
+	(void) strcpy(req.lifr_name, ipif_loopback_name);
+
+	if ((rc = ip_ioctl(lh, SIOCLIFREMOVEIF, &req, sizeof (req), cr)) == 0)
+		return;
+
+	ip0dbg(("ip_loopback_removeif: IP%s SIOCLIFREMOVEIF failed: "
+	    "error %d\n", isv6 ? "v6" : "v4", rc));
+}
+
+/*
+ * Helper for ip_interface_cleanup() that ensures no IP interfaces are in IPMP
+ * groups and that IPMP data addresses are down.  These conditions must be met
+ * so that IPMP interfaces can be I_PUNLINK'd, as per ip_sioctl_plink_ipmp().
+ */
+static void
+ip_ipmp_cleanup(ldi_handle_t lh, boolean_t isv6, cred_t *cr)
+{
+	int af = isv6 ? AF_INET6 : AF_INET;
+	int i, nifs;
+	int err;
+	uint_t bufsize;
+	uint_t lifrsize = sizeof (struct lifreq);
+	struct lifconf lifc;
+	struct lifreq *lifrp;
+
+	if ((err = ip_lifconf_ioctl(lh, af, &lifc, &bufsize, cr)) != 0) {
+		cmn_err(CE_WARN, "ip_ipmp_cleanup: cannot get interface list "
+		    "(error %d); any IPMP interfaces cannot be shutdown", err);
+		return;
+	}
+
+	nifs = lifc.lifc_len / lifrsize;
+	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
+		err = ip_ioctl(lh, SIOCGLIFFLAGS, lifrp, lifrsize, cr);
+		if (err != 0) {
+			cmn_err(CE_WARN, "ip_ipmp_cleanup: %s: cannot get "
+			    "flags: error %d", lifrp->lifr_name, err);
+			continue;
+		}
+
+		if (lifrp->lifr_flags & IFF_IPMP) {
+			if ((lifrp->lifr_flags & (IFF_UP|IFF_DUPLICATE)) == 0)
+				continue;
+
+			lifrp->lifr_flags &= ~IFF_UP;
+			err = ip_ioctl(lh, SIOCSLIFFLAGS, lifrp, lifrsize, cr);
+			if (err != 0) {
+				cmn_err(CE_WARN, "ip_ipmp_cleanup: %s: cannot "
+				    "bring down (error %d); IPMP interface may "
+				    "not be shutdown", lifrp->lifr_name, err);
+			}
+
+			/*
+			 * If IFF_DUPLICATE is still set, clear it by fetching
+			 * the address and then setting it right back.
+			 */
+			err = ip_ioctl(lh, SIOCGLIFFLAGS, lifrp, lifrsize, cr);
+			if (err != 0 || !(lifrp->lifr_flags & IFF_DUPLICATE))
+				continue;
+
+			err = ip_ioctl(lh, SIOCGLIFADDR, lifrp, lifrsize, cr);
+			if (err != 0 || (err = ip_ioctl(lh, SIOCSLIFADDR,
+			    lifrp, lifrsize, cr)) != 0) {
+				cmn_err(CE_WARN, "ip_ipmp_cleanup: %s: cannot "
+				    "reset DAD (error %d); IPMP interface may "
+				    "not be shutdown", lifrp->lifr_name, err);
+			}
+			continue;
+		}
+
+		lifrp->lifr_groupname[0] = '\0';
+		err = ip_ioctl(lh, SIOCSLIFGROUPNAME, lifrp, lifrsize, cr);
+		if (err != 0) {
+			cmn_err(CE_WARN, "ip_ipmp_cleanup: %s: cannot leave "
+			    "IPMP group (error %d); associated IPMP interface "
+			    "may not be shutdown", lifrp->lifr_name, err);
+			continue;
+		}
+	}
+
+	kmem_free(lifc.lifc_buf, bufsize);
+}
+
+#define	UDPDEV		"/devices/pseudo/udp@0:udp"
+#define	UDP6DEV		"/devices/pseudo/udp6@0:udp6"
+
+/*
+ * For each of IPv6 and IPv4, open the UDP device via LDI with the stack's zone
+ * kcred, remove the loopback interface and prep IPMP interfaces for teardown.
+ * Other interfaces are I_LINK'd (gone once the zone's processes are killed) or
+ * I_PLINK'd (cleaned up by str_stack_shutdown()).
+ */
+void
+ip_interface_cleanup(ip_stack_t *ipst)
+{
+	ldi_handle_t	lh;
+	ldi_ident_t	li;
+	cred_t		*cr;
+	int		err;
+	int		i;
+	char		*devs[] = { UDP6DEV, UDPDEV };
+	netstackid_t	stackid = ipst->ips_netstack->netstack_stackid;
+
+	if ((err = ldi_ident_from_major(ddi_name_to_major("ip"), &li)) != 0) {
+		cmn_err(CE_WARN, "ip_interface_cleanup: cannot get ldi ident:"
+		    " error %d", err);
+		return;
+	}
+
+	cr = zone_get_kcred(netstackid_to_zoneid(stackid));
 	ASSERT(cr != NULL);
-	error = ldi_open_by_name(UDP6DEV, FREAD|FWRITE, cr, &lh, li);
-	if (error) {
-#ifdef DEBUG
-		printf("ip_loopback_cleanup: open of UDP6DEV failed error %d\n",
-		    error);
-#endif
-		goto out;
-	}
-	iocb.ic_cmd = SIOCLIFREMOVEIF;
-	iocb.ic_timout = 15;
-	iocb.ic_len = sizeof (lifreq);
-	iocb.ic_dp = (char *)&lifreq;
-
-	error = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, cr, &rval);
-	/* LINTED - statement has no consequent */
-	if (error) {
-#ifdef NS_DEBUG
-		printf("ip_loopback_cleanup: ioctl SIOCLIFREMOVEIF failed on "
-		    "UDP6 error %d\n", error);
-#endif
-	}
-	(void) ldi_close(lh, FREAD|FWRITE, cr);
-	lh = NULL;
-
-	error = ldi_open_by_name(UDPDEV, FREAD|FWRITE, cr, &lh, li);
-	if (error) {
-#ifdef NS_DEBUG
-		printf("ip_loopback_cleanup: open of UDPDEV failed error %d\n",
-		    error);
-#endif
-		goto out;
-	}
-
-	iocb.ic_cmd = SIOCLIFREMOVEIF;
-	iocb.ic_timout = 15;
-	iocb.ic_len = sizeof (lifreq);
-	iocb.ic_dp = (char *)&lifreq;
-
-	error = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, cr, &rval);
-	/* LINTED - statement has no consequent */
-	if (error) {
-#ifdef NS_DEBUG
-		printf("ip_loopback_cleanup: ioctl SIOCLIFREMOVEIF failed on "
-		    "UDP error %d\n", error);
-#endif
-	}
-	(void) ldi_close(lh, FREAD|FWRITE, cr);
-	lh = NULL;
-
-out:
-	/* Close layered handles */
-	if (lh)
+
+	/*
+	 * NOTE: devs[] has exactly two entries with IPv6 first; the `isv6'
+	 * argument passed below (i == 0) relies on that ordering.  (Unrolling
+	 * yields repetitious code, hence the loop.)
+	 */
+	for (i = 0; i < 2; i++) {
+		err = ldi_open_by_name(devs[i], FREAD|FWRITE, cr, &lh, li);
+		if (err != 0) {
+			cmn_err(CE_WARN, "ip_interface_cleanup: cannot open %s:"
+			    " error %d", devs[i], err);
+			continue;
+		}
+
+		ip_loopback_removeif(lh, i == 0, cr);
+		ip_ipmp_cleanup(lh, i == 0, cr);
+
 		(void) ldi_close(lh, FREAD|FWRITE, cr);
-	if (li)
-		ldi_ident_release(li);
-
+	}
+
+	ldi_ident_release(li);
 	crfree(cr);
 }
 
--- a/usr/src/uts/common/inet/ip_if.h	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/common/inet/ip_if.h	Fri Sep 25 15:00:11 2009 -0400
@@ -214,7 +214,7 @@
 extern uint_t	ill_appaddr_cnt(const ill_t *);
 extern uint_t	ill_ptpaddr_cnt(const ill_t *);
 
-extern	void	ip_loopback_cleanup(ip_stack_t *);
+extern	void	ip_interface_cleanup(ip_stack_t *);
 extern	void	ipif_get_name(const ipif_t *, char *, int);
 extern	ipif_t	*ipif_getby_indexes(uint_t, uint_t, boolean_t, ip_stack_t *);
 extern	void	ipif_init(ip_stack_t *);
--- a/usr/src/uts/intel/ip/ip.global-objs.debug64	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/intel/ip/ip.global-objs.debug64	Fri Sep 25 15:00:11 2009 -0400
@@ -23,7 +23,6 @@
 # Use is subject to license terms.
 #
 
-IP_MAJ
 cb_inet_devops
 cl_inet_bind
 cl_inet_connect2
--- a/usr/src/uts/intel/ip/ip.global-objs.obj64	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/intel/ip/ip.global-objs.obj64	Fri Sep 25 15:00:11 2009 -0400
@@ -23,7 +23,6 @@
 # Use is subject to license terms.
 #
 
-IP_MAJ
 cb_inet_devops
 cl_inet_bind
 cl_inet_connect2
--- a/usr/src/uts/sparc/ip/ip.global-objs.debug64	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/sparc/ip/ip.global-objs.debug64	Fri Sep 25 15:00:11 2009 -0400
@@ -23,7 +23,6 @@
 # Use is subject to license terms.
 #
 
-IP_MAJ
 cb_inet_devops
 cl_inet_bind
 cl_inet_connect2
--- a/usr/src/uts/sparc/ip/ip.global-objs.obj64	Fri Sep 25 12:11:50 2009 -0400
+++ b/usr/src/uts/sparc/ip/ip.global-objs.obj64	Fri Sep 25 15:00:11 2009 -0400
@@ -23,7 +23,6 @@
 # Use is subject to license terms.
 #
 
-IP_MAJ
 cb_inet_devops
 cl_inet_bind
 cl_inet_connect2