changeset 9816:847676ec1c5b

PSARC 2008/353 zpool autoexpand property 6475340 when lun expands, zfs should expand too 6563887 in-place replacement allows for smaller devices 6606879 should be able to grow pool without a reboot or export/import 6844090 zfs should be able to mirror to a smaller disk
author George Wilson <George.Wilson@Sun.COM>
date Mon, 08 Jun 2009 10:35:50 -0700
parents 5d50ee5359bc
children 32ca8e40f33a
files usr/src/cmd/fs.d/zfs/Makefile usr/src/cmd/fs.d/zfs/zfsdle/Makefile usr/src/cmd/fs.d/zfs/zfsdle/etc/Makefile usr/src/cmd/fs.d/zfs/zfsdle/etc/SUNW,EC_dev_status,ESC_dev_dle,sysevent.conf usr/src/cmd/fs.d/zfs/zfsdle/zfsdle.c usr/src/cmd/zpool/zpool_main.c usr/src/cmd/ztest/ztest.c usr/src/common/zfs/zpool_prop.c usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_pool.c usr/src/lib/libzfs/common/mapfile-vers usr/src/lib/libzpool/Makefile.com usr/src/lib/libzpool/common/sys/zfs_context.h usr/src/pkgdefs/SUNWzfsr/prototype_com usr/src/pkgdefs/SUNWzfsu/prototype_com usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/spa_config.c usr/src/uts/common/fs/zfs/sys/arc.h usr/src/uts/common/fs/zfs/sys/spa.h usr/src/uts/common/fs/zfs/sys/spa_impl.h usr/src/uts/common/fs/zfs/sys/vdev.h usr/src/uts/common/fs/zfs/sys/vdev_impl.h usr/src/uts/common/fs/zfs/sys/zfs_context.h usr/src/uts/common/fs/zfs/vdev.c usr/src/uts/common/fs/zfs/zvol.c usr/src/uts/common/sys/fs/zfs.h
diffstat 27 files changed, 935 insertions(+), 248 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/fs.d/zfs/Makefile	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/cmd/fs.d/zfs/Makefile	Mon Jun 08 10:35:50 2009 -0700
@@ -18,14 +18,13 @@
 #
 # CDDL HEADER END
 #
+
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 
-SUBDIRS=	fstyp
+SUBDIRS=	fstyp zfsdle
 
 all:=		TARGET= all
 install:=	TARGET= install
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fs.d/zfs/zfsdle/Makefile	Mon Jun 08 10:35:50 2009 -0700
@@ -0,0 +1,64 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+FSTYPE=		zfs
+LIBPROG=	zfsdle
+
+include ../../../../lib/Makefile.lib
+include ../../Makefile.fstype
+
+OBJS=		zfsdle.o
+SRCS=		$(OBJS:%.o=%.c)
+SUBDIRS=	etc
+
+all :=		TARGET = all
+install :=	TARGET = install
+clean :=	TARGET = clean
+clobber :=	TARGET = clobber
+lint :=		TARGET = lint
+
+CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+LDLIBS += -lnvpair -lzfs -lc
+
+.KEEP_STATE:
+
+all: $(SUBDIRS) $(LIBPROG)
+
+install: .WAIT $(SUBDIRS)
+
+lint: lint_SRCS
+
+cstyle:
+	$(CSTYLE) $(SRCS)
+
+clean:     
+	${RM} $(OBJS)
+
+clobber: clean
+
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+FRC:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fs.d/zfs/zfsdle/etc/Makefile	Mon Jun 08 10:35:50 2009 -0700
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+.PARALLEL:
+
+include $(SRC)/cmd/Makefile.cmd
+
+CONF = SUNW,EC_dev_status,ESC_dev_dle,sysevent.conf
+CONFDIR = $(ROOTETC)/sysevent/config
+
+# utilize the predefined install target patterns in cmd/Makefile.targ by
+# overriding ROOTCMDDIR
+#
+ROOTCMDDIR = $(CONFDIR)
+FILEMODE = 0644
+
+.KEEP_STATE:
+
+all clean clobber lint:
+
+install: $(CONF:%=$(CONFDIR)/%)
+
+include $(SRC)/cmd/Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fs.d/zfs/zfsdle/etc/SUNW,EC_dev_status,ESC_dev_dle,sysevent.conf	Mon Jun 08 10:35:50 2009 -0700
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+EC_dev_status ESC_dev_dle SUNW - - - - /usr/lib/fs/zfs/zfsdle $phys_path
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/cmd/fs.d/zfs/zfsdle/zfsdle.c	Mon Jun 08 10:35:50 2009 -0700
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <libnvpair.h>
+#include <libzfs.h>
+
+libzfs_handle_t *g_zfs;
+
+int debug = 0;
+
+#define	DEVICE_PREFIX	"/devices"
+
+/* zpool_iter() callback: online the vdev whose phys path is 'data', if here. */
+static int
+zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
+{
+	char *devname = data;
+	boolean_t avail_spare, l2cache;
+	vdev_state_t newstate;
+	nvlist_t *tgt;
+	int found = 0;
+
+	if (debug)
+		syslog(LOG_ERR, "Searching for %s in pool %s\n",
+		    devname, zpool_get_name(zhp));
+
+	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
+	    &avail_spare, &l2cache, NULL)) != NULL) {
+		char *path, fullpath[MAXPATHLEN];
+		uint64_t wholedisk = 0ULL;
+
+		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
+		    &path) == 0);
+		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+		    &wholedisk) == 0);
+
+		/* Whole-disk paths end in a 2-char slice ("s0"); strip it. */
+		(void) strlcpy(fullpath, path, sizeof (fullpath));
+		if (wholedisk && strlen(fullpath) >= 2)
+			fullpath[strlen(fullpath) - 2] = '\0';
+		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
+			if (debug)
+				syslog(LOG_ERR, "Setting device %s to ONLINE "
+				    "state in pool %s.\n", fullpath,
+				    zpool_get_name(zhp));
+			(void) zpool_vdev_online(zhp, fullpath, 0, &newstate);
+		}
+		found = 1;	/* non-zero return stops the pool iteration */
+	}
+	zpool_close(zhp);	/* zpool_iter() leaves closing to the callback */
+	return (found);
+}
+
+int
+main(int argc, char *argv[])
+{
+	char *devname;
+
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: failed to "
+		    "initialize ZFS library\n"));
+		return (1);
+	}
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing argument\n"));
+		libzfs_fini(g_zfs);
+		return (1);
+	}
+
+	if (argc > 2) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		libzfs_fini(g_zfs);
+		return (1);
+	}
+	devname = argv[1];
+	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
+		(void) fprintf(stderr, gettext("invalid device name '%s'\n"),
+		    devname);
+		libzfs_fini(g_zfs);
+		return (1);
+	}
+
+	/*
+	 * The single argument is a physical device path.  The lookup
+	 * callback matches on the path without its /devices prefix,
+	 * so skip past the prefix before iterating over the pools.
+	 */
+	devname += strlen(DEVICE_PREFIX);
+	if (zpool_iter(g_zfs, zfsdle_vdev_online, devname) != 1) {
+		if (debug)
+			syslog(LOG_ERR, "device '%s': not found\n", argv[1]);
+	}
+	libzfs_fini(g_zfs);
+	return (0);	/* exit status is 0 even when the device was not found */
+}
--- a/usr/src/cmd/zpool/zpool_main.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/cmd/zpool/zpool_main.c	Mon Jun 08 10:35:50 2009 -0700
@@ -2635,10 +2635,14 @@
 	zpool_handle_t *zhp;
 	int ret = 0;
 	vdev_state_t newstate;
+	int flags = 0;
 
 	/* check options */
-	while ((c = getopt(argc, argv, "t")) != -1) {
+	while ((c = getopt(argc, argv, "et")) != -1) {
 		switch (c) {
+		case 'e':
+			flags |= ZFS_ONLINE_EXPAND;
+			break;
 		case 't':
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2666,7 +2670,7 @@
 		return (1);
 
 	for (i = 1; i < argc; i++) {
-		if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) {
+		if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
 			if (newstate != VDEV_STATE_HEALTHY) {
 				(void) printf(gettext("warning: device '%s' "
 				    "onlined, but remains in faulted state\n"),
--- a/usr/src/cmd/ztest/ztest.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/cmd/ztest/ztest.c	Mon Jun 08 10:35:50 2009 -0700
@@ -954,7 +954,7 @@
 		 * of devices that have pending state changes.
 		 */
 		if (ztest_random(2) == 0)
-			(void) vdev_online(spa, guid, B_FALSE, NULL);
+			(void) vdev_online(spa, guid, 0, NULL);
 
 		error = spa_vdev_remove(spa, guid, B_FALSE);
 		if (error != 0 && error != EBUSY)
@@ -1032,7 +1032,7 @@
 	}
 
 	oldguid = oldvd->vdev_guid;
-	oldsize = vdev_get_rsize(oldvd);
+	oldsize = vdev_get_min_asize(oldvd);
 	oldvd_is_log = oldvd->vdev_top->vdev_islog;
 	(void) strcpy(oldpath, oldvd->vdev_path);
 	pvd = oldvd->vdev_parent;
@@ -1068,7 +1068,7 @@
 	}
 
 	if (newvd) {
-		newsize = vdev_get_rsize(newvd);
+		newsize = vdev_get_min_asize(newvd);
 	} else {
 		/*
 		 * Make newsize a little bigger or smaller than oldsize.
@@ -1144,49 +1144,202 @@
 }
 
 /*
+ * Leaf callback: resize the vdev's backing file to *arg bytes (ftruncate).
+ */
+vdev_t *
+grow_vdev(vdev_t *vd, void *arg)
+{
+	spa_t *spa = vd->vdev_spa;
+	size_t *newsize = arg;
+	size_t fsize;
+	int fd;
+
+	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+	if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
+		return (vd);	/* non-NULL ends vdev_walk_tree() early */
+
+	fsize = lseek(fd, 0, SEEK_END);	/* old size; only used in the message */
+	(void) ftruncate(fd, *newsize);
+
+	if (zopt_verbose >= 6) {
+		(void) printf("%s grew from %lu to %lu bytes\n",
+		    vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
+	}
+	(void) close(fd);
+	return (NULL);	/* NULL lets the walk move on to the next leaf */
+}
+
+/*
+ * Leaf callback: expand the vdev through vdev_online(ZFS_ONLINE_EXPAND).
+ */
+/* ARGSUSED */
+vdev_t *
+online_vdev(vdev_t *vd, void *arg)
+{
+	spa_t *spa = vd->vdev_spa;
+	vdev_t *tvd = vd->vdev_top;
+	vdev_t *pvd = vd->vdev_parent;
+	uint64_t guid = vd->vdev_guid;
+
+	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+	/* Calling vdev_online will initialize the new metaslabs */
+	spa_config_exit(spa, SCL_STATE, spa);
+	(void) vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL);
+	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+	/*
+	 * Since we dropped the lock we need to ensure that we're
+	 * still talking to the original vdev. It's possible this
+	 * vdev may have been detached/replaced while we were
+	 * trying to online it.
+	 */
+	if (vd != vdev_lookup_by_guid(tvd, guid) || vd->vdev_parent != pvd) {
+		if (zopt_verbose >= 6) {
+			(void) printf("vdev %p has disappeared, was "
+			    "guid %llu\n", (void *)vd, (u_longlong_t)guid);
+		}
+		return (vd);	/* non-NULL ends vdev_walk_tree() early */
+	}
+	return (NULL);	/* NULL lets the walk move on to the next leaf */
+}
+
+/*
+ * Depth-first walk of the vdev tree rooted at vd.  Each leaf is passed
+ * to func along with arg; the walk ends as soon as func returns a
+ * non-NULL vdev, which is handed back to the caller, or once every
+ * child has been visited (NULL).  With a NULL func the first leaf
+ * reached is returned.
+ */
+vdev_t *
+vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
+{
+	vdev_t *hit;
+	uint_t i;
+
+	/* Leaves end the recursion: hand them back or to the callback. */
+	if (vd->vdev_ops->vdev_op_leaf)
+		return (func == NULL ? vd : func(vd, arg));
+
+	for (i = 0; i < vd->vdev_children; i++) {
+		hit = vdev_walk_tree(vd->vdev_child[i], func, arg);
+		if (hit != NULL)
+			return (hit);
+	}
+	return (NULL);
+}
+
+/*
  * Verify that dynamic LUN growth works as expected.
  */
 void
 ztest_vdev_LUN_growth(ztest_args_t *za)
 {
 	spa_t *spa = za->za_spa;
-	char dev_name[MAXPATHLEN];
-	uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
-	uint64_t vdev;
-	size_t fsize;
-	int fd;
+	vdev_t *vd, *tvd = NULL;
+	size_t psize, newsize;
+	uint64_t spa_newsize, spa_cursize, ms_count;
 
 	(void) mutex_lock(&ztest_shared->zs_vdev_lock);
+	mutex_enter(&spa_namespace_lock);
+	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+
+	while (tvd == NULL || tvd->vdev_islog) {
+		uint64_t vdev;
+
+		vdev = ztest_random(spa->spa_root_vdev->vdev_children);
+		tvd = spa->spa_root_vdev->vdev_child[vdev];
+	}
+
+	/*
+	 * Determine the size of the first leaf vdev associated with
+	 * our top-level device.
+	 */
+	vd = vdev_walk_tree(tvd, NULL, NULL);
+	ASSERT3P(vd, !=, NULL);
+	ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+	psize = vd->vdev_psize;
+
+	/*
+	 * We only try to expand the vdev if it's less than 4x its
+	 * original size and it has a valid psize.
+	 */
+	if (psize == 0 || psize >= 4 * zopt_vdev_size) {
+		spa_config_exit(spa, SCL_STATE, spa);
+		mutex_exit(&spa_namespace_lock);
+		(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+		return;
+	}
+	ASSERT(psize > 0);
+	newsize = psize + psize / 8;
+	ASSERT3U(newsize, >, psize);
+
+	if (zopt_verbose >= 6) {
+		(void) printf("Expanding vdev %s from %lu to %lu\n",
+		    vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
+	}
+
+	spa_cursize = spa_get_space(spa);
+	ms_count = tvd->vdev_ms_count;
 
 	/*
-	 * Pick a random leaf vdev.
+	 * Growing the vdev is a two step process:
+	 *	1). expand the physical size (i.e. relabel)
+	 *	2). online the vdev to create the new metaslabs
+	 */
+	if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
+	    vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
+	    tvd->vdev_state != VDEV_STATE_HEALTHY) {
+		if (zopt_verbose >= 5) {
+			(void) printf("Could not expand LUN because "
+			    "some vdevs were not healthy\n");
+		}
+		(void) spa_config_exit(spa, SCL_STATE, spa);
+		mutex_exit(&spa_namespace_lock);
+		(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
+		return;
+	}
+
+	(void) spa_config_exit(spa, SCL_STATE, spa);
+	mutex_exit(&spa_namespace_lock);
+
+	/*
+	 * Expanding the LUN will update the config asynchronously,
+	 * thus we must wait for the async thread to complete any
+	 * pending tasks before proceeding.
 	 */
-	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
-	vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
-	spa_config_exit(spa, SCL_VDEV, FTAG);
-
-	(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
-
-	if ((fd = open(dev_name, O_RDWR)) != -1) {
-		/*
-		 * Determine the size.
-		 */
-		fsize = lseek(fd, 0, SEEK_END);
-
-		/*
-		 * If it's less than 2x the original size, grow by around 3%.
-		 */
-		if (fsize < 2 * zopt_vdev_size) {
-			size_t newsize = fsize + ztest_random(fsize / 32);
-			(void) ftruncate(fd, newsize);
-			if (zopt_verbose >= 6) {
-				(void) printf("%s grew from %lu to %lu bytes\n",
-				    dev_name, (ulong_t)fsize, (ulong_t)newsize);
-			}
-		}
-		(void) close(fd);
+	mutex_enter(&spa->spa_async_lock);
+	while (spa->spa_async_thread != NULL || spa->spa_async_tasks)
+		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
+	mutex_exit(&spa->spa_async_lock);
+
+	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
+	spa_newsize = spa_get_space(spa);
+
+	/*
+	 * Make sure we were able to grow the pool.
+	 */
+	if (ms_count >= tvd->vdev_ms_count ||
+	    spa_cursize >= spa_newsize) {
+		(void) printf("Top-level vdev metaslab count: "
+		    "before %llu, after %llu\n",
+		    (u_longlong_t)ms_count,
+		    (u_longlong_t)tvd->vdev_ms_count);
+		fatal(0, "LUN expansion failed: before %llu, "
+		    "after %llu\n", spa_cursize, spa_newsize);
+	} else if (zopt_verbose >= 5) {
+		char oldnumbuf[6], newnumbuf[6];
+
+		nicenum(spa_cursize, oldnumbuf);
+		nicenum(spa_newsize, newnumbuf);
+		(void) printf("%s grew from %s to %s\n",
+		    spa->spa_name, oldnumbuf, newnumbuf);
 	}
-
+	spa_config_exit(spa, SCL_STATE, spa);
 	(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
 }
 
--- a/usr/src/common/zfs/zpool_prop.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/common/zfs/zpool_prop.c	Mon Jun 08 10:35:50 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -96,6 +96,8 @@
 	    ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
 	register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, PROP_DEFAULT,
 	    ZFS_TYPE_POOL, "on | off", "LISTSNAPS", boolean_table);
+	register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, PROP_DEFAULT,
+	    ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
 
 	/* default index properties */
 	register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
--- a/usr/src/lib/libzfs/common/libzfs.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/lib/libzfs/common/libzfs.h	Mon Jun 08 10:35:50 2009 -0700
@@ -231,6 +231,8 @@
 
 extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
     boolean_t *, boolean_t *);
+extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
+    boolean_t *, boolean_t *, boolean_t *);
 extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
 
 /*
--- a/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c	Mon Jun 08 10:35:50 2009 -0700
@@ -42,6 +42,7 @@
 #include <sys/zfs_ioctl.h>
 #include <sys/zio.h>
 #include <strings.h>
+#include <dlfcn.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
@@ -55,6 +56,10 @@
 #define	BOOTCMD	"installboot(1M)"
 #endif
 
+#define	DISK_ROOT	"/dev/dsk"
+#define	RDISK_ROOT	"/dev/rdsk"
+#define	BACKUP_SLICE	"s2"
+
 /*
  * ====================================================================
  *   zpool property functions
@@ -628,6 +633,12 @@
 
 
 /*
+ * Don't start the slice at the default block of 34; many storage
+ * devices will use a stripe width of 128k, so start there instead.
+ */
+#define	NEW_START_BLOCK	256
+
+/*
  * Validate the given pool name, optionally putting an extended error message in
  * 'buf'.
  */
@@ -1369,46 +1380,90 @@
 }
 
 /*
+ * Find a vdev that matches the search criteria specified. We use the
+ * nvpair name to determine how we should look for the device.
  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
  * spare; but FALSE if its an INUSE spare.
  */
 static nvlist_t *
-vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
-    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
+    boolean_t *l2cache, boolean_t *log)
 {
 	uint_t c, children;
 	nvlist_t **child;
-	uint64_t theguid, present;
-	char *path;
-	uint64_t wholedisk = 0;
 	nvlist_t *ret;
 	uint64_t is_log;
-
-	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
-
-	if (search == NULL &&
-	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
+	char *srchkey;
+	nvpair_t *pair = nvlist_next_nvpair(search, NULL);
+
+	/* Nothing to look for */
+	if (search == NULL || pair == NULL)
+		return (NULL);
+
+	/* Obtain the key we will use to search */
+	srchkey = nvpair_name(pair);
+
+	switch (nvpair_type(pair)) {
+	case DATA_TYPE_UINT64: {
+		uint64_t srchval, theguid, present;
+
+		verify(nvpair_value_uint64(pair, &srchval) == 0);
+		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
+			if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
+			    &present) == 0) {
+				/*
+				 * If the device has never been present since
+				 * import, the only reliable way to match the
+				 * vdev is by GUID.
+				 */
+				verify(nvlist_lookup_uint64(nv,
+				    ZPOOL_CONFIG_GUID, &theguid) == 0);
+				if (theguid == srchval)
+					return (nv);
+			}
+		}
+		break;
+	}
+
+	case DATA_TYPE_STRING: {
+		char *srchval, *val;
+
+		verify(nvpair_value_string(pair, &srchval) == 0);
+		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
+			break;
+
 		/*
-		 * If the device has never been present since import, the only
-		 * reliable way to match the vdev is by GUID.
+		 * Search for the requested value. We special case the search
+		 * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise,
+		 * all other searches are simple string compares.
 		 */
-		if (theguid == guid)
+		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) {
+			uint64_t wholedisk = 0;
+
+			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+			    &wholedisk);
+			if (wholedisk) {
+				/*
+				 * For whole disks, the internal path has 's0',
+				 * but the path passed in by the user doesn't.
+				 */
+				if (strlen(srchval) == strlen(val) - 2 &&
+				    strncmp(srchval, val, strlen(srchval)) == 0)
+					return (nv);
+				break;
+			}
+		}
+
+		/*
+		 * Common case
+		 */
+		if (strcmp(srchval, val) == 0)
 			return (nv);
-	} else if (search != NULL &&
-	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-		    &wholedisk);
-		if (wholedisk) {
-			/*
-			 * For whole disks, the internal path has 's0', but the
-			 * path passed in by the user doesn't.
-			 */
-			if (strlen(search) == strlen(path) - 2 &&
-			    strncmp(search, path, strlen(search)) == 0)
-				return (nv);
-		} else if (strcmp(search, path) == 0) {
-			return (nv);
-		}
+		break;
+	}
+
+	default:
+		break;
 	}
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
@@ -1416,7 +1471,7 @@
 		return (NULL);
 
 	for (c = 0; c < children; c++) {
-		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+		if ((ret = vdev_to_nvlist_iter(child[c], search,
 		    avail_spare, l2cache, NULL)) != NULL) {
 			/*
 			 * The 'is_log' value is only set for the toplevel
@@ -1437,7 +1492,7 @@
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
-			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+			if ((ret = vdev_to_nvlist_iter(child[c], search,
 			    avail_spare, l2cache, NULL)) != NULL) {
 				*avail_spare = B_TRUE;
 				return (ret);
@@ -1448,7 +1503,7 @@
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
-			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+			if ((ret = vdev_to_nvlist_iter(child[c], search,
 			    avail_spare, l2cache, NULL)) != NULL) {
 				*l2cache = B_TRUE;
 				return (ret);
@@ -1459,24 +1514,48 @@
 	return (NULL);
 }
 
+/*
+ * Given a physical path (minus the "/devices" prefix), find the associated
+ * vdev.  The out-flags are cleared first so a miss never leaves them unset;
+ * 'log' may be NULL.
+ */
+nvlist_t *
+zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
+    boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
+{
+	nvlist_t *search, *nvroot, *ret;
+
+	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	*avail_spare = *l2cache = B_FALSE;
+	if (log != NULL)
+		*log = B_FALSE;
+	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
+	nvlist_free(search);
+	return (ret);
+}
+
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
 {
 	char buf[MAXPATHLEN];
-	const char *search;
 	char *end;
-	nvlist_t *nvroot;
+	nvlist_t *nvroot, *search, *ret;
 	uint64_t guid;
 
+	verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
 	guid = strtoull(path, &end, 10);
 	if (guid != 0 && *end == '\0') {
-		search = NULL;
+		verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
 	} else if (path[0] != '/') {
 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
-		search = buf;
+		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
 	} else {
-		search = path;
+		verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
 	}
 
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
@@ -1486,8 +1565,10 @@
 	*l2cache = B_FALSE;
 	if (log != NULL)
 		*log = B_FALSE;
-	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
-	    l2cache, log));
+	ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
+	nvlist_free(search);
+
+	return (ret);
 }
 
 static int
@@ -1668,6 +1749,45 @@
 }
 
 /*
+ * If the device has being dynamically expanded then we need to relabel
+ * the disk to use the new unallocated space.
+ */
+static int
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+{
+	char path[MAXPATHLEN];
+	char errbuf[1024];
+	int fd, error;
+	int (*_efi_use_whole_disk)(int);
+
+	if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
+	    "efi_use_whole_disk")) == NULL)
+		return (-1);
+
+	(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
+
+	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to open device"), name);
+		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
+	}
+
+	/*
+	 * It's possible that we might encounter an error if the device
+	 * does not have any unallocated space left. If so, we simply
+	 * ignore that error and continue on.
+	 */
+	error = _efi_use_whole_disk(fd);
+	(void) close(fd);
+	if (error && error != VT_ENOSPC) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
+		    "relabel '%s': unable to read disk capacity"), name);
+		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
+	}
+	return (0);
+}
+
+/*
  * Bring the specified vdev online.   The 'flags' parameter is a set of the
  * ZFS_ONLINE_* flags.
  */
@@ -1678,15 +1798,20 @@
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
 	nvlist_t *tgt;
-	boolean_t avail_spare, l2cache;
+	boolean_t avail_spare, l2cache, islog;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
-	(void) snprintf(msg, sizeof (msg),
-	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+	if (flags & ZFS_ONLINE_EXPAND) {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
+	} else {
+		(void) snprintf(msg, sizeof (msg),
+		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
+	}
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
-	    NULL)) == NULL)
+	    &islog)) == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
@@ -1695,6 +1820,31 @@
 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
+	if (flags & ZFS_ONLINE_EXPAND ||
+	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
+		char *pathname = NULL;
+		uint64_t wholedisk = 0;
+
+		(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+		    &wholedisk);
+		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
+		    &pathname) == 0);
+
+		/*
+		 * XXX - L2ARC 1.0 devices can't support expansion.
+		 */
+		if (l2cache) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot expand cache devices"));
+			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
+		}
+
+		if (wholedisk) {
+			pathname += strlen(DISK_ROOT) + 1;
+			(void) zpool_relabel_disk(zhp->zpool_hdl, pathname);
+		}
+	}
+
 	zc.zc_cookie = VDEV_STATE_ONLINE;
 	zc.zc_obj = flags;
 
@@ -2878,14 +3028,6 @@
 	free(mntpnt);
 }
 
-#define	RDISK_ROOT	"/dev/rdsk"
-#define	BACKUP_SLICE	"s2"
-/*
- * Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
- */
-#define	NEW_START_BLOCK	256
-
 /*
  * Read the EFI label from the config, if a label does not exist then
  * pass back the error to the caller. If the caller has passed a non-NULL
--- a/usr/src/lib/libzfs/common/mapfile-vers	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Mon Jun 08 10:35:50 2009 -0700
@@ -149,6 +149,7 @@
 	zpool_find_import_byname;
 	zpool_find_import_cached;
 	zpool_find_vdev;
+	zpool_find_vdev_by_physpath;
 	zpool_get_config;
 	zpool_get_errlog;
 	zpool_get_handle;
--- a/usr/src/lib/libzpool/Makefile.com	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/lib/libzpool/Makefile.com	Mon Jun 08 10:35:50 2009 -0700
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 LIBRARY= libzpool.a
 VERS= .1
@@ -63,7 +61,7 @@
 
 CFLAGS +=	-g $(CCVERBOSE) $(CNOGLOBAL)
 CFLAGS64 +=	-g $(CCVERBOSE) $(CNOGLOBAL)
-LDLIBS +=	-lumem -lavl -lnvpair -lz -lc
+LDLIBS +=	-lumem -lavl -lnvpair -lz -lc -lsysevent
 CPPFLAGS +=	$(INCS)
 
 .KEEP_STATE:
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h	Mon Jun 08 10:35:50 2009 -0700
@@ -59,6 +59,7 @@
 #include <atomic.h>
 #include <dirent.h>
 #include <time.h>
+#include <libsysevent.h>
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/cred.h>
@@ -73,6 +74,7 @@
 #include <sys/kstat.h>
 #include <sys/u8_textprep.h>
 #include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
 
 /*
  * Debugging
@@ -541,6 +543,10 @@
 ksiddomain_t *ksid_lookupdomain(const char *);
 void ksiddomain_rele(ksiddomain_t *);
 
+#define	DDI_SLEEP	KM_SLEEP
+#define	ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) \
+	sysevent_post_event(_c, _d, _b, "libzpool", _e, _f)
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/pkgdefs/SUNWzfsr/prototype_com	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/pkgdefs/SUNWzfsr/prototype_com	Mon Jun 08 10:35:50 2009 -0700
@@ -20,11 +20,9 @@
 #
 
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 
 # packaging files
 i copyright
@@ -39,6 +37,9 @@
 d none etc/fs/zfs 755 root sys
 s none etc/fs/zfs/mount=../../../sbin/zfs
 s none etc/fs/zfs/umount=../../../sbin/zfs
+d none etc/sysevent 755 root sys
+d none etc/sysevent/config 755 root sys
+f none etc/sysevent/config/SUNW,EC_dev_status,ESC_dev_dle,sysevent.conf 644 root sys
 d none etc/zfs 755 root sys
 d none lib 755 root bin
 s none lib/libzfs.so=libzfs.so.1
--- a/usr/src/pkgdefs/SUNWzfsu/prototype_com	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/pkgdefs/SUNWzfsu/prototype_com	Mon Jun 08 10:35:50 2009 -0700
@@ -47,6 +47,7 @@
 l none usr/lib/fs/zfs/fstyp=../../../sbin/fstyp
 s none usr/lib/fs/zfs/mount=../../../../sbin/zfs
 s none usr/lib/fs/zfs/umount=../../../../sbin/zfs
+f none usr/lib/fs/zfs/zfsdle 555 root bin
 s none usr/lib/libzfs.so.1=../../lib/libzfs.so.1
 s none usr/lib/libzfs.so=../../lib/libzfs.so.1
 f none usr/lib/libzfs_jni.so.1 755 root bin
--- a/usr/src/uts/common/fs/zfs/arc.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/arc.c	Mon Jun 08 10:35:50 2009 -0700
@@ -124,6 +124,7 @@
 #include <sys/arc.h>
 #include <sys/refcount.h>
 #include <sys/vdev.h>
+#include <sys/vdev_impl.h>
 #ifdef _KERNEL
 #include <sys/vmsystm.h>
 #include <vm/anon.h>
@@ -4535,7 +4536,7 @@
  * validated the vdev and opened it.
  */
 void
-l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
+l2arc_add_vdev(spa_t *spa, vdev_t *vd)
 {
 	l2arc_dev_t *adddev;
 
@@ -4549,8 +4550,8 @@
 	adddev->l2ad_vdev = vd;
 	adddev->l2ad_write = l2arc_write_max;
 	adddev->l2ad_boost = l2arc_write_boost;
-	adddev->l2ad_start = start;
-	adddev->l2ad_end = end;
+	adddev->l2ad_start = VDEV_LABEL_START_SIZE;
+	adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd);
 	adddev->l2ad_hand = adddev->l2ad_start;
 	adddev->l2ad_evict = adddev->l2ad_start;
 	adddev->l2ad_first = B_TRUE;
--- a/usr/src/uts/common/fs/zfs/spa.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/spa.c	Mon Jun 08 10:35:50 2009 -0700
@@ -59,6 +59,7 @@
 #include <sys/systeminfo.h>
 #include <sys/sunddi.h>
 #include <sys/spa_boot.h>
+#include <sys/zfs_ioctl.h>
 
 #ifdef	_KERNEL
 #include <sys/zone.h>
@@ -332,6 +333,7 @@
 		case ZPOOL_PROP_DELEGATION:
 		case ZPOOL_PROP_AUTOREPLACE:
 		case ZPOOL_PROP_LISTSNAPS:
+		case ZPOOL_PROP_AUTOEXPAND:
 			error = nvpair_value_uint64(elem, &intval);
 			if (!error && intval > 1)
 				error = EINVAL;
@@ -690,7 +692,7 @@
     uint_t id, int atype)
 {
 	nvlist_t **child;
-	uint_t c, children;
+	uint_t children;
 	int error;
 
 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
@@ -711,7 +713,7 @@
 		return (EINVAL);
 	}
 
-	for (c = 0; c < children; c++) {
+	for (int c = 0; c < children; c++) {
 		vdev_t *vd;
 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
 		    atype)) != 0) {
@@ -939,7 +941,7 @@
 	nvlist_t **l2cache;
 	uint_t nl2cache;
 	int i, j, oldnvdevs;
-	uint64_t guid, size;
+	uint64_t guid;
 	vdev_t *vd, **oldvdevs, **newvdevs;
 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
 
@@ -1003,12 +1005,8 @@
 
 			(void) vdev_validate_aux(vd);
 
-			if (!vdev_is_dead(vd)) {
-				size = vdev_get_rsize(vd);
-				l2arc_add_vdev(spa, vd,
-				    VDEV_LABEL_START_SIZE,
-				    size - VDEV_LABEL_START_SIZE);
-			}
+			if (!vdev_is_dead(vd))
+				l2arc_add_vdev(spa, vd);
 		}
 	}
 
@@ -1087,9 +1085,7 @@
 static void
 spa_check_removed(vdev_t *vd)
 {
-	int c;
-
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		spa_check_removed(vd->vdev_child[c]);
 
 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
@@ -1107,7 +1103,7 @@
 {
 	nvlist_t *nv, *nvroot, **child;
 	uint64_t is_log;
-	uint_t children, c;
+	uint_t children;
 	vdev_t *rvd = spa->spa_root_vdev;
 
 	VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
@@ -1115,7 +1111,7 @@
 	VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0);
 
-	for (c = 0; c < children; c++) {
+	for (int c = 0; c < children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
 
 		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
@@ -1513,6 +1509,10 @@
 		    spa->spa_pool_props_object,
 		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
 		    sizeof (uint64_t), 1, &spa->spa_failmode);
+		(void) zap_lookup(spa->spa_meta_objset,
+		    spa->spa_pool_props_object,
+		    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
+		    sizeof (uint64_t), 1, &spa->spa_autoexpand);
 	}
 
 	/*
@@ -2086,7 +2086,7 @@
 	vdev_t *rvd;
 	dsl_pool_t *dp;
 	dmu_tx_t *tx;
-	int c, error = 0;
+	int error = 0;
 	uint64_t txg = TXG_INITIAL;
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
@@ -2148,9 +2148,10 @@
 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
 	    (error = spa_validate_aux(spa, nvroot, txg,
 	    VDEV_ALLOC_ADD)) == 0) {
-		for (c = 0; c < rvd->vdev_children; c++)
-			vdev_init(rvd->vdev_child[c], txg);
-		vdev_config_dirty(rvd);
+		for (int c = 0; c < rvd->vdev_children; c++) {
+			vdev_metaslab_set_size(rvd->vdev_child[c]);
+			vdev_expand(rvd->vdev_child[c], txg);
+		}
 	}
 
 	spa_config_exit(spa, SCL_ALL, FTAG);
@@ -2249,6 +2250,7 @@
 	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
 	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
+	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
 	if (props != NULL) {
 		spa_configfile_set(spa, props, B_FALSE);
 		spa_sync_props(spa, props, CRED(), tx);
@@ -2331,9 +2333,7 @@
 static void
 spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
 {
-	int c;
-
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
 
 	if (vd->vdev_ops->vdev_op_leaf) {
@@ -2627,6 +2627,12 @@
 		spa_config_update_common(spa, SPA_CONFIG_UPDATE_POOL, B_FALSE);
 	}
 
+	/*
+	 * It's possible that the pool was expanded while it was exported.
+	 * We kick off an async task to handle this for us.
+	 */
+	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
+
 	mutex_exit(&spa_namespace_lock);
 
 	return (0);
@@ -3064,10 +3070,9 @@
 	}
 
 	/*
-	 * Compare the new device size with the replaceable/attachable
-	 * device size.
+	 * Make sure the new device is big enough.
 	 */
-	if (newvd->vdev_psize < vdev_get_rsize(oldvd))
+	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
 		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
 
 	/*
@@ -3111,12 +3116,6 @@
 	newvd->vdev_id = pvd->vdev_children;
 	vdev_add_child(pvd, newvd);
 
-	/*
-	 * If newvd is smaller than oldvd, but larger than its rsize,
-	 * the addition of newvd may have decreased our parent's asize.
-	 */
-	pvd->vdev_asize = MIN(pvd->vdev_asize, newvd->vdev_asize);
-
 	tvd = newvd->vdev_top;
 	ASSERT(pvd->vdev_top == tvd);
 	ASSERT(tvd->vdev_parent == rvd);
@@ -3333,12 +3332,16 @@
 	vdev_propagate_state(cvd);
 
 	/*
-	 * If the device we just detached was smaller than the others, it may be
-	 * possible to add metaslabs (i.e. grow the pool).  vdev_metaslab_init()
-	 * can't fail because the existing metaslabs are already in core, so
-	 * there's nothing to read from disk.
+	 * If the 'autoexpand' property is set on the pool then automatically
+	 * try to expand the size of the pool. For example if the device we
+	 * just detached was smaller than the others, it may be possible to
+	 * add metaslabs (i.e. grow the pool). We need to reopen the vdev
+	 * first so that we can obtain the updated sizes of the leaf vdevs.
 	 */
-	VERIFY(vdev_metaslab_init(tvd, txg) == 0);
+	if (spa->spa_autoexpand) {
+		vdev_reopen(tvd);
+		vdev_expand(tvd, txg);
+	}
 
 	vdev_config_dirty(tvd);
 
@@ -3496,9 +3499,8 @@
 spa_vdev_resilver_done_hunt(vdev_t *vd)
 {
 	vdev_t *newvd, *oldvd;
-	int c;
-
-	for (c = 0; c < vd->vdev_children; c++) {
+
+	for (int c = 0; c < vd->vdev_children; c++) {
 		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
 		if (oldvd != NULL)
 			return (oldvd);
@@ -3686,6 +3688,37 @@
 }
 
 static void
+spa_async_autoexpand(spa_t *spa, vdev_t *vd)
+{
+	sysevent_id_t eid;
+	nvlist_t *attr;
+	char *physpath;
+
+	if (!spa->spa_autoexpand)
+		return;
+
+	for (int c = 0; c < vd->vdev_children; c++) {
+		vdev_t *cvd = vd->vdev_child[c];
+		spa_async_autoexpand(spa, cvd);
+	}
+
+	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
+		return;
+
+	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
+
+	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
+
+	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
+	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
+
+	nvlist_free(attr);
+	kmem_free(physpath, MAXPATHLEN);
+}
+
+static void
 spa_async_thread(spa_t *spa)
 {
 	int tasks;
@@ -3701,9 +3734,33 @@
 	 * See if the config needs to be updated.
 	 */
 	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
+		uint64_t oldsz, space_update;
+
 		mutex_enter(&spa_namespace_lock);
+		oldsz = spa_get_space(spa);
 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
+		space_update = spa_get_space(spa) - oldsz;
 		mutex_exit(&spa_namespace_lock);
+
+		/*
+		 * If the pool grew as a result of the config update,
+		 * then log an internal history event.
+		 */
+		if (space_update) {
+			dmu_tx_t *tx;
+
+			tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+			if (dmu_tx_assign(tx, TXG_WAIT) == 0) {
+				spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
+				    spa, tx, CRED(),
+				    "pool '%s' size: %llu(+%llu)",
+				    spa_name(spa), spa_get_space(spa),
+				    space_update);
+				dmu_tx_commit(tx);
+			} else {
+				dmu_tx_abort(tx);
+			}
+		}
 	}
 
 	/*
@@ -3719,6 +3776,12 @@
 		(void) spa_vdev_state_exit(spa, NULL, 0);
 	}
 
+	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
+		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+		spa_async_autoexpand(spa, spa->spa_root_vdev);
+		spa_config_exit(spa, SCL_CONFIG, FTAG);
+	}
+
 	/*
 	 * See if any devices need to be probed.
 	 */
@@ -4031,6 +4094,10 @@
 			case ZPOOL_PROP_FAILUREMODE:
 				spa->spa_failmode = intval;
 				break;
+			case ZPOOL_PROP_AUTOEXPAND:
+				spa->spa_autoexpand = intval;
+				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
+				break;
 			default:
 				break;
 			}
@@ -4192,9 +4259,8 @@
 			int svdcount = 0;
 			int children = rvd->vdev_children;
 			int c0 = spa_get_random(children);
-			int c;
-
-			for (c = 0; c < children; c++) {
+
+			for (int c = 0; c < children; c++) {
 				vd = rvd->vdev_child[(c0 + c) % children];
 				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
 					continue;
--- a/usr/src/uts/common/fs/zfs/spa_config.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/spa_config.c	Mon Jun 08 10:35:50 2009 -0700
@@ -432,10 +432,9 @@
 		 */
 		for (c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *tvd = rvd->vdev_child[c];
-			if (tvd->vdev_ms_array == 0) {
-				vdev_init(tvd, txg);
-				vdev_config_dirty(tvd);
-			}
+			if (tvd->vdev_ms_array == 0)
+				vdev_metaslab_set_size(tvd);
+			vdev_expand(tvd, txg);
 		}
 	}
 	spa_config_exit(spa, SCL_ALL, FTAG);
--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Mon Jun 08 10:35:50 2009 -0700
@@ -136,7 +136,7 @@
  * Level 2 ARC
  */
 
-void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
+void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
 void l2arc_remove_vdev(vdev_t *vd);
 boolean_t l2arc_vdev_present(vdev_t *vd);
 void l2arc_init(void);
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Mon Jun 08 10:35:50 2009 -0700
@@ -344,6 +344,7 @@
 #define	SPA_ASYNC_PROBE		0x04
 #define	SPA_ASYNC_RESILVER_DONE	0x08
 #define	SPA_ASYNC_RESILVER	0x10
+#define	SPA_ASYNC_AUTOEXPAND	0x20
 
 /* device manipulation */
 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Mon Jun 08 10:35:50 2009 -0700
@@ -169,6 +169,7 @@
 	int		spa_minref;		/* num refs when first opened */
 	int		spa_mode;		/* FREAD | FWRITE */
 	spa_log_state_t spa_log_state;		/* log state */
+	uint64_t	spa_autoexpand;		/* lun expansion on/off */
 	/*
 	 * spa_refcnt & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h	Mon Jun 08 10:35:50 2009 -0700
@@ -50,7 +50,6 @@
 extern int vdev_validate(vdev_t *);
 extern void vdev_close(vdev_t *);
 extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
-extern void vdev_init(vdev_t *, uint64_t txg);
 extern void vdev_reopen(vdev_t *);
 extern int vdev_validate_aux(vdev_t *vd);
 extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
@@ -71,6 +70,8 @@
 
 extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
 extern void vdev_metaslab_fini(vdev_t *vd);
+extern void vdev_metaslab_set_size(vdev_t *);
+extern void vdev_expand(vdev_t *vd, uint64_t txg);
 
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
 extern void vdev_clear_stats(vdev_t *vd);
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Mon Jun 08 10:35:50 2009 -0700
@@ -113,6 +113,7 @@
 	uint64_t	vdev_guid;	/* unique ID for this vdev	*/
 	uint64_t	vdev_guid_sum;	/* self guid + all child guids	*/
 	uint64_t	vdev_asize;	/* allocatable device capacity	*/
+	uint64_t	vdev_min_asize;	/* min acceptable asize		*/
 	uint64_t	vdev_ashift;	/* block alignment shift	*/
 	uint64_t	vdev_state;	/* see VDEV_STATE_* #defines	*/
 	uint64_t	vdev_prevstate;	/* used when reopening a vdev	*/
@@ -125,6 +126,7 @@
 	uint64_t	vdev_children;	/* number of children		*/
 	space_map_t	vdev_dtl[DTL_TYPES]; /* in-core dirty time logs	*/
 	vdev_stat_t	vdev_stat;	/* virtual device statistics	*/
+	boolean_t	vdev_expanding;	/* expand the vdev?		*/
 
 	/*
 	 * Top-level vdev state.
@@ -282,7 +284,8 @@
  * Common size functions
  */
 extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
-extern uint64_t vdev_get_rsize(vdev_t *vd);
+extern uint64_t vdev_get_min_asize(vdev_t *vd);
+extern void vdev_set_min_asize(vdev_t *vd);
 
 /*
  * zdb uses this tunable, so it must be declared here to make lint happy.
--- a/usr/src/uts/common/fs/zfs/sys/zfs_context.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_context.h	Mon Jun 08 10:35:50 2009 -0700
@@ -19,15 +19,13 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #ifndef _SYS_ZFS_CONTEXT_H
 #define	_SYS_ZFS_CONTEXT_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -62,6 +60,7 @@
 #include <sys/zfs_debug.h>
 #include <sys/sysevent.h>
 #include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dev.h>
 #include <sys/fm/util.h>
 
 #define	CPU_SEQID	(CPU->cpu_seqid)
--- a/usr/src/uts/common/fs/zfs/vdev.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Mon Jun 08 10:35:50 2009 -0700
@@ -84,9 +84,8 @@
 {
 	uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
 	uint64_t csize;
-	uint64_t c;
-
-	for (c = 0; c < vd->vdev_children; c++) {
+
+	for (int c = 0; c < vd->vdev_children; c++) {
 		csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
 		asize = MAX(asize, csize);
 	}
@@ -95,40 +94,47 @@
 }
 
 /*
- * Get the replaceable or attachable device size.
- * If the parent is a mirror or raidz, the replaceable size is the minimum
- * psize of all its children. For the rest, just return our own psize.
- *
- * e.g.
- *			psize	rsize
- * root			-	-
- *	mirror/raidz	-	-
- *	    disk1	20g	20g
- *	    disk2 	40g	20g
- *	disk3 		80g	80g
+ * Get the minimum allocatable size. We define the allocatable size as
+ * the vdev's asize rounded down to a metaslab boundary. This allows us to
+ * replace or attach devices which don't have the same physical size but
+ * can still satisfy the same number of allocations.
  */
 uint64_t
-vdev_get_rsize(vdev_t *vd)
+vdev_get_min_asize(vdev_t *vd)
 {
-	vdev_t *pvd, *cvd;
-	uint64_t c, rsize;
-
-	pvd = vd->vdev_parent;
+	vdev_t *pvd = vd->vdev_parent;
+
+	/*
+	 * If our parent is NULL (inactive spare or cache) or is the root,
+	 * just return our own asize.
+	 */
+	if (pvd == NULL)
+		return (vd->vdev_asize);
 
 	/*
-	 * If our parent is NULL or the root, just return our own psize.
+	 * The top-level vdev just returns the allocatable size rounded
+	 * down to a metaslab boundary.
+	 */
+	if (vd == vd->vdev_top)
+		return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));
+
+	/*
+	 * The allocatable space for a raidz vdev is N * sizeof(smallest child),
+	 * so each child must provide at least 1/Nth of its asize.
 	 */
-	if (pvd == NULL || pvd->vdev_parent == NULL)
-		return (vd->vdev_psize);
-
-	rsize = 0;
-
-	for (c = 0; c < pvd->vdev_children; c++) {
-		cvd = pvd->vdev_child[c];
-		rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1;
-	}
-
-	return (rsize);
+	if (pvd->vdev_ops == &vdev_raidz_ops)
+		return (pvd->vdev_min_asize / pvd->vdev_children);
+
+	return (pvd->vdev_min_asize);
+}
+
+void
+vdev_set_min_asize(vdev_t *vd)
+{
+	vd->vdev_min_asize = vdev_get_min_asize(vd);
+
+	for (int c = 0; c < vd->vdev_children; c++)
+		vdev_set_min_asize(vd->vdev_child[c]);
 }
 
 vdev_t *
@@ -149,13 +155,12 @@
 vdev_t *
 vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
 {
-	int c;
 	vdev_t *mvd;
 
 	if (vd->vdev_guid == guid)
 		return (vd);
 
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
 		    NULL)
 			return (mvd);
@@ -251,17 +256,17 @@
 {
 	vdev_t **newchild, *cvd;
 	int oldc = pvd->vdev_children;
-	int newc, c;
+	int newc;
 
 	ASSERT(spa_config_held(pvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 
-	for (c = newc = 0; c < oldc; c++)
+	for (int c = newc = 0; c < oldc; c++)
 		if (pvd->vdev_child[c])
 			newc++;
 
 	newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP);
 
-	for (c = newc = 0; c < oldc; c++) {
+	for (int c = newc = 0; c < oldc; c++) {
 		if ((cvd = pvd->vdev_child[c]) != NULL) {
 			newchild[newc] = cvd;
 			cvd->vdev_id = newc++;
@@ -526,7 +531,6 @@
 void
 vdev_free(vdev_t *vd)
 {
-	int c;
 	spa_t *spa = vd->vdev_spa;
 
 	/*
@@ -540,7 +544,7 @@
 	/*
 	 * Free all children.
 	 */
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_free(vd->vdev_child[c]);
 
 	ASSERT(vd->vdev_child == NULL);
@@ -670,14 +674,12 @@
 static void
 vdev_top_update(vdev_t *tvd, vdev_t *vd)
 {
-	int c;
-
 	if (vd == NULL)
 		return;
 
 	vd->vdev_top = tvd;
 
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_top_update(tvd, vd->vdev_child[c]);
 }
 
@@ -696,6 +698,7 @@
 	mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);
 
 	mvd->vdev_asize = cvd->vdev_asize;
+	mvd->vdev_min_asize = cvd->vdev_min_asize;
 	mvd->vdev_ashift = cvd->vdev_ashift;
 	mvd->vdev_state = cvd->vdev_state;
 
@@ -998,7 +1001,6 @@
 {
 	spa_t *spa = vd->vdev_spa;
 	int error;
-	int c;
 	uint64_t osize = 0;
 	uint64_t asize, psize;
 	uint64_t ashift = 0;
@@ -1012,6 +1014,7 @@
 	vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
 	vd->vdev_cant_read = B_FALSE;
 	vd->vdev_cant_write = B_FALSE;
+	vd->vdev_min_asize = vdev_get_min_asize(vd);
 
 	if (!vd->vdev_removed && vd->vdev_faulted) {
 		ASSERT(vd->vdev_children == 0);
@@ -1049,12 +1052,13 @@
 		vd->vdev_state = VDEV_STATE_HEALTHY;
 	}
 
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++) {
 		if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
 			vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
 			    VDEV_AUX_NONE);
 			break;
 		}
+	}
 
 	osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));
 
@@ -1079,6 +1083,15 @@
 
 	vd->vdev_psize = psize;
 
+	/*
+	 * Make sure the allocatable size hasn't shrunk.
+	 */
+	if (asize < vd->vdev_min_asize) {
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_BAD_LABEL);
+		return (EINVAL);
+	}
+
 	if (vd->vdev_asize == 0) {
 		/*
 		 * This is the first-ever open, so use the computed values.
@@ -1095,27 +1108,20 @@
 			    VDEV_AUX_BAD_LABEL);
 			return (EINVAL);
 		}
-
-		/*
-		 * Make sure the device hasn't shrunk.
-		 */
-		if (asize < vd->vdev_asize) {
-			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
-			    VDEV_AUX_BAD_LABEL);
-			return (EINVAL);
-		}
-
-		/*
-		 * If all children are healthy and the asize has increased,
-		 * then we've experienced dynamic LUN growth.
-		 */
-		if (vd->vdev_state == VDEV_STATE_HEALTHY &&
-		    asize > vd->vdev_asize) {
-			vd->vdev_asize = asize;
-		}
 	}
 
 	/*
+	 * If all children are healthy and the asize has increased,
+	 * then we've experienced dynamic LUN growth.  If automatic
+	 * expansion is enabled then use the additional space.
+	 */
+	if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize &&
+	    (vd->vdev_expanding || spa->spa_autoexpand))
+		vd->vdev_asize = asize;
+
+	vdev_set_min_asize(vd);
+
+	/*
 	 * Ensure we can issue some IO before declaring the
 	 * vdev open for business.
 	 */
@@ -1152,12 +1158,11 @@
 vdev_validate(vdev_t *vd)
 {
 	spa_t *spa = vd->vdev_spa;
-	int c;
 	nvlist_t *label;
 	uint64_t guid, top_guid;
 	uint64_t state;
 
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		if (vdev_validate(vd->vdev_child[c]) != 0)
 			return (EBADF);
 
@@ -1243,7 +1248,7 @@
 	vdev_cache_purge(vd);
 
 	/*
-	 * We record the previous state before we close it, so  that if we are
+	 * We record the previous state before we close it, so that if we are
 	 * doing a reopen(), we don't generate FMA ereports if we notice that
 	 * it's still faulted.
 	 */
@@ -1275,12 +1280,8 @@
 		(void) vdev_validate_aux(vd);
 		if (vdev_readable(vd) && vdev_writeable(vd) &&
 		    vd->vdev_aux == &spa->spa_l2cache &&
-		    !l2arc_vdev_present(vd)) {
-			uint64_t size = vdev_get_rsize(vd);
-			l2arc_add_vdev(spa, vd,
-			    VDEV_LABEL_START_SIZE,
-			    size - VDEV_LABEL_START_SIZE);
-		}
+		    !l2arc_vdev_present(vd))
+			l2arc_add_vdev(spa, vd);
 	} else {
 		(void) vdev_validate(vd);
 	}
@@ -1320,26 +1321,14 @@
 	return (0);
 }
 
-/*
- * The is the latter half of vdev_create().  It is distinct because it
- * involves initiating transactions in order to do metaslab creation.
- * For creation, we want to try to create all vdevs at once and then undo it
- * if anything fails; this is much harder if we have pending transactions.
- */
 void
-vdev_init(vdev_t *vd, uint64_t txg)
+vdev_metaslab_set_size(vdev_t *vd)
 {
 	/*
 	 * Aim for roughly 200 metaslabs per vdev.
 	 */
 	vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
 	vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
-
-	/*
-	 * Initialize the vdev's metaslabs.  This can't fail because
-	 * there's nothing to read when creating all new metaslabs.
-	 */
-	VERIFY(vdev_metaslab_init(vd, txg) == 0);
 }
 
 void
@@ -1897,7 +1886,7 @@
 int
 vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
 {
-	vdev_t *vd;
+	vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev;
 
 	spa_vdev_state_enter(spa);
 
@@ -1907,13 +1896,26 @@
 	if (!vd->vdev_ops->vdev_op_leaf)
 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
 
+	tvd = vd->vdev_top;
 	vd->vdev_offline = B_FALSE;
 	vd->vdev_tmpoffline = B_FALSE;
 	vd->vdev_checkremove = !!(flags & ZFS_ONLINE_CHECKREMOVE);
 	vd->vdev_forcefault = !!(flags & ZFS_ONLINE_FORCEFAULT);
-	vdev_reopen(vd->vdev_top);
+
+	/* XXX - L2ARC 1.0 does not support expansion */
+	if (!vd->vdev_aux) {
+		for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
+			pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND);
+	}
+
+	vdev_reopen(tvd);
 	vd->vdev_checkremove = vd->vdev_forcefault = B_FALSE;
 
+	if (!vd->vdev_aux) {
+		for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
+			pvd->vdev_expanding = B_FALSE;
+	}
+
 	if (newstate)
 		*newstate = vd->vdev_state;
 	if ((flags & ZFS_ONLINE_UNSPARE) &&
@@ -1922,6 +1924,13 @@
 	    vd->vdev_parent->vdev_child[0] == vd)
 		vd->vdev_unspare = B_TRUE;
 
+	if ((flags & ZFS_ONLINE_EXPAND) || spa->spa_autoexpand) {
+
+		/* XXX - L2ARC 1.0 does not support expansion */
+		if (vd->vdev_aux)
+			return (spa_vdev_state_exit(spa, vd, ENOTSUP));
+		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+	}
 	return (spa_vdev_state_exit(spa, vd, 0));
 }
 
@@ -2105,7 +2114,9 @@
 	vs->vs_scrub_errors = vd->vdev_spa->spa_scrub_errors;
 	vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
 	vs->vs_state = vd->vdev_state;
-	vs->vs_rsize = vdev_get_rsize(vd);
+	vs->vs_rsize = vdev_get_min_asize(vd);
+	if (vd->vdev_ops->vdev_op_leaf)
+		vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
 	mutex_exit(&vd->vdev_stat_lock);
 
 	/*
@@ -2258,10 +2269,9 @@
 void
 vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
 {
-	int c;
 	vdev_stat_t *vs = &vd->vdev_stat;
 
-	for (c = 0; c < vd->vdev_children; c++)
+	for (int c = 0; c < vd->vdev_children; c++)
 		vdev_scrub_stat_update(vd->vdev_child[c], type, complete);
 
 	mutex_enter(&vd->vdev_stat_lock);
@@ -2472,11 +2482,10 @@
 	vdev_t *rvd = spa->spa_root_vdev;
 	int degraded = 0, faulted = 0;
 	int corrupted = 0;
-	int c;
 	vdev_t *child;
 
 	if (vd->vdev_children > 0) {
-		for (c = 0; c < vd->vdev_children; c++) {
+		for (int c = 0; c < vd->vdev_children; c++) {
 			child = vd->vdev_child[c];
 
 			if (!vdev_readable(child) ||
@@ -2651,8 +2660,6 @@
 boolean_t
 vdev_is_bootable(vdev_t *vd)
 {
-	int c;
-
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		char *vdev_type = vd->vdev_ops->vdev_op_type;
 
@@ -2667,7 +2674,7 @@
 		return (B_FALSE);
 	}
 
-	for (c = 0; c < vd->vdev_children; c++) {
+	for (int c = 0; c < vd->vdev_children; c++) {
 		if (!vdev_is_bootable(vd->vdev_child[c]))
 			return (B_FALSE);
 	}
@@ -2677,14 +2684,14 @@
 void
 vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
 {
-	uint_t c, children;
+	uint_t children;
 	nvlist_t **child;
 	uint64_t val;
 	spa_t *spa = vd->vdev_spa;
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
-		for (c = 0; c < children; c++)
+		for (int c = 0; c < children; c++)
 			vdev_load_log_state(vd->vdev_child[c], child[c]);
 	}
 
@@ -2702,3 +2709,18 @@
 		spa_config_exit(spa, SCL_STATE_ALL, FTAG);
 	}
 }
+
+/*
+ * Expand a top-level vdev if its asize now covers additional metaslabs.
+ */
+void
+vdev_expand(vdev_t *vd, uint64_t txg)
+{
+	ASSERT(vd->vdev_top == vd);
+	ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
+
+	if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) {
+		VERIFY(vdev_metaslab_init(vd, txg) == 0);
+		vdev_config_dirty(vd);
+	}
+}
--- a/usr/src/uts/common/fs/zfs/zvol.c	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Mon Jun 08 10:35:50 2009 -0700
@@ -742,6 +742,27 @@
 		}
 	}
 
+	/*
+	 * Generate a LUN expansion event.
+	 */
+	if (error == 0) {
+		sysevent_id_t eid;
+		nvlist_t *attr;
+		char *physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+		(void) snprintf(physpath, MAXPATHLEN, "%s%uc", ZVOL_PSEUDO_DEV,
+		    zv->zv_minor);
+
+		VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
+
+		(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
+		    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
+
+		nvlist_free(attr);
+		kmem_free(physpath, MAXPATHLEN);
+	}
+
 out:
 	if (state.zv_objset)
 		dmu_objset_close(state.zv_objset);
--- a/usr/src/uts/common/sys/fs/zfs.h	Mon Jun 08 11:43:53 2009 -0400
+++ b/usr/src/uts/common/sys/fs/zfs.h	Mon Jun 08 10:35:50 2009 -0700
@@ -146,6 +146,7 @@
 	ZPOOL_PROP_CACHEFILE,
 	ZPOOL_PROP_FAILUREMODE,
 	ZPOOL_PROP_LISTSNAPS,
+	ZPOOL_PROP_AUTOEXPAND,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;
 
@@ -538,7 +539,7 @@
 /*
  * And here are the things we need with /dev, etc. in front of them.
  */
-#define	ZVOL_PSEUDO_DEV		"/devices/pseudo/zvol@0:"
+#define	ZVOL_PSEUDO_DEV		"/devices/pseudo/zfs@0:"
 #define	ZVOL_FULL_DEV_DIR	"/dev/" ZVOL_DEV_DIR "/"
 
 #define	ZVOL_PROP_NAME		"name"
@@ -642,6 +643,7 @@
 #define	ZFS_ONLINE_CHECKREMOVE	0x1
 #define	ZFS_ONLINE_UNSPARE	0x2
 #define	ZFS_ONLINE_FORCEFAULT	0x4
+#define	ZFS_ONLINE_EXPAND	0x8
 #define	ZFS_OFFLINE_TEMPORARY	0x1
 
 /*