changeset 10830:dd88d8700b3e

6893087 removing and inserting a faulted vdev clears faulted state 6893088 new ereports need to be generated when clearing a vdev 6893090 clearing a vdev should automatically detach spare 6893099 zpool_clear_label() would be useful
author Eric Schrock <Eric.Schrock@Sun.COM>
date Wed, 21 Oct 2009 16:03:06 -0700
parents ef1d46805087
children e65585ea170c
files usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_import.c usr/src/lib/libzfs/common/mapfile-vers usr/src/uts/common/fs/zfs/vdev.c
diffstat 4 files changed, 65 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/lib/libzfs/common/libzfs.h	Wed Oct 21 15:51:07 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs.h	Wed Oct 21 16:03:06 2009 -0700
@@ -581,9 +581,10 @@
     boolean_t *);
 
 /*
- * ftyp special.  Read the label from a given device.
+ * Label manipulation.
  */
 extern int zpool_read_label(int, nvlist_t **);
+extern int zpool_clear_label(int);
 
 /* is this zvol valid for use as a dump device? */
 extern int zvol_check_dump_config(char *);
--- a/usr/src/lib/libzfs/common/libzfs_import.c	Wed Oct 21 15:51:07 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_import.c	Wed Oct 21 16:03:06 2009 -0700
@@ -898,6 +898,36 @@
 }
 
 /*
+ * Given a file descriptor, clear (zero) the label information.  This function
+ * is currently only used in the appliance stack as part of the ZFS sysevent
+ * module.
+ */
+int
+zpool_clear_label(int fd)
+{
+	struct stat64 statbuf;
+	int l, err = 0;
+	vdev_label_t *label;
+	uint64_t size;
+
+	/* NOTE(review): an fstat failure is reported as success; confirm. */
+	if (fstat64(fd, &statbuf) == -1)
+		return (0);
+	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+	if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
+		return (-1);
+
+	/* Write the zeroed buffer at each label offset; stop on failure. */
+	for (l = 0; l < VDEV_LABELS && err == 0; l++) {
+		if (pwrite64(fd, label, sizeof (vdev_label_t),
+		    label_offset(size, l)) != sizeof (vdev_label_t))
+			err = -1;
+	}
+	free(label);	/* freed on the error path too */
+	return (err);	/* 0 on success, -1 on a failed or short write */
+}
+
+/*
  * Given a list of directories to search, find all pools stored on disk.  This
  * includes partial pools which are not available to import.  If no args are
  * given (argc is 0), then the default directory (/dev/dsk) is searched.
--- a/usr/src/lib/libzfs/common/mapfile-vers	Wed Oct 21 15:51:07 2009 -0700
+++ b/usr/src/lib/libzfs/common/mapfile-vers	Wed Oct 21 16:03:06 2009 -0700
@@ -151,6 +151,7 @@
 	zfs_userquota_prop_prefixes;
 	zpool_add;
 	zpool_clear;
+	zpool_clear_label;
 	zpool_close;
 	zpool_create;
 	zpool_destroy;
--- a/usr/src/uts/common/fs/zfs/vdev.c	Wed Oct 21 15:51:07 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Wed Oct 21 16:03:06 2009 -0700
@@ -1132,6 +1132,19 @@
 
 	vd->vdev_removed = B_FALSE;
 
+	/*
+	 * Recheck the faulted flag now that we have confirmed that
+	 * the vdev is accessible.  If we're faulted, bail.
+	 */
+	if (vd->vdev_faulted) {
+		ASSERT(vd->vdev_children == 0);
+		ASSERT(vd->vdev_label_aux == VDEV_AUX_ERR_EXCEEDED ||
+		    vd->vdev_label_aux == VDEV_AUX_EXTERNAL);
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_FAULTED,
+		    vd->vdev_label_aux);
+		return (ENXIO);
+	}
+
 	if (vd->vdev_degraded) {
 		ASSERT(vd->vdev_children == 0);
 		vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
@@ -2231,12 +2244,21 @@
 	if (vd->vdev_faulted || vd->vdev_degraded ||
 	    !vdev_readable(vd) || !vdev_writeable(vd)) {
 
+		/*
+		 * When reopening in response to a clear event, it may be due to
+		 * an fmadm repair request.  In this case, if the device is
+		 * still broken, we want to still post the ereport again.
+		 */
+		vd->vdev_forcefault = B_TRUE;
+
 		vd->vdev_faulted = vd->vdev_degraded = 0;
 		vd->vdev_cant_read = B_FALSE;
 		vd->vdev_cant_write = B_FALSE;
 
 		vdev_reopen(vd);
 
+		vd->vdev_forcefault = B_FALSE;
+
 		if (vd != rvd)
 			vdev_state_dirty(vd->vdev_top);
 
@@ -2245,6 +2267,16 @@
 
 		spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR);
 	}
+
+	/*
+	 * When clearing a FMA-diagnosed fault, we always want to
+	 * unspare the device, as we assume that the original spare was
+	 * done in response to the FMA fault.
+	 */
+	if (!vdev_is_dead(vd) && vd->vdev_parent != NULL &&
+	    vd->vdev_parent->vdev_ops == &vdev_spare_ops &&
+	    vd->vdev_parent->vdev_child[0] == vd)
+		vd->vdev_unspare = B_TRUE;
 }
 
 boolean_t