changeset 3697:5340a4d98e0b

6456888 zpool scrubbing leads to memory exhaustion and system hang
author mishra
date Thu, 22 Feb 2007 13:40:56 -0800
parents 374685e62d7a
children e3112ae4a154
files usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/sys/spa.h usr/src/uts/common/fs/zfs/sys/spa_impl.h usr/src/uts/common/fs/zfs/sys/vdev_impl.h usr/src/uts/common/fs/zfs/vdev.c usr/src/uts/common/fs/zfs/vdev_queue.c
diffstat 6 files changed, 26 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/spa.c	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/spa.c	Thu Feb 22 13:40:56 2007 -0800
@@ -2179,17 +2179,6 @@
  * ==========================================================================
  */
 
-void
-spa_scrub_throttle(spa_t *spa, int direction)
-{
-	mutex_enter(&spa->spa_scrub_lock);
-	spa->spa_scrub_throttled += direction;
-	ASSERT(spa->spa_scrub_throttled >= 0);
-	if (spa->spa_scrub_throttled == 0)
-		cv_broadcast(&spa->spa_scrub_io_cv);
-	mutex_exit(&spa->spa_scrub_lock);
-}
-
 static void
 spa_scrub_io_done(zio_t *zio)
 {
@@ -2205,10 +2194,12 @@
 		vd->vdev_stat.vs_scrub_errors++;
 		mutex_exit(&vd->vdev_stat_lock);
 	}
-	if (--spa->spa_scrub_inflight == 0) {
+
+	if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight)
 		cv_broadcast(&spa->spa_scrub_io_cv);
-		ASSERT(spa->spa_scrub_throttled == 0);
-	}
+
+	ASSERT(spa->spa_scrub_inflight >= 0);
+
 	mutex_exit(&spa->spa_scrub_lock);
 }
 
@@ -2217,12 +2208,20 @@
     zbookmark_t *zb)
 {
 	size_t size = BP_GET_LSIZE(bp);
-	void *data = zio_data_buf_alloc(size);
+	void *data;
 
 	mutex_enter(&spa->spa_scrub_lock);
+	/*
+	 * Do not give too much work to vdev(s).
+	 */
+	while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) {
+		cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+	}
 	spa->spa_scrub_inflight++;
 	mutex_exit(&spa->spa_scrub_lock);
 
+	data = zio_data_buf_alloc(size);
+
 	if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
 		flags |= ZIO_FLAG_SPECULATIVE;	/* intent log block */
 
@@ -2333,7 +2332,6 @@
 	spa->spa_scrub_errors = 0;
 	spa->spa_scrub_active = 1;
 	ASSERT(spa->spa_scrub_inflight == 0);
-	ASSERT(spa->spa_scrub_throttled == 0);
 
 	while (!spa->spa_scrub_stop) {
 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
@@ -2353,9 +2351,6 @@
 		mutex_enter(&spa->spa_scrub_lock);
 		if (error != EAGAIN)
 			break;
-
-		while (spa->spa_scrub_throttled > 0)
-			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
 	}
 
 	while (spa->spa_scrub_inflight)
--- a/usr/src/uts/common/fs/zfs/sys/spa.h	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h	Thu Feb 22 13:40:56 2007 -0800
@@ -355,7 +355,6 @@
 extern void spa_scrub_suspend(spa_t *spa);
 extern void spa_scrub_resume(spa_t *spa);
 extern void spa_scrub_restart(spa_t *spa, uint64_t txg);
-extern void spa_scrub_throttle(spa_t *spa, int direction);
 
 /* spa syncing */
 extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h	Thu Feb 22 13:40:56 2007 -0800
@@ -111,7 +111,7 @@
 	uint64_t	spa_scrub_mintxg;	/* min txg we'll scrub */
 	uint64_t	spa_scrub_maxtxg;	/* max txg we'll scrub */
 	uint64_t	spa_scrub_inflight;	/* in-flight scrub I/Os */
-	int64_t		spa_scrub_throttled;	/* over-throttle scrub I/Os */
+	uint64_t	spa_scrub_maxinflight;	/* max in-flight scrub I/Os */
 	uint64_t	spa_scrub_errors;	/* scrub I/O error count */
 	int		spa_scrub_suspended;	/* tell scrubber to suspend */
 	kcondvar_t	spa_scrub_cv;		/* scrub thread state change */
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h	Thu Feb 22 13:40:56 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -97,7 +97,6 @@
 };
 
 struct vdev_queue {
-	uint64_t	vq_scrub_count;
 	avl_tree_t	vq_deadline_tree;
 	avl_tree_t	vq_read_tree;
 	avl_tree_t	vq_write_tree;
--- a/usr/src/uts/common/fs/zfs/vdev.c	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/vdev.c	Thu Feb 22 13:40:56 2007 -0800
@@ -57,6 +57,9 @@
 	NULL
 };
 
+/* maximum scrub/resilver I/O queue */
+int zfs_scrub_limit = 70;
+
 /*
  * Given a vdev type, return the appropriate ops vector.
  */
@@ -194,6 +197,9 @@
 	 */
 	for (; pvd != NULL; pvd = pvd->vdev_parent)
 		pvd->vdev_guid_sum += cvd->vdev_guid_sum;
+
+	if (cvd->vdev_ops->vdev_op_leaf)
+		cvd->vdev_spa->spa_scrub_maxinflight += zfs_scrub_limit;
 }
 
 void
@@ -228,6 +234,9 @@
 	 */
 	for (; pvd != NULL; pvd = pvd->vdev_parent)
 		pvd->vdev_guid_sum -= cvd->vdev_guid_sum;
+
+	if (cvd->vdev_ops->vdev_op_leaf)
+		cvd->vdev_spa->spa_scrub_maxinflight -= zfs_scrub_limit;
 }
 
 /*
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c	Thu Feb 22 10:42:15 2007 -0800
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c	Thu Feb 22 13:40:56 2007 -0800
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -43,9 +43,6 @@
 int zfs_vdev_max_pending = 35;
 int zfs_vdev_min_pending = 4;
 
-/* maximum scrub/resilver I/O queue */
-int zfs_scrub_limit = 70;
-
 /* deadline = pri + (lbolt >> time_shift) */
 int zfs_vdev_time_shift = 6;
 
@@ -129,8 +126,6 @@
 {
 	vdev_queue_t *vq = &vd->vdev_queue;
 
-	ASSERT(vq->vq_scrub_count == 0);
-
 	avl_destroy(&vq->vq_deadline_tree);
 	avl_destroy(&vq->vq_read_tree);
 	avl_destroy(&vq->vq_write_tree);
@@ -144,19 +139,11 @@
 {
 	avl_add(&vq->vq_deadline_tree, zio);
 	avl_add(zio->io_vdev_tree, zio);
-
-	if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
-	    ++vq->vq_scrub_count >= zfs_scrub_limit)
-		spa_scrub_throttle(zio->io_spa, 1);
 }
 
 static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
-	if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
-	    vq->vq_scrub_count-- >= zfs_scrub_limit)
-		spa_scrub_throttle(zio->io_spa, -1);
-
 	avl_remove(&vq->vq_deadline_tree, zio);
 	avl_remove(zio->io_vdev_tree, zio);
 }