Mercurial > illumos > illumos-gate
changeset 13840:97fd5cdf328a
3145 single-copy arc
3212 ztest: race condition between vdev_online() and spa_vdev_remove()
Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Reviewed by: Justin T. Gibbs <gibbs@scsiguy.com>
Approved by: Eric Schrock <eric.schrock@delphix.com>
author | George Wilson <george.wilson@delphix.com> |
---|---|
date | Thu, 27 Sep 2012 21:55:09 -0700 |
parents | 2d7fbebd2923 |
children | 38a9b4eba5af |
files | usr/src/cmd/mdb/common/modules/zfs/zfs.c usr/src/cmd/ztest/ztest.c usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/dbuf.c usr/src/uts/common/fs/zfs/sys/arc.h |
diffstat | 5 files changed, 116 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Thu Sep 27 13:58:00 2012 -0400 +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Thu Sep 27 21:55:09 2012 -0700 @@ -905,7 +905,8 @@ const char *suffix; static const char *bytestats[] = { - "p", "c", "c_min", "c_max", "size", NULL + "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", + NULL }; static const char *extras[] = {
--- a/usr/src/cmd/ztest/ztest.c Thu Sep 27 13:58:00 2012 -0400 +++ b/usr/src/cmd/ztest/ztest.c Thu Sep 27 21:55:09 2012 -0700 @@ -4695,7 +4695,18 @@ if (islog) (void) rw_unlock(&ztest_name_lock); } else { + /* + * Ideally we would like to be able to randomly + * call vdev_[on|off]line without holding locks + * to force unpredictable failures but the side + * effects of vdev_[on|off]line prevent us from + * doing so. We grab the ztest_vdev_lock here to + * prevent a race between injection testing and + * aux_vdev removal. + */ + VERIFY(mutex_lock(&ztest_vdev_lock) == 0); (void) vdev_online(spa, guid0, 0, NULL); + VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); } }
--- a/usr/src/uts/common/fs/zfs/arc.c Thu Sep 27 13:58:00 2012 -0400 +++ b/usr/src/uts/common/fs/zfs/arc.c Thu Sep 27 21:55:09 2012 -0700 @@ -188,6 +188,7 @@ int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; +int zfs_disable_dup_eviction = 0; /* * Note that buffers can be in one of 6 states: @@ -290,6 +291,9 @@ kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; + kstat_named_t arcstat_duplicate_buffers; + kstat_named_t arcstat_duplicate_buffers_size; + kstat_named_t arcstat_duplicate_reads; } arc_stats_t; static arc_stats_t arc_stats = { @@ -345,7 +349,10 @@ { "l2_io_error", KSTAT_DATA_UINT64 }, { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, - { "memory_throttle_count", KSTAT_DATA_UINT64 } + { "memory_throttle_count", KSTAT_DATA_UINT64 }, + { "duplicate_buffers", KSTAT_DATA_UINT64 }, + { "duplicate_buffers_size", KSTAT_DATA_UINT64 }, + { "duplicate_reads", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -1360,6 +1367,17 @@ hdr->b_buf = buf; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); + + /* + * This buffer already exists in the arc so create a duplicate + * copy for the caller. If the buffer is associated with user data + * then track the size and number of duplicates. These stats will be + * updated as duplicate buffers are created and destroyed. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMP(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, size); + } hdr->b_datacnt += 1; return (buf); } @@ -1458,6 +1476,16 @@ ASSERT3U(state->arcs_size, >=, size); atomic_add_64(&state->arcs_size, -size); buf->b_data = NULL; + + /* + * If we're destroying a duplicate buffer make sure + * that the appropriate statistics are updated. + */ + if (buf->b_hdr->b_datacnt > 1 && + buf->b_hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size); + } ASSERT(buf->b_hdr->b_datacnt > 0); buf->b_hdr->b_datacnt -= 1; } @@ -1642,6 +1670,48 @@ } /* + * Called from the DMU to determine if the current buffer should be + * evicted. In order to ensure proper locking, the eviction must be initiated + * from the DMU. Return true if the buffer is associated with user data and + * duplicate buffers still exist. + */ +boolean_t +arc_buf_eviction_needed(arc_buf_t *buf) +{ + arc_buf_hdr_t *hdr; + boolean_t evict_needed = B_FALSE; + + if (zfs_disable_dup_eviction) + return (B_FALSE); + + mutex_enter(&buf->b_evict_lock); + hdr = buf->b_hdr; + if (hdr == NULL) { + /* + * We are in arc_do_user_evicts(); let that function + * perform the eviction. + */ + ASSERT(buf->b_data == NULL); + mutex_exit(&buf->b_evict_lock); + return (B_FALSE); + } else if (buf->b_data == NULL) { + /* + * We have already been added to the arc eviction list; + * recommend eviction. + */ + ASSERT3P(hdr, ==, &arc_eviction_hdr); + mutex_exit(&buf->b_evict_lock); + return (B_TRUE); + } + + if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA) + evict_needed = B_TRUE; + + mutex_exit(&buf->b_evict_lock); + return (evict_needed); +} + +/* * Evict buffers from list until we've removed the specified number of * bytes. Move the removed buffers to the appropriate evict state. * If the recycle flag is set, then attempt to "recycle" a buffer: @@ -2626,8 +2696,10 @@ abuf = buf; for (acb = callback_list; acb; acb = acb->acb_next) { if (acb->acb_done) { - if (abuf == NULL) + if (abuf == NULL) { + ARCSTAT_BUMP(arcstat_duplicate_reads); abuf = arc_buf_clone(buf); + } acb->acb_buf = abuf; abuf = NULL; } @@ -3166,6 +3238,16 @@ ASSERT3U(*size, >=, hdr->b_size); atomic_add_64(size, -hdr->b_size); } + + /* + * We're releasing a duplicate user data buffer, update + * our statistics accordingly. + */ + if (hdr->b_type == ARC_BUFC_DATA) { + ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); + ARCSTAT_INCR(arcstat_duplicate_buffers_size, + -hdr->b_size); + } hdr->b_datacnt -= 1; arc_cksum_verify(buf); arc_buf_unwatch(buf);
--- a/usr/src/uts/common/fs/zfs/dbuf.c Thu Sep 27 13:58:00 2012 -0400 +++ b/usr/src/uts/common/fs/zfs/dbuf.c Thu Sep 27 21:55:09 2012 -0700 @@ -2089,7 +2089,24 @@ dbuf_evict(db); } else { VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); - if (!DBUF_IS_CACHEABLE(db)) + + /* + * A dbuf will be eligible for eviction if either the + * 'primarycache' property is set or a duplicate + * copy of this buffer is already cached in the arc. + * + * In the case of the 'primarycache' a buffer + * is considered for eviction if it matches the + * criteria set in the property. + * + * To decide if our buffer is considered a + * duplicate, we must call into the arc to determine + * if multiple buffers are referencing the same + * block on-disk. If so, then we simply evict + * ourselves. + */ + if (!DBUF_IS_CACHEABLE(db) || + arc_buf_eviction_needed(db->db_buf)) dbuf_clear(db); else mutex_exit(&db->db_mtx);
--- a/usr/src/uts/common/fs/zfs/sys/arc.h Thu Sep 27 13:58:00 2012 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/arc.h Thu Sep 27 21:55:09 2012 -0700 @@ -99,6 +99,7 @@ int arc_has_callback(arc_buf_t *buf); void arc_buf_freeze(arc_buf_t *buf); void arc_buf_thaw(arc_buf_t *buf); +boolean_t arc_buf_eviction_needed(arc_buf_t *buf); #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf); #endif