Mercurial > illumos > illumos-gate
changeset 12450:c77e20e4e046
6938089 dedup-induced latency causes FC initiator logouts/FC port resets
field | value |
---|---|
author | George Wilson <George.Wilson@Sun.COM> |
date | Wed, 19 May 2010 22:59:13 -0700 |
parents | a87750d92895 |
children | 39b0738596da |
files | usr/src/uts/common/fs/zfs/dbuf.c usr/src/uts/common/fs/zfs/ddt.c usr/src/uts/common/fs/zfs/ddt_zap.c usr/src/uts/common/fs/zfs/dmu_tx.c usr/src/uts/common/fs/zfs/dsl_dataset.c usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/sys/ddt.h usr/src/uts/common/fs/zfs/sys/dsl_dataset.h usr/src/uts/common/fs/zfs/sys/zap.h usr/src/uts/common/fs/zfs/sys/zap_impl.h usr/src/uts/common/fs/zfs/sys/zio.h usr/src/uts/common/fs/zfs/zap.c usr/src/uts/common/fs/zfs/zap_micro.c usr/src/uts/common/fs/zfs/zio.c |
diffstat | 14 files changed, 121 insertions(+), 17 deletions(-) |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/dbuf.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dbuf.c Wed May 19 22:59:13 2010 -0700 @@ -868,7 +868,7 @@ /* If we don't exist or are in a snapshot, we can't be freed */ if (birth_txg) return (ds == NULL || - dsl_dataset_block_freeable(ds, birth_txg)); + dsl_dataset_block_freeable(ds, db->db_blkptr, birth_txg)); else return (FALSE); } @@ -1725,6 +1725,8 @@ if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { if (bp && !BP_IS_HOLE(bp)) { + int priority = dn->dn_type == DMU_OT_DDT_ZAP ? + ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ; arc_buf_t *pbuf; dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; @@ -1739,7 +1741,7 @@ pbuf = dn->dn_objset->os_phys_buf; (void) dsl_read(NULL, dn->dn_objset->os_spa, - bp, pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, + bp, pbuf, NULL, NULL, priority, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, &zb); } @@ -2033,7 +2035,7 @@ if (db->db_blkptr) res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, - db->db_blkptr->blk_birth); + db->db_blkptr, db->db_blkptr->blk_birth); return (res); }
--- a/usr/src/uts/common/fs/zfs/ddt.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/ddt.c Wed May 19 22:59:13 2010 -0700 @@ -160,6 +160,17 @@ ddt->ddt_object[type][class], dde)); } +static void +ddt_object_prefetch(ddt_t *ddt, enum ddt_type type, enum ddt_class class, + ddt_entry_t *dde) +{ + if (!ddt_object_exists(ddt, type, class)) + return; + + ddt_ops[type]->ddt_op_prefetch(ddt->ddt_os, + ddt->ddt_object[type][class], dde); +} + int ddt_object_update(ddt_t *ddt, enum ddt_type type, enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx) @@ -713,6 +724,30 @@ return (dde); } +void +ddt_prefetch(spa_t *spa, const blkptr_t *bp) +{ + ddt_t *ddt; + ddt_entry_t dde; + + if (!BP_GET_DEDUP(bp)) + return; + + /* + * We remove the DDT once it's empty and only prefetch dedup blocks + * when there are entries in the DDT. Thus no locking is required + * as the DDT can't disappear on us. + */ + ddt = ddt_select(spa, bp); + ddt_key_fill(&dde.dde_key, bp); + + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { + ddt_object_prefetch(ddt, type, class, &dde); + } + } +} + int ddt_entry_compare(const void *x1, const void *x2) {
--- a/usr/src/uts/common/fs/zfs/ddt_zap.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/ddt_zap.c Wed May 19 22:59:13 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/zfs_context.h> @@ -81,6 +80,13 @@ return (0); } +static void +ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde) +{ + (void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key, + DDT_KEY_WORDS); +} + static int ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) { @@ -143,6 +149,7 @@ ddt_zap_create, ddt_zap_destroy, ddt_zap_lookup, + ddt_zap_prefetch, ddt_zap_update, ddt_zap_remove, ddt_zap_walk,
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c Wed May 19 22:59:13 2010 -0700 @@ -195,7 +195,7 @@ } freeable = (bp && (freeable || - dsl_dataset_block_freeable(ds, bp->blk_birth))); + dsl_dataset_block_freeable(ds, bp, bp->blk_birth))); if (freeable) txh->txh_space_tooverwrite += space; @@ -390,7 +390,7 @@ if (dn && dn->dn_dbuf->db_blkptr && dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - dn->dn_dbuf->db_blkptr->blk_birth)) { + dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) { txh->txh_space_tooverwrite += space; txh->txh_space_tounref += space; } else { @@ -465,7 +465,7 @@ blkptr_t *bp = dn->dn_phys->dn_blkptr; ASSERT3U(blkid + i, <, dn->dn_nblkptr); bp += blkid + i; - if (dsl_dataset_block_freeable(ds, bp->blk_birth)) { + if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) { dprintf_bp(bp, "can free old%s", ""); space += bp_get_dsize(spa, bp); } @@ -550,7 +550,8 @@ bp += blkoff; for (i = 0; i < tochk; i++) { - if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) { + if (dsl_dataset_block_freeable(ds, &bp[i], + bp[i].blk_birth)) { dprintf_bp(&bp[i], "can free old%s", ""); space += bp_get_dsize(spa, &bp[i]); } @@ -690,6 +691,7 @@ * the size will change between now and the dbuf dirty call. */ if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, + &dn->dn_phys->dn_blkptr[0], dn->dn_phys->dn_blkptr[0].blk_birth)) { txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; } else { @@ -1279,7 +1281,7 @@ txh->txh_space_tounref = 0; } else { if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset, - bp->blk_birth)) + bp, bp->blk_birth)) txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE; else txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed May 19 22:59:13 2010 -0700 @@ -40,6 +40,11 @@ #include <sys/zvol.h> #include <sys/dsl_scan.h> +/* + * Enable/disable prefetching of dedup-ed blocks which are going to be freed. + */ +int zfs_dedup_prefetch = 1; + static char *dsl_reaper = "the grim reaper"; static dsl_checkfunc_t dsl_dataset_destroy_begin_check; @@ -234,9 +239,16 @@ } boolean_t -dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) +dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, + uint64_t blk_birth) { - return (blk_birth > dsl_dataset_prev_snap_txg(ds)); + if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) + return (B_FALSE); + + if (zfs_dedup_prefetch && bp && BP_GET_DEDUP(bp)) + ddt_prefetch(dsl_dataset_get_spa(ds), bp); + + return (B_TRUE); } /* ARGSUSED */
--- a/usr/src/uts/common/fs/zfs/spa.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/spa.c Wed May 19 22:59:13 2010 -0700 @@ -106,7 +106,7 @@ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_FIX(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, { ZTI_BATCH, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) }, - { ZTI_FIX(10), ZTI_NULL, ZTI_FIX(10), ZTI_NULL }, + { ZTI_FIX(100), ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, };
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h Wed May 19 22:59:13 2010 -0700 @@ -155,6 +155,8 @@ boolean_t prehash); int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx); int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde); + void (*ddt_op_prefetch)(objset_t *os, uint64_t object, + ddt_entry_t *dde); int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx); int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde, @@ -216,6 +218,7 @@ extern void ddt_enter(ddt_t *ddt); extern void ddt_exit(ddt_t *ddt); extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add); +extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp); extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde); extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed May 19 22:59:13 2010 -0700 @@ -215,7 +215,8 @@ dmu_tx_t *tx); int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async); -boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth); +boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, + uint64_t blk_birth); uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds); void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zap.h Wed May 19 22:59:13 2010 -0700 @@ -197,6 +197,8 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf); int zap_contains(objset_t *ds, uint64_t zapobj, const char *name); +int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints); int zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, uint64_t *towrite, uint64_t *tooverwrite);
--- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h Wed May 19 22:59:13 2010 -0700 @@ -201,6 +201,7 @@ int fzap_lookup(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers, void *buf, char *realname, int rn_len, boolean_t *normalization_conflictp); +void fzap_prefetch(zap_name_t *zn); int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite, uint64_t *tooverwrite); int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
--- a/usr/src/uts/common/fs/zfs/sys/zio.h Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/zio.h Wed May 19 22:59:13 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _ZIO_H @@ -132,7 +131,8 @@ #define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8]) #define ZIO_PRIORITY_RESILVER (zio_priority_table[9]) #define ZIO_PRIORITY_SCRUB (zio_priority_table[10]) -#define ZIO_PRIORITY_TABLE_SIZE 11 +#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11]) +#define ZIO_PRIORITY_TABLE_SIZE 12 #define ZIO_PIPELINE_CONTINUE 0x100 #define ZIO_PIPELINE_STOP 0x101
--- a/usr/src/uts/common/fs/zfs/zap.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/zap.c Wed May 19 22:59:13 2010 -0700 @@ -927,6 +927,21 @@ return (err); } +void +fzap_prefetch(zap_name_t *zn) +{ + uint64_t idx, blk; + zap_t *zap = zn->zn_zap; + int bs; + + idx = ZAP_HASH_IDX(zn->zn_hash, + zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + if (zap_idx_to_blk(zap, idx, &blk) != 0) + return; + bs = FZAP_BLOCK_SHIFT(zap); + dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs); +} + /* * Helper functions for consumers. */
--- a/usr/src/uts/common/fs/zfs/zap_micro.c Wed May 19 22:33:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/zap_micro.c Wed May 19 22:59:13 2010 -0700 @@ -812,6 +812,29 @@ } int +zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + fzap_prefetch(zn); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) {