Mercurial > illumos > illumos-gate
diff usr/src/uts/common/fs/zfs/dmu.c @ 2391:2fa3fd1db808
6447377 ZFS prefetch is inconsistant
author | maybee |
---|---|
date | Tue, 18 Jul 2006 18:09:14 -0700 |
parents | 45affe88ed99 |
children | 7b208a92357b |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/dmu.c Tue Jul 18 04:09:41 2006 -0700 +++ b/usr/src/uts/common/fs/zfs/dmu.c Tue Jul 18 18:09:14 2006 -0700 @@ -147,11 +147,16 @@ return (0); } -int -dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, +/* + * Note: longer-term, we should modify all of the dmu_buf_*() interfaces + * to take a held dnode rather than <os, object> -- the lookup is wasteful, + * and can induce severe lock contention when writing to several files + * whose dnodes are in the same block. + */ +static int +dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) { - dnode_t *dn; dmu_buf_t **dbp; uint64_t blkid, nblks, i; uint32_t flags; @@ -160,21 +165,10 @@ ASSERT(length <= DMU_MAX_ACCESS); - if (length == 0) { - if (numbufsp) - *numbufsp = 0; - *dbpp = NULL; - return (0); - } - flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; if (length > zfetch_array_rd_sz) flags |= DB_RF_NOPREFETCH; - err = dnode_hold(os->os, object, FTAG, &dn); - if (err) - return (err); - rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; @@ -193,12 +187,11 @@ if (db == NULL) { rw_exit(&dn->dn_struct_rwlock); dmu_buf_rele_array(dbp, nblks, tag); - dnode_rele(dn, FTAG); zio_nowait(zio); return (EIO); } /* initiate async i/o */ - if (read && db->db_state == DB_UNCACHED) { + if (read) { rw_exit(&dn->dn_struct_rwlock); (void) dbuf_read(db, zio, flags); rw_enter(&dn->dn_struct_rwlock, RW_READER); @@ -206,7 +199,6 @@ dbp[i] = &db->db; } rw_exit(&dn->dn_struct_rwlock); - dnode_rele(dn, FTAG); /* wait for async i/o */ err = zio_wait(zio); @@ -238,6 +230,38 @@ return (0); } +int +dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, + uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os->os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, + numbufsp, dbpp); + + dnode_rele(dn, FTAG); + + return (err); +} + +int +dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, + uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) +{ + dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; + int err; + + err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, + numbufsp, dbpp); + + return (err); +} + void dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) { @@ -383,6 +407,9 @@ dmu_buf_t **dbp; int numbufs, i; + if (size == 0) + return; + VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, &numbufs, &dbp)); @@ -424,6 +451,9 @@ int numbufs, i; int err = 0; + if (size == 0) + return (0); + err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, &numbufs, &dbp); if (err) @@ -620,6 +650,7 @@ type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { int blksz = BP_GET_LSIZE(bp); if (data == NULL) { + uint32_t aflags = ARC_WAIT; arc_buf_t *abuf; zbookmark_t zb; @@ -630,7 +661,7 @@ (void) arc_read(NULL, spa, bp, dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED, - ARC_WAIT, &zb); + &aflags, &zb); if (abuf) { err = dump_data(ba, type, object, blkid * blksz, @@ -1511,6 +1542,16 @@ * this zio to the parent zio passed in. */ cv_wait(&db->db_changed, &db->db_mtx); + if (!db->db_data_pending && + db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { + /* + * IO was compressed away + */ + *bp = *db->db_blkptr; /* structure assignment */ + mutex_exit(&db->db_mtx); + txg_resume(dp); + return (0); + } ASSERT(db->db_data_pending || (db->db_blkptr && db->db_blkptr->blk_birth == txg)); }