Mercurial > illumos > illumos-gate
diff usr/src/uts/common/fs/zfs/dnode.c @ 2445:45c1310316ff
6447381 dnode_free_range() does not handle non-power-of-two blocksizes correctly
6452372 assertion failed: dnp->dn_nlevels == 1
author | ahrens |
---|---|
date | Wed, 26 Jul 2006 09:59:06 -0700 |
parents | 76b439ec3ac1 |
children | c0259887ebbc |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/dnode.c Wed Jul 26 09:24:19 2006 -0700 +++ b/usr/src/uts/common/fs/zfs/dnode.c Wed Jul 26 09:59:06 2006 -0700 @@ -735,7 +735,6 @@ { dmu_buf_impl_t *db, *db_next; int have_db0 = FALSE; - int err = ENOTSUP; if (size == 0) size = SPA_MINBLOCKSIZE; @@ -744,18 +743,17 @@ else size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); - if (ibs == 0) - ibs = dn->dn_indblkshift; + if (ibs == dn->dn_indblkshift) + ibs = 0; - if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && - ibs == dn->dn_indblkshift) + if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0) return (0); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); /* Check for any allocated blocks beyond the first */ if (dn->dn_phys->dn_maxblkid != 0) - goto end; + goto fail; mutex_enter(&dn->dn_dbufs_mtx); for (db = list_head(&dn->dn_dbufs); db; db = db_next) { @@ -765,11 +763,14 @@ have_db0 = TRUE; } else if (db->db_blkid != DB_BONUS_BLKID) { mutex_exit(&dn->dn_dbufs_mtx); - goto end; + goto fail; } } mutex_exit(&dn->dn_dbufs_mtx); + if (ibs && dn->dn_nlevels != 1) + goto fail; + db = NULL; if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || have_db0) { /* obtain the old block */ @@ -778,18 +779,22 @@ } dnode_setdblksz(dn, size); - dn->dn_indblkshift = ibs; dnode_setdirty(dn, tx); dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; - dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; + if (ibs) { + dn->dn_indblkshift = ibs; + dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; + } if (db) dbuf_rele(db, FTAG); - err = 0; -end: rw_exit(&dn->dn_struct_rwlock); - return (err); + return (0); + +fail: + rw_exit(&dn->dn_struct_rwlock); + return (ENOTSUP); } uint64_t @@ -909,18 +914,15 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) { dmu_buf_impl_t *db; - uint64_t start, objsize, blkid, nblks; - int blkshift, blksz, tail, head, epbs; + uint64_t blkoff, blkid, nblks; + int blksz, head; int trunc = FALSE; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); blksz = dn->dn_datablksz; - blkshift = dn->dn_datablkshift; - epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; /* If the range is past the end of the file, this is a no-op */ - objsize = blksz * (dn->dn_maxblkid+1); - if (off >= objsize) + if (off >= blksz * (dn->dn_maxblkid+1)) goto out; if (len == -1ULL) { len = UINT64_MAX - off; @@ -930,22 +932,24 @@ /* * First, block align the region to free: */ - if (dn->dn_maxblkid == 0) { - if (off == 0) { + if (ISP2(blksz)) { + head = P2NPHASE(off, blksz); + blkoff = P2PHASE(off, blksz); + } else { + ASSERT(dn->dn_maxblkid == 0); + if (off == 0 && len >= blksz) { + /* Freeing the whole block; don't do any head. */ head = 0; } else { + /* Freeing part of the block. */ head = blksz - off; ASSERT3U(head, >, 0); } - start = off; - } else { - ASSERT(ISP2(blksz)); - head = P2NPHASE(off, blksz); - start = P2PHASE(off, blksz); + blkoff = off; } /* zero out any partial block data at the start of the range */ if (head) { - ASSERT3U(start + head, ==, blksz); + ASSERT3U(blkoff + head, ==, blksz); if (len < head) head = len; if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE, @@ -959,76 +963,96 @@ dbuf_will_dirty(db, tx); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); data = db->db.db_data; - bzero(data + start, head); + bzero(data + blkoff, head); } dbuf_rele(db, FTAG); } off += head; len -= head; } - /* If the range was less than one block, we are done */ - if (len == 0) - goto out; - /* If the remaining range is past the end of the file, we are done */ - if (off > dn->dn_maxblkid << blkshift) + /* If the range was less than one block, we're done */ + if (len == 0 || off >= blksz * (dn->dn_maxblkid+1)) goto out; - if (off + len == UINT64_MAX) - tail = 0; - else - tail = P2PHASE(len, blksz); + if (!ISP2(blksz)) { + /* + * They are freeing the whole block of a + * non-power-of-two blocksize file. Skip all the messy + * math. + */ + ASSERT3U(off, ==, 0); + ASSERT3U(len, >=, blksz); + blkid = 0; + nblks = 1; + } else { + int tail; + int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + int blkshift = dn->dn_datablkshift; + + /* If the remaining range is past end of file, we're done */ + if (off > dn->dn_maxblkid << blkshift) + goto out; + + if (off + len == UINT64_MAX) + tail = 0; + else + tail = P2PHASE(len, blksz); - ASSERT3U(P2PHASE(off, blksz), ==, 0); - /* zero out any partial block data at the end of the range */ - if (tail) { - if (len < tail) - tail = len; - if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len), - TRUE, FTAG, &db) == 0) { - /* don't dirty if it isn't on disk and isn't dirty */ - if (db->db_dirtied || - (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { - rw_exit(&dn->dn_struct_rwlock); - dbuf_will_dirty(db, tx); - rw_enter(&dn->dn_struct_rwlock, RW_WRITER); - bzero(db->db.db_data, tail); + ASSERT3U(P2PHASE(off, blksz), ==, 0); + /* zero out any partial block data at the end of the range */ + if (tail) { + if (len < tail) + tail = len; + if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len), + TRUE, FTAG, &db) == 0) { + /* don't dirty if not on disk and not dirty */ + if (db->db_dirtied || + (db->db_blkptr && + !BP_IS_HOLE(db->db_blkptr))) { + rw_exit(&dn->dn_struct_rwlock); + dbuf_will_dirty(db, tx); + rw_enter(&dn->dn_struct_rwlock, + RW_WRITER); + bzero(db->db.db_data, tail); + } + dbuf_rele(db, FTAG); } + len -= tail; + } + /* If the range did not include a full block, we are done */ + if (len == 0) + goto out; + + /* dirty the left indirects */ + if (dn->dn_nlevels > 1 && off != 0) { + db = dbuf_hold_level(dn, 1, + (off - head) >> (blkshift + epbs), FTAG); + dbuf_will_dirty(db, tx); dbuf_rele(db, FTAG); } - len -= tail; - } - /* If the range did not include a full block, we are done */ - if (len == 0) - goto out; - /* dirty the left indirects */ - if (dn->dn_nlevels > 1 && off != 0) { - db = dbuf_hold_level(dn, 1, - (off - head) >> (blkshift + epbs), FTAG); - dbuf_will_dirty(db, tx); - dbuf_rele(db, FTAG); - } + /* dirty the right indirects */ + if (dn->dn_nlevels > 1 && !trunc) { + db = dbuf_hold_level(dn, 1, + (off + len + tail - 1) >> (blkshift + epbs), FTAG); + dbuf_will_dirty(db, tx); + dbuf_rele(db, FTAG); + } - /* dirty the right indirects */ - if (dn->dn_nlevels > 1 && !trunc) { - db = dbuf_hold_level(dn, 1, - (off + len + tail - 1) >> (blkshift + epbs), FTAG); - dbuf_will_dirty(db, tx); - dbuf_rele(db, FTAG); - } + /* + * Finally, add this range to the dnode range list, we + * will finish up this free operation in the syncing phase. + */ + ASSERT(IS_P2ALIGNED(off, 1<<blkshift)); + ASSERT(off + len == UINT64_MAX || + IS_P2ALIGNED(len, 1<<blkshift)); + blkid = off >> blkshift; + nblks = len >> blkshift; - /* - * Finally, add this range to the dnode range list, we - * will finish up this free operation in the syncing phase. - */ - ASSERT(IS_P2ALIGNED(off, 1<<blkshift)); - ASSERT(off + len == UINT64_MAX || IS_P2ALIGNED(len, 1<<blkshift)); - blkid = off >> blkshift; - nblks = len >> blkshift; - - if (trunc) - dn->dn_maxblkid = (blkid ? blkid - 1 : 0); + if (trunc) + dn->dn_maxblkid = (blkid ? blkid - 1 : 0); + } mutex_enter(&dn->dn_mtx); dnode_clear_range(dn, blkid, nblks, tx);