# HG changeset patch # User maybee # Date 1185248587 25200 # Node ID dc10a713d1a07f67a7994bc04adef332c1604ea6 # Parent 83b15139bb8620277f9504bada6c447b42a38c15 6453407 rm a file when the root file system is at its quota limit reports ENOSPC 6483887 without direct management, arc ghost lists can run amok 6557767 assertion failed: error == 17 || lr->lr_length <= zp->z_blksz 6577156 zfs_putapage discards pages too easily 6581978 assertion failed: koff <= filesz, file: ../../common/fs/zfs/zfs_vnops.c, line: 2834 6582642 cannot set property while quota exceed or really out of space 6583710 assertion failed: new_state->arcs_size + to_delta >= *size (0x5a4cc00 >= 0x5a50a00) diff -r 83b15139bb86 -r dc10a713d1a0 usr/src/uts/common/fs/zfs/arc.c --- a/usr/src/uts/common/fs/zfs/arc.c Mon Jul 23 18:07:13 2007 -0700 +++ b/usr/src/uts/common/fs/zfs/arc.c Mon Jul 23 20:43:07 2007 -0700 @@ -379,6 +379,7 @@ static void arc_get_data_buf(arc_buf_t *buf); static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock); static int arc_evict_needed(arc_buf_contents_t type); +static void arc_evict_ghost(arc_state_t *state, int64_t bytes); #define GHOST_STATE(state) \ ((state) == arc_mru_ghost || (state) == arc_mfu_ghost) @@ -842,7 +843,6 @@ to_delta = ab->b_size; } atomic_add_64(size, to_delta); - ASSERT3U(new_state->arcs_size + to_delta, >=, *size); if (use_mutex) mutex_exit(&new_state->arcs_mtx); @@ -1258,6 +1258,27 @@ if (missed) ARCSTAT_INCR(arcstat_mutex_miss, missed); + /* + * We have just evicted some data into the ghost state, make + * sure we also adjust the ghost state size if necessary. 
+ */ + if (arc_no_grow && + arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size > arc_c) { + int64_t mru_over = arc_anon->arcs_size + arc_mru->arcs_size + + arc_mru_ghost->arcs_size - arc_c; + + if (mru_over > 0 && arc_mru_ghost->arcs_lsize[type] > 0) { + int64_t todelete = + MIN(arc_mru_ghost->arcs_lsize[type], mru_over); + arc_evict_ghost(arc_mru_ghost, todelete); + } else if (arc_mfu_ghost->arcs_lsize[type] > 0) { + int64_t todelete = MIN(arc_mfu_ghost->arcs_lsize[type], + arc_mru_ghost->arcs_size + + arc_mfu_ghost->arcs_size - arc_c); + arc_evict_ghost(arc_mfu_ghost, todelete); + } + } + return (stolen); } diff -r 83b15139bb86 -r dc10a713d1a0 usr/src/uts/common/fs/zfs/dsl_dir.c --- a/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Jul 23 18:07:13 2007 -0700 +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Jul 23 20:43:07 2007 -0700 @@ -698,6 +698,7 @@ uint64_t est_used, quota, parent_rsrv; int edquot = EDQUOT; int txgidx = txg & TXG_MASK; + boolean_t ismos; int i; struct tempreserve *tr; @@ -713,30 +714,31 @@ for (i = 0; i < TXG_SIZE; i++) est_used += dd->dd_tempreserved[i]; - quota = UINT64_MAX; - - if (dd->dd_phys->dd_quota) + /* + * If this transaction will result in a net free of space, we want + * to let it through. + */ + if (netfree || dd->dd_phys->dd_quota == 0) + quota = UINT64_MAX; + else quota = dd->dd_phys->dd_quota; /* - * If this transaction will result in a net free of space, we want - * to let it through, but we have to be careful: the space that it - * frees won't become available until *after* this txg syncs. - * Therefore, to ensure that it's possible to remove files from - * a full pool without inducing transient overcommits, we throttle + * Adjust the quota against the actual pool size at the root. + * To ensure that it's possible to remove files from a full + * pool without inducing transient overcommits, we throttle * netfree transactions against a quota that is slightly larger, * but still within the pool's allocation slop. 
In cases where * we're very close to full, this will allow a steady trickle of * removes to get through. */ - if (dd->dd_parent == NULL) { + ismos = (dd->dd_phys->dd_head_dataset_obj == 0); + if (dd->dd_parent == NULL || ismos) { uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); if (poolsize < quota) { quota = poolsize; edquot = ENOSPC; } - } else if (netfree) { - quota = UINT64_MAX; } /* @@ -771,7 +773,7 @@ list_insert_tail(tr_list, tr); /* see if it's OK with our parent */ - if (dd->dd_parent && parent_rsrv) { + if (dd->dd_parent && parent_rsrv && !ismos) { return (dsl_dir_tempreserve_impl(dd->dd_parent, parent_rsrv, netfree, tr_list, tx)); } else { diff -r 83b15139bb86 -r dc10a713d1a0 usr/src/uts/common/fs/zfs/zfs_vnops.c --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c Mon Jul 23 18:07:13 2007 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c Mon Jul 23 20:43:07 2007 -0700 @@ -875,7 +875,8 @@ lr->lr_blkoff = off - boff; error = dmu_sync(zio, db, &lr->lr_blkptr, lr->lr_common.lrc_txg, zfs_get_done, zgd); - ASSERT(error == EEXIST || lr->lr_length <= zp->z_blksz); + ASSERT((error && error != EINPROGRESS) || + lr->lr_length <= zp->z_blksz); if (error == 0) { zil_add_vdev(zfsvfs->z_log, DVA_GET_VDEV(BP_IDENTITY(&lr->lr_blkptr))); @@ -2810,8 +2811,10 @@ rl_t *rl; u_offset_t off, koff; size_t len, klen; + uint64_t filesz; int err; + filesz = zp->z_phys->zp_size; off = pp->p_offset; len = PAGESIZE; /* @@ -2819,9 +2822,7 @@ * muiltiple pages so that we write a full block (thus avoiding * a read-modify-write). */ - if (zp->z_blksz > PAGESIZE) { - uint64_t filesz = zp->z_phys->zp_size; - + if (off < filesz && zp->z_blksz > PAGESIZE) { if (!ISP2(zp->z_blksz)) { /* Only one block in the file. */ klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); @@ -2841,20 +2842,20 @@ /* * Can't push pages past end-of-file. 
*/ - if (off >= zp->z_phys->zp_size) { - /* discard all pages */ - flags |= B_INVAL; + filesz = zp->z_phys->zp_size; + if (off >= filesz) { + /* ignore all pages */ err = 0; goto out; - } else if (off + len > zp->z_phys->zp_size) { - int npages = btopr(zp->z_phys->zp_size - off); + } else if (off + len > filesz) { + int npages = btopr(filesz - off); page_t *trunc; page_list_break(&pp, &trunc, npages); - /* discard pages past end of file */ + /* ignore pages past end of file */ if (trunc) - pvn_write_done(trunc, B_INVAL | flags); - len = zp->z_phys->zp_size - off; + pvn_write_done(trunc, flags); + len = filesz - off; } tx = dmu_tx_create(zfsvfs->z_os); @@ -2890,7 +2891,7 @@ out: zfs_range_unlock(rl); - pvn_write_done(pp, (err ? B_ERROR : 0) | B_WRITE | flags); + pvn_write_done(pp, (err ? B_ERROR : 0) | flags); if (offp) *offp = off; if (lenp)