Mercurial > illumos > illumos-gate
changeset 12772:71326a76a341
6959659 Extending end of file *may* get lost on replay of writes.
author | Neil Perrin <Neil.Perrin@Sun.COM> |
---|---|
date | Tue, 06 Jul 2010 14:50:56 -0600 |
parents | 9e4f2f9983b9 |
children | f0d486d64936 |
files | usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h usr/src/uts/common/fs/zfs/zfs_replay.c usr/src/uts/common/fs/zfs/zfs_vnops.c |
diffstat | 3 files changed, 50 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h Tue Jul 06 11:05:17 2010 -0600 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h Tue Jul 06 14:50:56 2010 -0600 @@ -79,6 +79,7 @@ kmutex_t z_lock; uint64_t z_userquota_obj; uint64_t z_groupquota_obj; + uint64_t z_replay_eof; /* New end of file - replay only */ sa_attr_type_t *z_attr_table; /* SA attr mapping->id */ #define ZFS_OBJ_MTX_SZ 64 kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
--- a/usr/src/uts/common/fs/zfs/zfs_replay.c Tue Jul 06 11:05:17 2010 -0600 +++ b/usr/src/uts/common/fs/zfs/zfs_replay.c Tue Jul 06 14:50:56 2010 -0600 @@ -624,7 +624,7 @@ znode_t *zp; int error; ssize_t resid; - uint64_t orig_eof, eod, offset, length; + uint64_t eod, offset, length; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -642,9 +642,20 @@ offset = lr->lr_offset; length = lr->lr_length; - eod = offset + length; /* end of data for this write */ + eod = offset + length; /* end of data for this write */ - orig_eof = zp->z_size; + /* + * This may be a write from a dmu_sync() for a whole block, + * and may extend beyond the current end of the file. + * We can't just replay what was written for this TX_WRITE as + * a future TX_WRITE2 may extend the eof and the data for that + * write needs to be there. So we write the whole block and + * reduce the eof. This needs to be done within the single dmu + * transaction created within vn_rdwr -> zfs_write. So a possible + * new end of file is passed through in zfsvfs->z_replay_eof + */ + + zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ /* If it's a dmu_sync() block, write the whole block */ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { @@ -653,23 +664,15 @@ offset -= offset % blocksize; length = blocksize; } + if (zp->z_size < eod) + zfsvfs->z_replay_eof = eod; } error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - /* - * This may be a write from a dmu_sync() for a whole block, - * and may extend beyond the current end of the file. - * We can't just replay what was written for this TX_WRITE as - * a future TX_WRITE2 may extend the eof and the data for that - * write needs to be there. So we write the whole block and - * reduce the eof. - */ - if (orig_eof < zp->z_size) /* file length grew ? */ - zp->z_size = eod; - VN_RELE(ZTOV(zp)); + zfsvfs->z_replay_eof = 0; /* safety */ return (error); } @@ -693,9 +696,32 @@ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) return (error); +top: end = lr->lr_offset + lr->lr_length; - if (end > zp->z_size) + if (end > zp->z_size) { + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + zp->z_size = end; + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + VN_RELE(ZTOV(zp)); + if (error == ERESTART) { + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + dmu_tx_abort(tx); + return (error); + } + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void *)&zp->z_size, sizeof (uint64_t), tx); + + /* Ensure the replayed seq is updated */ + (void) zil_replaying(zfsvfs->z_log, tx); + + dmu_tx_commit(tx); + } VN_RELE(ZTOV(zp));
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c Tue Jul 06 11:05:17 2010 -0600 +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c Tue Jul 06 14:50:56 2010 -0600 @@ -893,6 +893,14 @@ uio->uio_loffset); ASSERT(error == 0); } + /* + * If we are replaying and eof is non zero then force + * the file size to the specified eof. Note, there's no + * concurrency during replay. + */ + if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) + zp->z_size = zfsvfs->z_replay_eof; + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);