changeset 10310:ba87b3315737

PSARC 2009/423 ZFS logbias property 6832481 ZFS separate intent log bypass property
author Neil Perrin <Neil.Perrin@Sun.COM>
date Fri, 14 Aug 2009 11:18:12 -0600
parents 1b8c848f3840
children 539b18426dae
files usr/src/common/zfs/zfs_prop.c usr/src/uts/common/fs/zfs/dmu_objset.c usr/src/uts/common/fs/zfs/sys/dmu.h usr/src/uts/common/fs/zfs/sys/dmu_objset.h usr/src/uts/common/fs/zfs/sys/zil.h usr/src/uts/common/fs/zfs/sys/zil_impl.h usr/src/uts/common/fs/zfs/sys/zio.h usr/src/uts/common/fs/zfs/zfs_log.c usr/src/uts/common/fs/zfs/zil.c usr/src/uts/common/fs/zfs/zio.c usr/src/uts/common/fs/zfs/zvol.c usr/src/uts/common/sys/fs/zfs.h
diffstat 12 files changed, 73 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/common/zfs/zfs_prop.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/common/zfs/zfs_prop.c	Fri Aug 14 11:18:12 2009 -0600
@@ -152,6 +152,12 @@
 		{ NULL }
 	};
 
+	static zprop_index_t logbias_table[] = {
+		{ "latency",	ZFS_LOGBIAS_LATENCY },
+		{ "throughput",	ZFS_LOGBIAS_THROUGHPUT },
+		{ NULL }
+	};
+
 	static zprop_index_t canmount_table[] = {
 		{ "off",	ZFS_CANMOUNT_OFF },
 		{ "on",		ZFS_CANMOUNT_ON },
@@ -196,6 +202,9 @@
 	    ZFS_CACHE_ALL, PROP_INHERIT,
 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
 	    "all | none | metadata", "SECONDARYCACHE", cache_table);
+	register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
+	    PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+	    "latency | throughput", "LOGBIAS", logbias_table);
 
 	/* inherit index (boolean) properties */
 	register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Aug 14 11:18:12 2009 -0600
@@ -91,6 +91,12 @@
 	return (ds ? ds->ds_object : 0);
 }
 
+uint64_t
+dmu_objset_logbias(objset_t *os)
+{
+	return (os->os_logbias);	/* cached "logbias" setting */
+}
+
 static void
 checksum_changed_cb(void *arg, uint64_t newval)
 {
@@ -159,6 +165,18 @@
 	os->os_secondary_cache = newval;
 }
 
+static void
+logbias_changed_cb(void *arg, uint64_t newval)
+{
+	objset_t *os = arg;
+
+	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
+	    newval == ZFS_LOGBIAS_THROUGHPUT);
+	os->os_logbias = newval;	/* read by dmu_objset_logbias() */
+	if (os->os_zil)		/* push to the zilog, if any */
+		zil_set_logbias(os->os_zil, newval);
+}
+
 void
 dmu_objset_byteswap(void *buf, size_t size)
 {
@@ -262,6 +280,9 @@
 			if (err == 0)
 				err = dsl_prop_register(ds, "copies",
 				    copies_changed_cb, os);
+			if (err == 0)
+				err = dsl_prop_register(ds, "logbias",
+				    logbias_changed_cb, os);
 		}
 		if (err) {
 			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
@@ -447,6 +468,8 @@
 			    compression_changed_cb, os));
 			VERIFY(0 == dsl_prop_unregister(ds, "copies",
 			    copies_changed_cb, os));
+			VERIFY(0 == dsl_prop_unregister(ds, "logbias",
+			    logbias_changed_cb, os));
 		}
 		VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
 		    primary_cache_changed_cb, os));
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Aug 14 11:18:12 2009 -0600
@@ -566,6 +566,7 @@
 extern void dmu_objset_name(objset_t *os, char *buf);
 extern dmu_objset_type_t dmu_objset_type(objset_t *os);
 extern uint64_t dmu_objset_id(objset_t *os);
+extern uint64_t dmu_objset_logbias(objset_t *os);
 extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
     uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
 extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h	Fri Aug 14 11:18:12 2009 -0600
@@ -72,6 +72,7 @@
 	uint8_t os_copies;	/* can change, under dsl_dir's locks */
 	uint8_t os_primary_cache;	/* can change, under dsl_dir's locks */
 	uint8_t os_secondary_cache;	/* can change, under dsl_dir's locks */
+	uint8_t os_logbias;	/* can change, under dsl_dir's locks */
 
 	/* no lock needed: */
 	struct dmu_tx *os_synctx; /* XXX sketchy */
--- a/usr/src/uts/common/fs/zfs/sys/zil.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h	Fri Aug 14 11:18:12 2009 -0600
@@ -397,6 +397,8 @@
 
 extern void	zil_add_block(zilog_t *zilog, blkptr_t *bp);
 
+extern void	zil_set_logbias(zilog_t *zilog, uint64_t logbias);
+
 extern int zil_disable;
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h	Fri Aug 14 11:18:12 2009 -0600
@@ -83,6 +83,7 @@
 	uint8_t		zl_stop_sync;	/* for debugging */
 	uint8_t		zl_writer;	/* boolean: write setup in progress */
 	uint8_t		zl_log_error;	/* boolean: log write error */
+	uint8_t		zl_logbias;	/* latency or throughput */
 	list_t		zl_itx_list;	/* in-memory itx list */
 	uint64_t	zl_itx_list_sz;	/* total size of records on list */
 	uint64_t	zl_cur_used;	/* current commit log size used */
--- a/usr/src/uts/common/fs/zfs/sys/zio.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h	Fri Aug 14 11:18:12 2009 -0600
@@ -383,7 +383,7 @@
     boolean_t labels);
 
 extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
-    blkptr_t *old_bp, uint64_t txg);
+    blkptr_t *old_bp, uint64_t txg, boolean_t bypass_slog);
 extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
 extern void zio_flush(zio_t *zio, vdev_t *vd);
 
--- a/usr/src/uts/common/fs/zfs/zfs_log.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c	Fri Aug 14 11:18:12 2009 -0600
@@ -474,14 +474,19 @@
 	itx_wr_state_t write_state;
 	boolean_t slogging;
 	uintptr_t fsync_cnt;
+	ssize_t immediate_write_sz;
 
 	if (zilog == NULL || zp->z_unlinked)
 		return;
 
 	ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
 
-	slogging = spa_has_slogs(zilog->zl_spa);
-	if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zfs_immediate_write_sz;
+
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
 		write_state = WR_INDIRECT;
 	else if (ioflag & (FSYNC | FDSYNC))
 		write_state = WR_COPIED;
--- a/usr/src/uts/common/fs/zfs/zil.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zil.c	Fri Aug 14 11:18:12 2009 -0600
@@ -367,7 +367,7 @@
 		}
 
 		error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk,
-		    NULL, txg);
+		    NULL, txg, zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
 
 		if (error == 0)
 			zil_init_log_chain(zilog, &blk);
@@ -791,7 +791,8 @@
 
 	BP_ZERO(bp);
 	/* pass the old blkptr in order to spread log blocks across devs */
-	error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg);
+	error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg,
+	    zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
 	if (error) {
 		dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg);
 
@@ -1280,6 +1281,12 @@
 	kmem_cache_destroy(zil_lwb_cache);
 }
 
+void
+zil_set_logbias(zilog_t *zilog, uint64_t logbias)
+{
+	zilog->zl_logbias = logbias;	/* stored in a uint8_t field */
+}
+
 zilog_t *
 zil_alloc(objset_t *os, zil_header_t *zh_phys)
 {
@@ -1292,6 +1299,7 @@
 	zilog->zl_spa = dmu_objset_spa(os);
 	zilog->zl_dmu_pool = dmu_objset_pool(os);
 	zilog->zl_destroy_txg = TXG_INITIAL - 1;
+	zilog->zl_logbias = dmu_objset_logbias(os);
 
 	mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
 
--- a/usr/src/uts/common/fs/zfs/zio.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zio.c	Fri Aug 14 11:18:12 2009 -0600
@@ -1716,12 +1716,13 @@
  */
 int
 zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t txg)
+    uint64_t txg, boolean_t bypass_slog)
 {
-	int error;
+	int error = 1;
 
-	error = metaslab_alloc(spa, spa->spa_log_class, size,
-	    new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
+	if (!bypass_slog)
+		error = metaslab_alloc(spa, spa->spa_log_class, size,
+		    new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
 
 	if (error)
 		error = metaslab_alloc(spa, spa->spa_normal_class, size,
--- a/usr/src/uts/common/fs/zfs/zvol.c	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zvol.c	Fri Aug 14 11:18:12 2009 -0600
@@ -990,6 +990,7 @@
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	boolean_t slogging;
+	ssize_t immediate_write_sz;
 
 	if (zil_disable)
 		return;
@@ -1001,7 +1002,11 @@
 		return;
 	}
 
-	slogging = spa_has_slogs(zilog->zl_spa);
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	    ? 0 : zvol_immediate_write_sz;
+
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
 
 	while (resid) {
 		itx_t *itx;
@@ -1013,7 +1018,7 @@
 		 * Unlike zfs_log_write() we can be called with
 		 * upto DMU_MAX_ACCESS/2 (5MB) writes.
 		 */
-		if (blocksize > zvol_immediate_write_sz && !slogging &&
+		if (blocksize > immediate_write_sz && !slogging &&
 		    resid >= blocksize && off % blocksize == 0) {
 			write_state = WR_INDIRECT; /* uses dmu_sync */
 			len = blocksize;
--- a/usr/src/uts/common/sys/fs/zfs.h	Fri Aug 14 09:48:09 2009 -0700
+++ b/usr/src/uts/common/sys/fs/zfs.h	Fri Aug 14 11:18:12 2009 -0600
@@ -116,6 +116,7 @@
 	ZFS_PROP_STMF_SHAREINFO,	/* not exposed to the user */
 	ZFS_PROP_DEFER_DESTROY,
 	ZFS_PROP_USERREFS,
+	ZFS_PROP_LOGBIAS,
 	ZFS_NUM_PROPS
 } zfs_prop_t;
 
@@ -245,6 +246,11 @@
 	ZFS_CANMOUNT_NOAUTO = 2
 } zfs_canmount_type_t;
 
+typedef enum {
+	ZFS_LOGBIAS_LATENCY = 0,	/* "latency"; the default */
+	ZFS_LOGBIAS_THROUGHPUT = 1	/* "throughput" */
+} zfs_logbias_op_t;
+
 typedef enum zfs_share_op {
 	ZFS_SHARE_NFS = 0,
 	ZFS_UNSHARE_NFS = 1,