# HG changeset patch # User Garrett D'Amore # Date 1418230004 28800 # Node ID 014608f1fae082dd34b0772d8a9bfe21f07faab8 # Parent 61550c9ec4128498e1e89b87971c9083c6069fc9# Parent 1d69341f66e991fdb21ebb97d0f8bc43b132aba4 Merge branch 'master' into coremerge Conflicts: usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/boot/bootadm/bootadm.c --- a/usr/src/cmd/boot/bootadm/bootadm.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/boot/bootadm/bootadm.c Wed Dec 10 08:46:44 2014 -0800 @@ -24,7 +24,7 @@ */ /* - * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ /* @@ -121,7 +121,7 @@ #define GRUB_MENU "/boot/grub/menu.lst" #define MENU_TMP "/boot/grub/menu.lst.tmp" #define GRUB_BACKUP_MENU "/etc/lu/GRUB_backup_menu" -#define RAMDISK_SPECIAL "/ramdisk" +#define RAMDISK_SPECIAL "/dev/ramdisk/" #define STUBBOOT "/stubboot" #define MULTIBOOT "/platform/i86pc/multiboot" #define GRUBSIGN_DIR "/boot/grub/bootsign" @@ -3466,7 +3466,8 @@ return (0); } - if (strstr(mnt.mnt_special, RAMDISK_SPECIAL) != NULL) { + if (strncmp(mnt.mnt_special, RAMDISK_SPECIAL, + strlen(RAMDISK_SPECIAL)) == 0) { if (bam_verbose) bam_error(IS_RAMDISK, bam_root); (void) fclose(fp); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/cmd-inet/usr.bin/pppd/options.c --- a/usr/src/cmd/cmd-inet/usr.bin/pppd/options.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/cmd-inet/usr.bin/pppd/options.c Wed Dec 10 08:46:44 2014 -0800 @@ -31,7 +31,6 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#pragma ident "%Z%%M% %I% %E% SMI" #define RCSID "$Id: options.c,v 1.74 2000/04/15 01:27:13 masputra Exp $" #include @@ -1315,9 +1314,10 @@ /* * Store the resulting character for the escape sequence. */ - if (len < MAXWORDLEN-1) + if (len < MAXWORDLEN) { word[len] = value; - ++len; + ++len; + } if (!got) c = getc(f); @@ -1350,9 +1350,10 @@ /* * An ordinary character: store it in the word and get another. */ - if (len < MAXWORDLEN-1) + if (len < MAXWORDLEN) { word[len] = c; - ++len; + ++len; + } c = getc(f); } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/dladm/dladm.c --- a/usr/src/cmd/dladm/dladm.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/dladm/dladm.c Wed Dec 10 08:46:44 2014 -0800 @@ -337,7 +337,8 @@ { "delete-phys", do_delete_phys, " delete-phys " }, { "show-phys", do_show_phys, - " show-phys [-pP] [-o ,..] [-H] []\n"}, + " show-phys [-m | -H | -P] [[-p] [-o [,...]] " + "[]\n" }, { "init-phys", do_init_phys, NULL }, { "show-linkmap", do_show_linkmap, NULL }, { "create-vnic", do_create_vnic, diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/common/modules/zfs/Makefile.zfs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/mdb/common/modules/zfs/Makefile.zfs Wed Dec 10 08:46:44 2014 -0800 @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2014 Spectra Logic Corporation. All rights reserved. +# Use is subject to license terms. +# + +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations provided where needed in libraries and the +# kernel. +LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/common/modules/zfs/zfs.c --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Wed Dec 10 08:46:44 2014 -0800 @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -591,6 +591,7 @@ char buf[16*1024]; int verbose = B_FALSE; int four = B_FALSE; + dmu_buf_t l_dbuf; zap_leaf_t l; zap_leaf_phys_t *zlp = (void *)buf; int i; @@ -601,7 +602,8 @@ NULL) != argc) return (DCMD_USAGE); - l.l_phys = zlp; + l_dbuf.db_data = zlp; + l.l_dbuf = &l_dbuf; l.l_bs = 14; /* assume 16k blocks */ if (four) l.l_bs = 12; @@ -958,6 +960,7 @@ static const char *bytestats[] = { "p", "c", "c_min", "c_max", "size", "duplicate_buffers_size", "arc_meta_used", "arc_meta_limit", "arc_meta_max", + "arc_meta_min", "hdr_size", "data_size", "other_size", NULL }; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/intel/amd64/libzpool/Makefile --- a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -33,6 +33,7 @@ include ../../../../Makefile.cmd.64 include ../../Makefile.amd64 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/intel/amd64/zfs/Makefile --- a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -33,6 +33,7 @@ include ../../../../Makefile.cmd.64 include ../../Makefile.amd64 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/intel/ia32/libzpool/Makefile --- a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -32,6 +32,7 @@ include ../../../../Makefile.cmd include ../../Makefile.ia32 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/intel/ia32/zfs/Makefile --- a/usr/src/cmd/mdb/intel/ia32/zfs/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/intel/ia32/zfs/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -32,6 +32,7 @@ include ../../../../Makefile.cmd include ../../Makefile.ia32 include ../../../Makefile.module 
+include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/sparc/v7/libzpool/Makefile --- a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -32,6 +32,7 @@ include ../../../../Makefile.cmd include ../../Makefile.sparcv7 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/sparc/v9/libzpool/Makefile --- a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -33,6 +33,7 @@ include ../../../../Makefile.cmd.64 include ../../Makefile.sparcv9 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs MODSRCS_DIR = ../../../common/modules/zfs GENUNIX_DIR = ../../../common/modules/genunix diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/mdb/sparc/v9/zfs/Makefile --- a/usr/src/cmd/mdb/sparc/v9/zfs/Makefile Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/mdb/sparc/v9/zfs/Makefile Wed Dec 10 08:46:44 2014 -0800 @@ -33,6 +33,7 @@ include ../../../../Makefile.cmd.64 include ../../Makefile.sparcv9 include ../../../Makefile.module +include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/prstat/prsort.c --- a/usr/src/cmd/prstat/prsort.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/prstat/prsort.c Wed Dec 10 08:46:44 2014 -0800 @@ -22,10 +22,9 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -38,10 +37,11 @@ void list_alloc(list_t *list, int size) { - if (size > 0) { - list->l_size = size; + list->l_size = size; + if (size > 0) list->l_ptrs = Zalloc(sizeof (void *) * (size + 1)); - } + else + list->l_ptrs = NULL; } void @@ -208,8 +208,11 @@ void list_sort(list_t *list) { + list->l_used = 0; + if (list->l_size == 0) + return; + (void) memset(list->l_ptrs, 0, sizeof (void *) * list->l_size); - list->l_used = 0; if (list->l_type == LT_LWPS) { lwp_info_t *lwp = list->l_head; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/prstat/prstat.c --- a/usr/src/cmd/prstat/prstat.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/prstat/prstat.c Wed Dec 10 08:46:44 2014 -0800 @@ -367,6 +367,9 @@ double loadavg[3] = {0, 0, 0}; int i, lwpid; + if (list->l_size == 0) + return; + if (foreach_element(&set_tbl, &loadavg, psetloadavg) == 0) { /* * If processor sets aren't specified, we display system-wide @@ -1160,7 +1163,10 @@ return; } if (opts.o_outpmode & OPT_SPLIT) { - n = opts.o_ntop + opts.o_nbottom + 2; + if (opts.o_ntop == 0) + n = opts.o_nbottom + 1; + else + n = opts.o_ntop + opts.o_nbottom + 2; } else { if (opts.o_outpmode & OPT_USERS) n = opts.o_nbottom + 1; @@ -1434,6 +1440,8 @@ opts.o_ntop = Atoi(p); if (p = strtok(NULL, ",")) opts.o_nbottom = Atoi(p); + else if (opts.o_ntop == 0) + opts.o_nbottom = 5; opts.o_outpmode &= ~OPT_FULLSCREEN; break; case 's': @@ -1499,7 +1507,9 @@ if ((opts.o_outpmode & OPT_USERS) && !(opts.o_outpmode & OPT_SPLIT)) opts.o_nbottom = opts.o_ntop; - if (opts.o_ntop == 0 || opts.o_nbottom == 0) + if (!(opts.o_outpmode & OPT_SPLIT) && opts.o_ntop == 0) + Die(gettext("invalid argument for -n\n")); + if (opts.o_nbottom == 0) Die(gettext("invalid argument for -n\n")); if (!(opts.o_outpmode & OPT_SPLIT) && (opts.o_outpmode & OPT_USERS) && ((opts.o_outpmode & (OPT_PSINFO | OPT_MSACCT)))) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/zdb/Makefile.com --- a/usr/src/cmd/zdb/Makefile.com Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/zdb/Makefile.com Wed Dec 10 08:46:44 2014 -0800 @@ -51,6 +51,12 @@ LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. 
+LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(PROG) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/zdb/zdb.c --- a/usr/src/cmd/zdb/zdb.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/zdb/zdb.c Wed Dec 10 08:46:44 2014 -0800 @@ -1157,7 +1157,7 @@ print_indirect(bp, zb, dnp); if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; @@ -1857,8 +1857,8 @@ if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; usedobjs = BP_GET_FILL(os->os_rootbp); - refdbytes = os->os_spa->spa_dsl_pool-> - dp_mos_dir->dd_phys->dd_used_bytes; + refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> + dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/zoneadmd/zoneadmd.c --- a/usr/src/cmd/zoneadmd/zoneadmd.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/zoneadmd/zoneadmd.c Wed Dec 10 08:46:44 2014 -0800 @@ -817,6 +817,7 @@ dladm_status_t status; char errmsg[DLADM_STRSIZE]; int err; + boolean_t restart_init; if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) return (-1); @@ -873,6 +874,9 @@ goto bad; } + /* See if this zone's brand should restart init if it dies. */ + restart_init = brand_restartinit(bh); + brand_close(bh); err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, @@ -939,6 +943,12 @@ goto bad; } + if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, + NULL, 0) == -1) { + zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); + goto bad; + } + /* * Inform zonestatd of a new zone so that it can install a door for * the zone to contact it. diff -r 61550c9ec412 -r 014608f1fae0 usr/src/cmd/ztest/Makefile.com --- a/usr/src/cmd/ztest/Makefile.com Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/cmd/ztest/Makefile.com Wed Dec 10 08:46:44 2014 -0800 @@ -46,6 +46,12 @@ LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. 
+LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + CERRWARN += -_gcc=-Wno-switch .KEEP_STATE: diff -r 61550c9ec412 -r 014608f1fae0 usr/src/head/grp.h --- a/usr/src/head/grp.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/head/grp.h Wed Dec 10 08:46:44 2014 -0800 @@ -114,9 +114,10 @@ #ifdef __PRAGMA_REDEFINE_EXTNAME #pragma redefine_extname getgrgid_r __posix_getgrgid_r #pragma redefine_extname getgrnam_r __posix_getgrnam_r -extern int getgrgid_r(gid_t, struct group *, char *, int, struct group **); -extern int getgrnam_r(const char *, struct group *, char *, int, - struct group **); +extern int getgrgid_r(gid_t, struct group *, char *, + size_t, struct group **); +extern int getgrnam_r(const char *, struct group *, char *, + size_t, struct group **); #else /* __PRAGMA_REDEFINE_EXTNAME */ extern int __posix_getgrgid_r(gid_t, struct group *, char *, size_t, @@ -132,13 +133,13 @@ #else /* !__lint */ static int -getgrgid_r(gid_t __gid, struct group *__grp, char *__buf, int __len, +getgrgid_r(gid_t __gid, struct group *__grp, char *__buf, size_t __len, struct group **__res) { return (__posix_getgrgid_r(__gid, __grp, __buf, __len, __res)); } static int -getgrnam_r(const char *__cb, struct group *__grp, char *__buf, int __len, +getgrnam_r(const char *__cb, struct group *__grp, char *__buf, size_t __len, struct group **__res) { return (__posix_getgrnam_r(__cb, __grp, __buf, __len, __res)); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/lib/libbrand/common/libbrand.c --- a/usr/src/lib/libbrand/common/libbrand.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/lib/libbrand/common/libbrand.c Wed Dec 10 08:46:44 2014 -0800 @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ @@ -61,6 +62,7 @@ #define DTD_ELEM_FORCELOGIN_CMD ((const xmlChar *) "forcedlogin_cmd") #define DTD_ELEM_MODNAME ((const xmlChar *) "modname") #define DTD_ELEM_MOUNT ((const xmlChar *) "mount") +#define DTD_ELEM_RESTARTINIT ((const xmlChar *) "restartinit") #define DTD_ELEM_POSTATTACH ((const xmlChar *) "postattach") #define DTD_ELEM_POSTCLONE ((const xmlChar *) "postclone") #define DTD_ELEM_POSTINSTALL ((const xmlChar *) "postinstall") @@ -520,6 +522,21 @@ buf, len, DTD_ELEM_INITNAME, B_FALSE, B_FALSE)); } +boolean_t +brand_restartinit(brand_handle_t bh) +{ + struct brand_handle *bhp = (struct brand_handle *)bh; + char val[80]; + + if (brand_get_value(bhp, NULL, NULL, NULL, NULL, + val, sizeof (val), DTD_ELEM_RESTARTINIT, B_FALSE, B_FALSE) != 0) + return (B_TRUE); + + if (strcmp(val, "false") == 0) + return (B_FALSE); + return (B_TRUE); +} + int brand_get_login_cmd(brand_handle_t bh, const char *username, char *buf, size_t len) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/lib/libbrand/common/libbrand.h --- a/usr/src/lib/libbrand/common/libbrand.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/lib/libbrand/common/libbrand.h Wed Dec 10 08:46:44 2014 -0800 @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
*/ @@ -60,6 +61,7 @@ extern int brand_get_halt(brand_handle_t, const char *, const char *, char *, size_t); extern int brand_get_initname(brand_handle_t, char *, size_t); +extern boolean_t brand_restartinit(brand_handle_t); extern int brand_get_install(brand_handle_t, const char *, const char *, char *, size_t); extern int brand_get_installopts(brand_handle_t, char *, size_t); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/lib/libbrand/common/mapfile-vers --- a/usr/src/lib/libbrand/common/mapfile-vers Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/lib/libbrand/common/mapfile-vers Wed Dec 10 08:46:44 2014 -0800 @@ -20,6 +20,7 @@ # # # Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, Joyent, Inc. All rights reserved. # Copyright 2014 Nexenta Systems, Inc. All rights reserved. # @@ -78,6 +79,7 @@ brand_platform_iter_gmounts; brand_platform_iter_link; brand_platform_iter_mounts; + brand_restartinit; local: *; }; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/lib/libbrand/dtd/brand.dtd.1 --- a/usr/src/lib/libbrand/dtd/brand.dtd.1 Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/lib/libbrand/dtd/brand.dtd.1 Wed Dec 10 08:46:44 2014 -0800 @@ -21,6 +21,7 @@ CDDL HEADER END Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2011, Joyent, Inc. All rights reserved. DO NOT EDIT THIS FILE. @@ -211,6 +212,19 @@ + + + + -ctype->lc_mbsnrtowcs(pwcs, &sp, ULONG_MAX, n, &mbs)); } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/lib/libc/port/locale/strcoll.c --- a/usr/src/lib/libc/port/locale/strcoll.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/lib/libc/port/locale/strcoll.c Wed Dec 10 08:46:44 2014 -0800 @@ -55,9 +55,6 @@ size_t sz1, sz2; const struct lc_collate *lcc = loc->collate; - mbstate_t mbs1 = { 0 }; /* initial states */ - mbstate_t mbs2 = { 0 }; - if (lcc->lc_is_posix) return (strcmp(s1, s2)); @@ -89,10 +86,10 @@ goto error; } - if ((mbsrtowcs_l(w1, &s1, sz1, &mbs1, loc)) == (size_t)-1) + if ((mbstowcs_l(w1, s1, sz1, loc)) == (size_t)-1) goto error; - if ((mbsrtowcs_l(w2, &s2, sz2, &mbs2, loc)) == (size_t)-1) + if ((mbstowcs_l(w2, s2, sz2, loc)) == (size_t)-1) goto error; ret = wcscoll_l(w1, w2, loc); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man1m/dladm.1m --- a/usr/src/man/man1m/dladm.1m Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man1m/dladm.1m Wed Dec 10 08:46:44 2014 -0800 @@ -9,7 +9,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH DLADM 1M "Jul 17, 2014" +.TH DLADM 1M "Dec 03, 2014" .SH NAME dladm \- administer data links .SH SYNOPSIS @@ -22,7 +22,7 @@ .LP .nf \fBdladm delete-phys\fR \fIphys-link\fR -\fBdladm show-phys\fR [\fB-P\fR] [\fB-m\fR] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fB-H\fR] [\fIphys-link\fR] +\fBdladm show-phys\fR [\fB-m\fR | \fB-H\fR | \fB-P\fR] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIphys-link\fR] .fi .LP @@ -144,7 +144,6 @@ .fi .SH DESCRIPTION -.sp .LP The \fBdladm\fR command is used to administer data-links. A data-link is represented in the system as a \fBSTREAMS DLPI\fR (v2) interface which can be @@ -298,7 +297,6 @@ .RE .SS "Options" -.sp .LP Each \fBdladm\fR subcommand has its own set of options. However, many of the subcommands have the following as a common option: @@ -314,7 +312,6 @@ .RE .SS "SUBCOMMANDS" -.sp .LP The following subcommands are supported: .sp @@ -607,8 +604,8 @@ .sp .ne 2 .na -\fB\fBdladm show-phys\fR [\fB-P\fR] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] -[\fB-H\fR] [\fIphys-link\fR]\fR +\fB\fBdladm show-phys\fR [\fB-m\fR | \fB-H\fR | \fB-P\fR] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] +[\fIphys-link\fR]\fR .ad .sp .6 .RS 4n @@ -680,13 +677,72 @@ .sp .ne 2 .na +\fB\fB-m\fR\fR +.ad +.sp .6 +.RS 4n +Show MAC addresses and related information. Output from \fB-m\fR +displays the following elements: +.sp +.ne 2 +.na +\fB\fBLINK\fR\fR +.ad +.sp .6 +.RS 4n +A physical device corresponding to a NIC driver. +.RE +.sp +.ne 2 +.na +\fB\fBSLOT\fR\fR +.ad +.sp .6 +.RS 4n +When a given physical device has multiple factory MAC addresses, this +indicates the slot of the corresponding MAC address which can be used as +part of a call to \fBcreate-vnic\fR. +.RE +.sp +.ne 2 +.na +\fB\fBADDRESS\fR\fR +.ad +.sp .6 +.RS 4n +Displays the MAC address of the device. +.RE +.sp +.ne 2 +.na +\fB\fBINUSE\fR\fR +.ad +.sp .6 +.RS 4n +Displays whether or not a MAC Address is actively being used. +.RE +.sp +.ne 2 +.na +\fB\fBCLIENT\fR\fR +.ad +.sp .6 +.RS 4n +MAC clients that are using the address. +.RE +.RE +.sp +.ne 2 +.na \fB\fB-o\fR \fIfield\fR, \fB--output\fR=\fIfield\fR\fR .ad .sp .6 .RS 4n A case-insensitive, comma-separated list of output fields to display. The field name must be one of the fields listed below, or the special value \fBall\fR, to -display all fields. For each link, the following fields can be displayed: +display all fields. Note that if either \fB-H\fR or \fB-m\fR are specified, then +the valid options are those described in their respective sections. For each +link, the following fields can be displayed: .sp .ne 2 .na @@ -4347,7 +4403,6 @@ .RE .SS "Parseable Output Format" -.sp .LP Many \fBdladm\fR subcommands have an option that displays output in a machine-parseable format. The output format is one or more lines of colon @@ -4364,7 +4419,6 @@ \fBIFS=:\fR (see \fBEXAMPLES\fR, below). Note that escaping is not done when you request only a single field. .SS "General Link Properties" -.sp .LP The following general link properties are supported: .sp @@ -4621,7 +4675,6 @@ .RE .SS "Wifi Link Properties" -.sp .LP The following \fBWiFi\fR link properties are supported. Note that the ability to set a given property to a given value depends on the driver and hardware. 
@@ -4674,7 +4727,6 @@ .RE .SS "Ethernet Link Properties" -.sp .LP The following MII Properties, as documented in \fBieee802.3\fR(5), are supported in read-only mode: @@ -4890,7 +4942,6 @@ .RE .SS "IP Tunnel Link Properties" -.sp .LP The following IP tunnel link properties are supported. .sp @@ -5393,7 +5444,6 @@ configured on 6to4 tunnel links. .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -5427,13 +5477,11 @@ .TE .SH SEE ALSO -.sp .LP \fBacctadm\fR(1M), \fBautopush\fR(1M), \fBifconfig\fR(1M), \fBipsecconf\fR(1M), \fBndd\fR(1M), \fBpsrset\fR(1M), \fBwpad\fR(1M), \fBzonecfg\fR(1M), \fBattributes\fR(5), \fBieee802.3\fR(5), \fBdlpi\fR(7P) .SH NOTES -.sp .LP The preferred method of referring to an aggregation in the aggregation subcommands is by its link name. Referring to an aggregation by its integer diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man1m/prstat.1m --- a/usr/src/man/man1m/prstat.1m Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man1m/prstat.1m Wed Dec 10 08:46:44 2014 -0800 @@ -4,7 +4,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH PRSTAT 1M "Apr 15, 2013" +.TH PRSTAT 1M "Nov 14, 2014" .SH NAME prstat \- report active process statistics .SH SYNOPSIS @@ -18,7 +18,6 @@ .fi .SH DESCRIPTION -.sp .LP The \fBprstat\fR utility iteratively examines all active processes on the system and reports statistics based on the selected output mode and sort order. @@ -35,7 +34,6 @@ If you do not specify an option, \fBprstat\fR examines all processes and reports statistics sorted by \fBCPU\fR usage. .SH OPTIONS -.sp .LP The following options are supported: .sp @@ -177,10 +175,10 @@ .RS 4n Restrict number of output lines. The \fIntop\fR argument determines how many lines of process or \fBlwp\fR statistics are reported, and the \fInbottom\fR -argument determines how many lines of user, task, or projects statistics are -reported if the \fB-a\fR, \fB-t\fR, \fB-T\fR, or \fB-J\fR options are -specified. By default, \fBprstat\fR displays as many lines of output that fit -in a window or terminal. When you specify the \fB-c\fR option or direct the +argument determines how many lines of user, task, project or zone statistics +are reported if the \fB-a\fR, \fB-t\fR, \fB-T\fR, \fB-J\fR or \fB-Z\fR options +are specified. By default, \fBprstat\fR displays as many lines of output that +fit in a window or terminal. When you specify the \fB-c\fR option or direct the output to a file, the default values for \fBntop\fR and \fBnbottom\fR are \fB15\fR and \fB5\fR. 
.RE @@ -404,7 +402,6 @@ .RE .SH OUTPUT -.sp .LP The following list defines the column headings and the meanings of a \fBprstat\fR report: @@ -767,7 +764,6 @@ .RE .SH OPERANDS -.sp .LP The following operands are supported: .sp @@ -842,7 +838,6 @@ .sp .SH EXIT STATUS -.sp .LP The following exit values are returned: .sp @@ -866,14 +861,12 @@ .RE .SH SEE ALSO -.sp .LP \fBdate\fR(1), \fBlgrpinfo\fR(1), \fBplgrp\fR(1), \fBproc\fR(1), \fBps\fR(1), \fBtime\fR(2), \fBpsrinfo\fR(1M), \fBpsrset\fR(1M), \fBsar\fR(1M), \fBpset_getloadavg\fR(3C), \fBproc\fR(4), \fBproject\fR(4), \fBattributes\fR(5), \fBresource_controls\fR(5), \fBzones\fR(5) .SH NOTES -.sp .LP The snapshot of system usage displayed by \fBprstat\fR is true only for a split-second, and it may not be accurate by the time it is displayed. When the diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man2/read.2 --- a/usr/src/man/man2/read.2 Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man2/read.2 Wed Dec 10 08:46:44 2014 -0800 @@ -9,7 +9,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH READ 2 "Sep 13, 2007" +.TH READ 2 "Dec 03, 2014" .SH NAME read, readv, pread \- read from file .SH SYNOPSIS @@ -33,7 +33,6 @@ .fi .SH DESCRIPTION -.sp .LP The \fBread()\fR function attempts to read \fInbyte\fR bytes from the file associated with the open file descriptor, \fIfildes\fR, into the buffer pointed @@ -245,7 +244,6 @@ \fBread()\fR continues to operate normally until the stream head read queue is empty. Thereafter, it returns \fB0\fR. .SS "\fBreadv()\fR" -.sp .LP The \fBreadv()\fR function is equivalent to \fBread()\fR, but places the input data into the \fIiovcnt\fR buffers specified by the members of the \fIiov\fR @@ -273,7 +271,6 @@ Upon successful completion, \fBreadv()\fR marks for update the \fBst_atime\fR field of the file. .SS "\fBpread()\fR" -.sp .LP The \fBpread()\fR function performs the same action as \fBread()\fR, except that it reads from a given position in the file without changing the file @@ -284,13 +281,11 @@ attempt to perform a \fBpread()\fR on a file that is incapable of seeking results in an error. .SH RETURN VALUES -.sp .LP Upon successful completion, \fBread()\fR and \fBreadv()\fR return a non-negative integer indicating the number of bytes actually read. Otherwise, the functions return \fB\(mi1\fR and set \fBerrno\fR to indicate the error. .SH ERRORS -.sp .LP The \fBread()\fR, \fBreadv()\fR, and \fBpread()\fR functions will fail if: .sp @@ -328,6 +323,17 @@ .sp .ne 2 .na +\fB\fBECONNRESET\fR\fR +.ad +.RS 11n +The \fIfiledes\fR argument refers to a connection oriented socket and the +connection was forcibly closed by the peer and is no longer valid. I/O can no +longer be performed to \fIfiledes\fR. 
+.RE + +.sp +.ne 2 +.na \fB\fBEDEADLK\fR\fR .ad .RS 11n @@ -478,12 +484,10 @@ .RE .SH USAGE -.sp .LP The \fBpread()\fR function has a transitional interface for 64-bit file offsets. See \fBlf64\fR(5). .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -503,7 +507,6 @@ .TE .SH SEE ALSO -.sp .LP \fBIntro\fR(2), \fBchmod\fR(2), \fBcreat\fR(2), \fBdup\fR(2), \fBfcntl\fR(2), \fBgetmsg\fR(2), \fBioctl\fR(2), \fBlseek\fR(2), \fBopen\fR(2), \fBpipe\fR(2), diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man2/write.2 --- a/usr/src/man/man2/write.2 Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man2/write.2 Wed Dec 10 08:46:44 2014 -0800 @@ -9,7 +9,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH WRITE 2 "Jan 29, 2008" +.TH WRITE 2 "Dec 03, 2014" .SH NAME write, pwrite, writev \- write on a file .SH SYNOPSIS @@ -34,7 +34,6 @@ .fi .SH DESCRIPTION -.sp .LP The \fBwrite()\fR function attempts to write \fInbyte\fR bytes from the buffer pointed to by \fIbuf\fR to the file associated with the open file descriptor, @@ -258,7 +257,6 @@ \fBerrno\fR does not reflect the result of \fBwrite()\fR or \fBwritev()\fR but reflects the prior error. .SS "\fBpwrite()\fR" -.sp .LP The \fBpwrite()\fR function is equivalent to \fBwrite()\fR, except that it writes into a given position and does not change the file offset (regardless of @@ -266,7 +264,6 @@ the same as \fBwrite()\fR, with the addition of a fourth argument \fIoffset\fR for the desired position inside the file. .SS "\fBwritev()\fR" -.sp .LP The \fBwritev()\fR function performs the same action as \fBwrite()\fR, but gathers the output data from the \fIiovcnt\fR buffers specified by the members @@ -300,7 +297,6 @@ If the sum of the \fBiov_len\fR values is greater than \fBSSIZE_MAX\fR, the operation fails and no data is transferred. .SH RETURN VALUES -.sp .LP Upon successful completion, \fBwrite()\fR returns the number of bytes actually written to the file associated with \fIfildes\fR. This number is never greater @@ -312,7 +308,6 @@ written. Otherwise, it returns \fB\(mi1\fR, the file-pointer remains unchanged, and \fBerrno\fR is set to indicate an error. .SH ERRORS -.sp .LP The \fBwrite()\fR, \fBpwrite()\fR, and \fBwritev()\fR functions will fail if: .sp @@ -340,6 +335,17 @@ .sp .ne 2 .na +\fB\fBECONNRESET\fR\fR +.ad +.RS 11n +The \fIfiledes\fR argument refers to a connection oriented socket and the +connection was forcibly closed by the peer and is no longer valid. I/O can no +longer be performed to \fIfiledes\fR. +.RE + +.sp +.ne 2 +.na \fB\fBEDEADLK\fR\fR .ad .RS 11n @@ -557,12 +563,10 @@ .RE .SH USAGE -.sp .LP The \fBpwrite()\fR function has a transitional interface for 64-bit file offsets. See \fBlf64\fR(5). 
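The ECONNRESET cases added to read(2) and write(2) above describe when the error is raised but not how a caller typically reacts. A minimal C sketch follows; it is not taken from the manual pages or the changeset, and the function name, buffer handling, and messages are illustrative assumptions. It shows a full-write loop over a connected socket that treats ECONNRESET as a fatal, no-further-I/O condition while retrying EINTR and short writes.

/*
 * Minimal sketch (illustrative, not from the manual pages above):
 * write all of "buf" to a connected socket, handling short writes,
 * EINTR, and the ECONNRESET case documented in write(2)/read(2).
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
write_all(int fd, const char *buf, size_t len)
{
	while (len > 0) {
		ssize_t n = write(fd, buf, len);

		if (n >= 0) {
			buf += n;	/* advance past what was written */
			len -= n;
			continue;
		}
		if (errno == EINTR)
			continue;	/* interrupted; retry */
		if (errno == ECONNRESET) {
			/* Peer forcibly closed; no further I/O is possible. */
			(void) fprintf(stderr, "connection reset by peer\n");
			return (-1);
		}
		(void) fprintf(stderr, "write: %s\n", strerror(errno));
		return (-1);
	}
	return (0);
}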
.SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -582,7 +586,6 @@ .TE .SH SEE ALSO -.sp .LP \fBIntro\fR(2), \fBchmod\fR(2), \fBcreat\fR(2), \fBdup\fR(2), \fBfcntl\fR(2), \fBgetrlimit\fR(2), \fBioctl\fR(2), \fBlseek\fR(2), \fBopen\fR(2), diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man3socket/connect.3socket --- a/usr/src/man/man3socket/connect.3socket Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man3socket/connect.3socket Wed Dec 10 08:46:44 2014 -0800 @@ -1,11 +1,12 @@ '\" te .\" Copyright (C) 2005, Sun Microsystems, Inc. .\" All Rights Reserved +.\" Copyright (c) 2014, Joyent, Inc. .\" Copyright 1989 AT&T All Rights Reserved .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH CONNECT 3SOCKET "Mar 08, 2005" +.TH CONNECT 3SOCKET "Nov 25, 2014" .SH NAME connect \- initiate a connection on a socket .SH SYNOPSIS @@ -35,6 +36,173 @@ Generally, stream sockets can successfully \fBconnect()\fR only once. Datagram sockets can use \fBconnect()\fR multiple times to change their association. Datagram sockets can dissolve the association by connecting to a null address. +.SS Non-blocking Sockets +When a socket is created, it is by default a \fBblocking\fR socket. A socket may +be configured to be \fBnon-blocking\fR either at socket creation time or through +the use of \fBfcntl\fR(2). When a socket is set to be \fBnon-blocking\fR, a call +to connect initiates an asynchronous connection. If the connection cannot be +completed without blocking, such as when making a TCP connection to a remote +server, then the connection attempt is made in the background and \fBconnect\fR +returns -1 and errno is set to \fBEINPROGRESS\fR. +.LP +Applications can obtain the state of this connection attempt by polling the +socket's file descriptor for \fBPOLLOUT\fR. The event ports facility is the +preferred means of polling on the file descriptor, see \fBport_create\fR(3C) and +\fBport_get\fR(3C) for more information on event ports; however, applications +may also use traditional portable routines like \fBpoll\fR(2) and +\fBselect\fR(3C). +.LP +When an asynchronous connection has completed, the application must call +\fBgetsockopt\fR(3SOCKET) using the macro \fBSOL_SOCKET\fR as the \fIlevel\fR +argument and the macro \fBSO_ERROR\fR as the value of the \fIoption\fR argument. +If the value of the \fBSO_ERROR\fR socket option is zero, then the +connect was successfully established. Otherwise, the connection could not be +established and the value is the corresponding error code that would be commonly +found in \fBerrno\fR. +.LP +Even when a socket is in \fBnon-blocking\fR mode, a call to \fBconnect\fR may +fail synchronously. If any error other \fBEINPROGRESS\fR or \fBEINTR\fR occurs, +then there is no need for the application to poll for asynchronous completion. 
+Similarly, if a call to \fBconnect\fR returns successfully, then the socket +connection will be established and there is no need to poll for completion. +.SH EXAMPLES +.LP +\fBExample 1\fR Performing an asynchronous connection +.sp +.LP +The following sample C program shows how to create and connect to a remote host +using TCP. The program should be compiled and linked against libnsl and +libsocket. For example, if the contents of this example where in a file called +example.c, one would run cc example.c -lnsl -lsocket. +.sp +.in +2 +.nf +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + char *eptr; + long port; + int sock, ret, eport; + struct sockaddr_in6 sin6; + + if (argc != 3) { + fprintf(stderr, "connect: \\n"); + return (1); + } + + bzero(&sin6, sizeof (struct sockaddr_in6)); + sin6.sin6_family = AF_INET6; + + /* + * Try to parse as an IPv6 address and then try v4. + */ + ret = inet_pton(AF_INET6, argv[1], &sin6.sin6_addr); + if (ret == -1) { + perror("inet_pton"); + return (1); + } else if (ret == 0) { + struct in_addr v4; + ret = inet_pton(AF_INET, argv[1], &v4); + if (ret == -1) { + perror("inet_pton"); + return (1); + } else if (ret == 0) { + fprintf(stderr, "connect: %s is not a valid " + "IPv4 or IPv6 address\\n", argv[1]); + return (1); + } + /* N.B. Not a portable macro */ + IN6_INADDR_TO_V4MAPPED(&v4, &sin6.sin6_addr); + } + + errno = 0; + port = strtol(argv[2], &eptr, 10); + if (errno != 0 || *eptr != '\0') { + fprintf(stderr, "failed to parse port %s\\n", argv[2]); + return (1); + } + if (port <= 0 || port > UINT16_MAX) { + fprintf(stderr, "invalid port: %ld\\n", port); + return (1); + } + sin6.sin6_port = htons(port); + + sock = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (sock < 0) { + perror("socket"); + return (1); + } + + eport = port_create(); + if (eport < 0) { + perror("port_create"); + (void) close(sock); + return (1); + } + + ret = connect(sock, (struct sockaddr *)&sin6, + sizeof (struct sockaddr_in6)); + if (ret != 0 && errno != EINPROGRESS && errno != EINTR) { + perror("connect"); + (void) close(sock); + (void) close(eport); + return (1); + } + + if (ret != 0) { + port_event_t pe; + int err; + socklen_t sz = sizeof (err); + if (port_associate(eport, PORT_SOURCE_FD, sock, POLLOUT, + NULL) != 0) { + perror("port_associate"); + (void) close(sock); + (void) close(eport); + return (1); + } + if (port_get(eport, &pe, NULL) != 0) { + perror("port_get"); + (void) close(sock); + (void) close(eport); + return (1); + } + assert(pe.portev_source == PORT_SOURCE_FD); + assert(pe.portev_object == (uintptr_t)sock); + if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &sz) != 0) { + perror("getsockopt"); + (void) close(sock); + (void) close(eport); + return (1); + } + if (err != 0) { + /* Asynch connect failed */ + fprintf(stderr, "asnchronous connect: %s\\n", + strerror(err)); + (void) close(sock); + (void) close(eport); + return (1); + } + } + + /* Read and write to the socket and then clean up */ + + return (0); +} +.fi +.in -2 .SH RETURN VALUES .LP If the connection or binding succeeds, \fB0\fR is returned. Otherwise, @@ -116,8 +284,7 @@ .ad .RS 17n The socket is non-blocking, and the connection cannot be completed immediately. -You can use \fBselect\fR(3C) to complete the connection by selecting the -socket for writing. +See the section on \fBNon-blocking Sockets\fR for more information. 
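The Non-blocking Sockets section above notes that poll(2) and select(3C) may be used in place of event ports. A minimal sketch of the poll(2) variant follows; it is not taken from the manual page, and it assumes "sock" is a non-blocking socket on which connect() has just returned -1 with errno set to EINPROGRESS. The helper name is an illustrative assumption.

/*
 * Minimal sketch (illustrative, not from the manual page above):
 * wait for an asynchronous connect() to complete using poll(2)
 * instead of event ports, then fetch the result with SO_ERROR.
 * Assumes "sock" is non-blocking and connect() just returned -1
 * with errno == EINPROGRESS.
 */
#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>

static int
wait_for_connect(int sock)
{
	struct pollfd pfd;
	int err = 0;
	socklen_t len = sizeof (err);

	pfd.fd = sock;
	pfd.events = POLLOUT;	/* socket becomes writable on completion */
	pfd.revents = 0;

	for (;;) {
		int rv = poll(&pfd, 1, -1);

		if (rv == 1)
			break;
		if (rv == -1 && errno == EINTR)
			continue;	/* interrupted; poll again */
		return (-1);
	}

	/* SO_ERROR is zero on success, otherwise the connect errno. */
	if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &err, &len) != 0)
		return (-1);
	if (err != 0) {
		(void) fprintf(stderr, "connect: %s\n", strerror(err));
		errno = err;
		return (-1);
	}
	return (0);
}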
.RE .sp diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man3socket/recv.3socket --- a/usr/src/man/man3socket/recv.3socket Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man3socket/recv.3socket Wed Dec 10 08:46:44 2014 -0800 @@ -4,7 +4,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH RECV 3SOCKET "Aug 20, 2007" +.TH RECV 3SOCKET "Dec 03, 2014" .SH NAME recv, recvfrom, recvmsg \- receive a message from a socket .SH SYNOPSIS @@ -135,6 +135,17 @@ .sp .ne 2 .na +\fB\fBECONNRESET\fR\fR +.ad +.RS 16n +The \fIs\fR argument refers to a connection oriented socket and the connection +was forcibly closed by the peer and is no longer valid. I/O can no longer be +performed to \fIfiledes\fR. +.RE + +.sp +.ne 2 +.na \fB\fBEINVAL\fR\fR .ad .RS 16n diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man3socket/send.3socket --- a/usr/src/man/man3socket/send.3socket Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man3socket/send.3socket Wed Dec 10 08:46:44 2014 -0800 @@ -4,7 +4,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH SEND 3SOCKET "Aug 31, 2009" +.TH SEND 3SOCKET "Dec 03, 2014" .SH NAME send, sendto, sendmsg \- send a message from a socket .SH SYNOPSIS @@ -104,6 +104,17 @@ .sp .ne 2 .na +\fB\fBECONNRESET\fR\fR +.ad +.RS 16n +The \fIs\fR argument refers to a connection oriented socket and the connection +was forcibly closed by the peer and is no longer valid. I/O can no longer be +performed to \fIfiledes\fR. +.RE + +.sp +.ne 2 +.na \fB\fBEINTR\fR\fR .ad .RS 16n diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man3xnet/recv.3xnet --- a/usr/src/man/man3xnet/recv.3xnet Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man3xnet/recv.3xnet Wed Dec 10 08:46:44 2014 -0800 @@ -7,7 +7,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. 
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH RECV 3XNET "Jun 10, 2002" +.TH RECV 3XNET "Dec 03, 2014" .SH NAME recv \- receive a message from a connected socket .SH SYNOPSIS @@ -20,7 +20,6 @@ .fi .SH DESCRIPTION -.sp .LP The \fBrecv()\fR function receives a message from a connection-mode or connectionless-mode socket. It is normally used with connected sockets because @@ -118,7 +117,6 @@ socket's file descriptor, \fBrecv()\fR fails and sets \fBerrno\fR to \fBEAGAIN\fR or \fBEWOULDBLOCK\fR. .SH USAGE -.sp .LP The \fBrecv()\fR function is identical to \fBrecvfrom\fR(3XNET) with a zero \fIaddress_len\fR argument, and to \fBread()\fR if no flags are used. @@ -127,14 +125,12 @@ The \fBselect\fR(3C) and \fBpoll\fR(2) functions can be used to determine when data is available to be received. .SH RETURN VALUES -.sp .LP Upon successful completion, \fBrecv()\fR returns the length of the message in bytes. If no messages are available to be received and the peer has performed an orderly shutdown, \fBrecv()\fR returns 0. Otherwise, -1 is returned and \fBerrno\fR is set to indicate the error. .SH ERRORS -.sp .LP The \fBrecv()\fR function will fail if: .sp @@ -168,7 +164,9 @@ \fB\fBECONNRESET\fR\fR .ad .RS 15n -A connection was forcibly closed by a peer. +The \fIsocket\fR argument refers to a connection oriented socket and the +connection was forcibly closed by the peer and is no longer valid. I/O can no +longer be performed to \fIfiledes\fR. .RE .sp @@ -277,7 +275,6 @@ .RE .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -295,7 +292,6 @@ .TE .SH SEE ALSO -.sp .LP \fBpoll\fR(2), \fBrecvmsg\fR(3XNET), \fBrecvfrom\fR(3XNET), \fBselect\fR(3C), \fBsend\fR(3XNET), \fBsendmsg\fR(3XNET), \fBsendto\fR(3XNET), diff -r 61550c9ec412 -r 014608f1fae0 usr/src/man/man3xnet/send.3xnet --- a/usr/src/man/man3xnet/send.3xnet Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/man/man3xnet/send.3xnet Wed Dec 10 08:46:44 2014 -0800 @@ -7,7 +7,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH SEND 3XNET "Nov 1, 2003" +.TH SEND 3XNET "Dec 3, 2014" .SH NAME send \- send a message on a socket .SH SYNOPSIS @@ -20,7 +20,6 @@ .fi .SH PARAMETERS -.sp .ne 2 .na \fB\fIsocket\fR\fR @@ -77,7 +76,6 @@ .RE .SH DESCRIPTION -.sp .LP The \fBsend()\fR function initiates transmission of a message from the specified socket to its peer. The \fBsend()\fR function sends a message only @@ -106,17 +104,14 @@ The socket in use may require the process to have appropriate privileges to use the \fBsend()\fR function. 
.SH USAGE -.sp .LP The \fBsend()\fR function is identical to \fBsendto\fR(3XNET) with a null pointer \fIdest_len\fR argument, and to \fBwrite()\fR if no flags are used. .SH RETURN VALUES -.sp .LP Upon successful completion, \fBsend()\fR returns the number of bytes sent. Otherwise, \(mi1 is returned and \fBerrno\fR is set to indicate the error. .SH ERRORS -.sp .LP The \fBsend()\fR function will fail if: .sp @@ -148,7 +143,9 @@ \fB\fBECONNRESET\fR\fR .ad .RS 16n -A connection was forcibly closed by a peer. +The \fIsocket\fR argument refers to a connection oriented socket and the +connection was forcibly closed by the peer and is no longer valid. I/O can no +longer be performed to \fIfiledes\fR. .RE .sp @@ -285,7 +282,6 @@ .RE .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -303,7 +299,6 @@ .TE .SH SEE ALSO -.sp .LP \fBconnect\fR(3XNET), \fBgetsockopt\fR(3XNET), \fBpoll\fR(2), \fBrecv\fR(3XNET), \fBrecvfrom\fR(3XNET), \fBrecvmsg\fR(3XNET), diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/dev/sdev_zvolops.c --- a/usr/src/uts/common/fs/dev/sdev_zvolops.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c Wed Dec 10 08:46:44 2014 -0800 @@ -792,7 +792,10 @@ return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); } - ptr = strchr(ptr + 1, '/') + 1; + ptr = strchr(ptr + 1, '/'); + if (ptr == NULL) + return (ENOENT); + ptr++; rw_exit(&sdvp->sdev_contents); sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr); rw_enter(&sdvp->sdev_contents, RW_READER); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/arc.c --- a/usr/src/uts/common/fs/zfs/arc.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/arc.c Wed Dec 10 08:46:44 2014 -0800 @@ -196,6 +196,7 @@ uint64_t zfs_arc_max; uint64_t zfs_arc_min; uint64_t zfs_arc_meta_limit = 0; +uint64_t zfs_arc_meta_min = 0; int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; @@ -324,6 +325,7 @@ kstat_named_t arcstat_meta_used; kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_meta_max; + kstat_named_t arcstat_meta_min; } arc_stats_t; static arc_stats_t arc_stats = { @@ -389,7 +391,8 @@ { "duplicate_reads", KSTAT_DATA_UINT64 }, { "arc_meta_used", KSTAT_DATA_UINT64 }, { "arc_meta_limit", KSTAT_DATA_UINT64 }, - { "arc_meta_max", KSTAT_DATA_UINT64 } + { "arc_meta_max", KSTAT_DATA_UINT64 }, + { "arc_meta_min", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -452,6 +455,7 @@ #define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ #define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ #define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ +#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ #define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */ #define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ @@ -496,7 +500,7 @@ arc_buf_hdr_t *b_hash_next; arc_buf_t *b_buf; - uint32_t b_flags; + arc_flags_t b_flags; uint32_t b_datacnt; arc_callback_t *b_acb; @@ -524,50 +528,26 @@ static arc_buf_t *arc_eviction_list; static kmutex_t arc_eviction_mtx; static arc_buf_hdr_t arc_eviction_hdr; -static void arc_get_data_buf(arc_buf_t *buf); -static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock); -static int arc_evict_needed(arc_buf_contents_t type); -static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes); -static void arc_buf_watch(arc_buf_t *buf); - -static boolean_t 
l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab); #define GHOST_STATE(state) \ ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ (state) == arc_l2c_only) -/* - * Private ARC flags. These flags are private ARC only flags that will show up - * in b_flags in the arc_hdr_buf_t. Some flags are publicly declared, and can - * be passed in as arc_flags in things like arc_read. However, these flags - * should never be passed and should only be set by ARC code. When adding new - * public flags, make sure not to smash the private ones. - */ - -#define ARC_IN_HASH_TABLE (1 << 9) /* this buffer is hashed */ -#define ARC_IO_IN_PROGRESS (1 << 10) /* I/O in progress for buf */ -#define ARC_IO_ERROR (1 << 11) /* I/O failed for buf */ -#define ARC_FREED_IN_READ (1 << 12) /* buf freed while in read */ -#define ARC_BUF_AVAILABLE (1 << 13) /* block not in active use */ -#define ARC_INDIRECT (1 << 14) /* this is an indirect block */ -#define ARC_FREE_IN_PROGRESS (1 << 15) /* hdr about to be freed */ -#define ARC_L2_WRITING (1 << 16) /* L2ARC write in progress */ -#define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */ -#define ARC_L2_WRITE_HEAD (1 << 18) /* head of write list */ - -#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) -#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) -#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_IO_ERROR) -#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_PREFETCH) -#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FREED_IN_READ) -#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE) -#define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS) -#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_L2CACHE) -#define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \ - (hdr)->b_l2hdr != NULL) -#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING) -#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED) -#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD) +#define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_FLAG_IN_HASH_TABLE) +#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) +#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR) +#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH) +#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ) +#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE) +#define HDR_FREE_IN_PROGRESS(hdr) \ + ((hdr)->b_flags & ARC_FLAG_FREE_IN_PROGRESS) +#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE) +#define HDR_L2_READING(hdr) \ + ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS && \ + (hdr)->b_l2hdr != NULL) +#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING) +#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED) +#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD) /* * Other sizes @@ -699,14 +679,20 @@ static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; -static void l2arc_read_done(zio_t *zio); +static void arc_get_data_buf(arc_buf_t *); +static void arc_access(arc_buf_hdr_t *, kmutex_t *); +static int arc_evict_needed(arc_buf_contents_t); +static void arc_evict_ghost(arc_state_t *, uint64_t, int64_t); +static void arc_buf_watch(arc_buf_t *); + +static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *); +static void l2arc_read_done(zio_t *); static void l2arc_hdr_stat_add(void); static void l2arc_hdr_stat_remove(void); -static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t 
*l2hdr); -static void l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, - enum zio_compress c); -static void l2arc_release_cdata_buf(arc_buf_hdr_t *ab); +static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *); +static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress); +static void l2arc_release_cdata_buf(arc_buf_hdr_t *); static uint64_t buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth) @@ -751,14 +737,14 @@ uint64_t birth = BP_PHYSICAL_BIRTH(bp); uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); kmutex_t *hash_lock = BUF_HASH_LOCK(idx); - arc_buf_hdr_t *buf; + arc_buf_hdr_t *hdr; mutex_enter(hash_lock); - for (buf = buf_hash_table.ht_table[idx]; buf != NULL; - buf = buf->b_hash_next) { - if (BUF_EQUAL(spa, dva, birth, buf)) { + for (hdr = buf_hash_table.ht_table[idx]; hdr != NULL; + hdr = hdr->b_hash_next) { + if (BUF_EQUAL(spa, dva, birth, hdr)) { *lockp = hash_lock; - return (buf); + return (hdr); } } mutex_exit(hash_lock); @@ -773,27 +759,27 @@ * Otherwise returns NULL. */ static arc_buf_hdr_t * -buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp) +buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp) { - uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); + uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); kmutex_t *hash_lock = BUF_HASH_LOCK(idx); - arc_buf_hdr_t *fbuf; + arc_buf_hdr_t *fhdr; uint32_t i; - ASSERT(!DVA_IS_EMPTY(&buf->b_dva)); - ASSERT(buf->b_birth != 0); - ASSERT(!HDR_IN_HASH_TABLE(buf)); + ASSERT(!DVA_IS_EMPTY(&hdr->b_dva)); + ASSERT(hdr->b_birth != 0); + ASSERT(!HDR_IN_HASH_TABLE(hdr)); *lockp = hash_lock; mutex_enter(hash_lock); - for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL; - fbuf = fbuf->b_hash_next, i++) { - if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf)) - return (fbuf); + for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL; + fhdr = fhdr->b_hash_next, i++) { + if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr)) + return (fhdr); } - buf->b_hash_next = buf_hash_table.ht_table[idx]; - buf_hash_table.ht_table[idx] = buf; - buf->b_flags |= ARC_IN_HASH_TABLE; + hdr->b_hash_next = buf_hash_table.ht_table[idx]; + buf_hash_table.ht_table[idx] = hdr; + hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE; /* collect some hash table performance data */ if (i > 0) { @@ -811,22 +797,22 @@ } static void -buf_hash_remove(arc_buf_hdr_t *buf) +buf_hash_remove(arc_buf_hdr_t *hdr) { - arc_buf_hdr_t *fbuf, **bufp; - uint64_t idx = BUF_HASH_INDEX(buf->b_spa, &buf->b_dva, buf->b_birth); + arc_buf_hdr_t *fhdr, **hdrp; + uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx))); - ASSERT(HDR_IN_HASH_TABLE(buf)); - - bufp = &buf_hash_table.ht_table[idx]; - while ((fbuf = *bufp) != buf) { - ASSERT(fbuf != NULL); - bufp = &fbuf->b_hash_next; + ASSERT(HDR_IN_HASH_TABLE(hdr)); + + hdrp = &buf_hash_table.ht_table[idx]; + while ((fhdr = *hdrp) != hdr) { + ASSERT(fhdr != NULL); + hdrp = &fhdr->b_hash_next; } - *bufp = buf->b_hash_next; - buf->b_hash_next = NULL; - buf->b_flags &= ~ARC_IN_HASH_TABLE; + *hdrp = hdr->b_hash_next; + hdr->b_hash_next = NULL; + hdr->b_flags &= ~ARC_FLAG_IN_HASH_TABLE; /* collect some hash table performance data */ ARCSTAT_BUMPDOWN(arcstat_hash_elements); @@ -863,12 +849,12 @@ static int hdr_cons(void *vbuf, void *unused, int kmflag) { - arc_buf_hdr_t *buf = vbuf; - - bzero(buf, sizeof (arc_buf_hdr_t)); - refcount_create(&buf->b_refcnt); - cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL); - 
mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); + arc_buf_hdr_t *hdr = vbuf; + + bzero(hdr, sizeof (arc_buf_hdr_t)); + refcount_create(&hdr->b_refcnt); + cv_init(&hdr->b_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&hdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); return (0); @@ -895,12 +881,12 @@ static void hdr_dest(void *vbuf, void *unused) { - arc_buf_hdr_t *buf = vbuf; - - ASSERT(BUF_EMPTY(buf)); - refcount_destroy(&buf->b_refcnt); - cv_destroy(&buf->b_cv); - mutex_destroy(&buf->b_freeze_lock); + arc_buf_hdr_t *hdr = vbuf; + + ASSERT(BUF_EMPTY(hdr)); + refcount_destroy(&hdr->b_refcnt); + cv_destroy(&hdr->b_cv); + mutex_destroy(&hdr->b_freeze_lock); arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS); } @@ -982,7 +968,7 @@ mutex_enter(&buf->b_hdr->b_freeze_lock); if (buf->b_hdr->b_freeze_cksum == NULL || - (buf->b_hdr->b_flags & ARC_IO_ERROR)) { + (buf->b_hdr->b_flags & ARC_FLAG_IO_ERROR)) { mutex_exit(&buf->b_hdr->b_freeze_lock); return; } @@ -1073,7 +1059,7 @@ if (zfs_flags & ZFS_DEBUG_MODIFY) { if (buf->b_hdr->b_state != arc_anon) panic("modifying non-anon buffer!"); - if (buf->b_hdr->b_flags & ARC_IO_IN_PROGRESS) + if (buf->b_hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS) panic("modifying buffer while i/o in progress!"); arc_cksum_verify(buf); } @@ -1114,54 +1100,54 @@ } static void -add_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) +add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) { ASSERT(MUTEX_HELD(hash_lock)); - if ((refcount_add(&ab->b_refcnt, tag) == 1) && - (ab->b_state != arc_anon)) { - uint64_t delta = ab->b_size * ab->b_datacnt; - list_t *list = &ab->b_state->arcs_list[ab->b_type]; - uint64_t *size = &ab->b_state->arcs_lsize[ab->b_type]; - - ASSERT(!MUTEX_HELD(&ab->b_state->arcs_mtx)); - mutex_enter(&ab->b_state->arcs_mtx); - ASSERT(list_link_active(&ab->b_arc_node)); - list_remove(list, ab); - if (GHOST_STATE(ab->b_state)) { - ASSERT0(ab->b_datacnt); - ASSERT3P(ab->b_buf, ==, NULL); - delta = ab->b_size; + if ((refcount_add(&hdr->b_refcnt, tag) == 1) && + (hdr->b_state != arc_anon)) { + uint64_t delta = hdr->b_size * hdr->b_datacnt; + list_t *list = &hdr->b_state->arcs_list[hdr->b_type]; + uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type]; + + ASSERT(!MUTEX_HELD(&hdr->b_state->arcs_mtx)); + mutex_enter(&hdr->b_state->arcs_mtx); + ASSERT(list_link_active(&hdr->b_arc_node)); + list_remove(list, hdr); + if (GHOST_STATE(hdr->b_state)) { + ASSERT0(hdr->b_datacnt); + ASSERT3P(hdr->b_buf, ==, NULL); + delta = hdr->b_size; } ASSERT(delta > 0); ASSERT3U(*size, >=, delta); atomic_add_64(size, -delta); - mutex_exit(&ab->b_state->arcs_mtx); + mutex_exit(&hdr->b_state->arcs_mtx); /* remove the prefetch flag if we get a reference */ - if (ab->b_flags & ARC_PREFETCH) - ab->b_flags &= ~ARC_PREFETCH; + if (hdr->b_flags & ARC_FLAG_PREFETCH) + hdr->b_flags &= ~ARC_FLAG_PREFETCH; } } static int -remove_reference(arc_buf_hdr_t *ab, kmutex_t *hash_lock, void *tag) +remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) { int cnt; - arc_state_t *state = ab->b_state; + arc_state_t *state = hdr->b_state; ASSERT(state == arc_anon || MUTEX_HELD(hash_lock)); ASSERT(!GHOST_STATE(state)); - if (((cnt = refcount_remove(&ab->b_refcnt, tag)) == 0) && + if (((cnt = refcount_remove(&hdr->b_refcnt, tag)) == 0) && (state != arc_anon)) { - uint64_t *size = &state->arcs_lsize[ab->b_type]; + uint64_t *size = &state->arcs_lsize[hdr->b_type]; ASSERT(!MUTEX_HELD(&state->arcs_mtx)); 
mutex_enter(&state->arcs_mtx); - ASSERT(!list_link_active(&ab->b_arc_node)); - list_insert_head(&state->arcs_list[ab->b_type], ab); - ASSERT(ab->b_datacnt > 0); - atomic_add_64(size, ab->b_size * ab->b_datacnt); + ASSERT(!list_link_active(&hdr->b_arc_node)); + list_insert_head(&state->arcs_list[hdr->b_type], hdr); + ASSERT(hdr->b_datacnt > 0); + atomic_add_64(size, hdr->b_size * hdr->b_datacnt); mutex_exit(&state->arcs_mtx); } return (cnt); @@ -1172,19 +1158,20 @@ * for the buffer must be held by the caller. */ static void -arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *ab, kmutex_t *hash_lock) +arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, + kmutex_t *hash_lock) { - arc_state_t *old_state = ab->b_state; - int64_t refcnt = refcount_count(&ab->b_refcnt); + arc_state_t *old_state = hdr->b_state; + int64_t refcnt = refcount_count(&hdr->b_refcnt); uint64_t from_delta, to_delta; ASSERT(MUTEX_HELD(hash_lock)); ASSERT3P(new_state, !=, old_state); - ASSERT(refcnt == 0 || ab->b_datacnt > 0); - ASSERT(ab->b_datacnt == 0 || !GHOST_STATE(new_state)); - ASSERT(ab->b_datacnt <= 1 || old_state != arc_anon); - - from_delta = to_delta = ab->b_datacnt * ab->b_size; + ASSERT(refcnt == 0 || hdr->b_datacnt > 0); + ASSERT(hdr->b_datacnt == 0 || !GHOST_STATE(new_state)); + ASSERT(hdr->b_datacnt <= 1 || old_state != arc_anon); + + from_delta = to_delta = hdr->b_datacnt * hdr->b_size; /* * If this buffer is evictable, transfer it from the @@ -1193,22 +1180,22 @@ if (refcnt == 0) { if (old_state != arc_anon) { int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx); - uint64_t *size = &old_state->arcs_lsize[ab->b_type]; + uint64_t *size = &old_state->arcs_lsize[hdr->b_type]; if (use_mutex) mutex_enter(&old_state->arcs_mtx); - ASSERT(list_link_active(&ab->b_arc_node)); - list_remove(&old_state->arcs_list[ab->b_type], ab); + ASSERT(list_link_active(&hdr->b_arc_node)); + list_remove(&old_state->arcs_list[hdr->b_type], hdr); /* * If prefetching out of the ghost cache, * we will have a non-zero datacnt. 
*/ - if (GHOST_STATE(old_state) && ab->b_datacnt == 0) { + if (GHOST_STATE(old_state) && hdr->b_datacnt == 0) { /* ghost elements have a ghost size */ - ASSERT(ab->b_buf == NULL); - from_delta = ab->b_size; + ASSERT(hdr->b_buf == NULL); + from_delta = hdr->b_size; } ASSERT3U(*size, >=, from_delta); atomic_add_64(size, -from_delta); @@ -1218,18 +1205,19 @@ } if (new_state != arc_anon) { int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx); - uint64_t *size = &new_state->arcs_lsize[ab->b_type]; + uint64_t *size = &new_state->arcs_lsize[hdr->b_type]; if (use_mutex) mutex_enter(&new_state->arcs_mtx); - list_insert_head(&new_state->arcs_list[ab->b_type], ab); + list_insert_head(&new_state->arcs_list[hdr->b_type], + hdr); /* ghost elements have a ghost size */ if (GHOST_STATE(new_state)) { - ASSERT(ab->b_datacnt == 0); - ASSERT(ab->b_buf == NULL); - to_delta = ab->b_size; + ASSERT(hdr->b_datacnt == 0); + ASSERT(hdr->b_buf == NULL); + to_delta = hdr->b_size; } atomic_add_64(size, to_delta); @@ -1238,9 +1226,9 @@ } } - ASSERT(!BUF_EMPTY(ab)); - if (new_state == arc_anon && HDR_IN_HASH_TABLE(ab)) - buf_hash_remove(ab); + ASSERT(!BUF_EMPTY(hdr)); + if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr)) + buf_hash_remove(hdr); /* adjust state sizes */ if (to_delta) @@ -1249,7 +1237,7 @@ ASSERT3U(old_state->arcs_size, >=, from_delta); atomic_add_64(&old_state->arcs_size, -from_delta); } - ab->b_state = new_state; + hdr->b_state = new_state; /* adjust l2arc hdr stats */ if (new_state == arc_l2c_only) @@ -1451,7 +1439,7 @@ arc_access(hdr, hash_lock); mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); - ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), + ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH), demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, data, metadata, hits); } @@ -1652,7 +1640,7 @@ } else { ASSERT(buf == hdr->b_buf); ASSERT(buf->b_efunc == NULL); - hdr->b_flags |= ARC_BUF_AVAILABLE; + hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } mutex_exit(hash_lock); } else if (HDR_IO_IN_PROGRESS(hdr)) { @@ -1703,7 +1691,7 @@ } else if (no_callback) { ASSERT(hdr->b_buf == buf && buf->b_next == NULL); ASSERT(buf->b_efunc == NULL); - hdr->b_flags |= ARC_BUF_AVAILABLE; + hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } ASSERT(no_callback || hdr->b_datacnt > 1 || refcount_is_zero(&hdr->b_refcnt)); @@ -1778,8 +1766,7 @@ { arc_state_t *evicted_state; uint64_t bytes_evicted = 0, skipped = 0, missed = 0; - arc_buf_hdr_t *ab, *ab_prev = NULL; - list_t *list = &state->arcs_list[type]; + arc_buf_hdr_t *hdr, *hdr_prev = NULL; kmutex_t *hash_lock; boolean_t have_lock; void *stolen = NULL; @@ -1793,24 +1780,68 @@ mutex_enter(&state->arcs_mtx); mutex_enter(&evicted_state->arcs_mtx); - for (ab = list_tail(list); ab; ab = ab_prev) { - ab_prev = list_prev(list, ab); + /* + * Decide which "type" (data vs metadata) to recycle from. + * + * If we are over the metadata limit, recycle from metadata. + * If we are under the metadata minimum, recycle from data. + * Otherwise, recycle from whichever type has the oldest (least + * recently accessed) header. 
+ */ + if (recycle) { + arc_buf_hdr_t *data_hdr = + list_tail(&state->arcs_list[ARC_BUFC_DATA]); + arc_buf_hdr_t *metadata_hdr = + list_tail(&state->arcs_list[ARC_BUFC_METADATA]); + arc_buf_contents_t realtype; + if (data_hdr == NULL) { + realtype = ARC_BUFC_METADATA; + } else if (metadata_hdr == NULL) { + realtype = ARC_BUFC_DATA; + } else if (arc_meta_used >= arc_meta_limit) { + realtype = ARC_BUFC_METADATA; + } else if (arc_meta_used <= arc_meta_min) { + realtype = ARC_BUFC_DATA; + } else { + if (data_hdr->b_arc_access < + metadata_hdr->b_arc_access) { + realtype = ARC_BUFC_DATA; + } else { + realtype = ARC_BUFC_METADATA; + } + } + if (realtype != type) { + /* + * If we want to evict from a different list, + * we can not recycle, because DATA vs METADATA + * buffers are segregated into different kmem + * caches (and vmem arenas). + */ + type = realtype; + recycle = B_FALSE; + } + } + + list_t *list = &state->arcs_list[type]; + + for (hdr = list_tail(list); hdr; hdr = hdr_prev) { + hdr_prev = list_prev(list, hdr); /* prefetch buffers have a minimum lifespan */ - if (HDR_IO_IN_PROGRESS(ab) || - (spa && ab->b_spa != spa) || - (ab->b_flags & (ARC_PREFETCH|ARC_INDIRECT) && - ddi_get_lbolt() - ab->b_arc_access < + if (HDR_IO_IN_PROGRESS(hdr) || + (spa && hdr->b_spa != spa) || + (hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT) && + ddi_get_lbolt() - hdr->b_arc_access < arc_min_prefetch_lifespan)) { skipped++; continue; } /* "lookahead" for better eviction candidate */ - if (recycle && ab->b_size != bytes && - ab_prev && ab_prev->b_size == bytes) + if (recycle && hdr->b_size != bytes && + hdr_prev && hdr_prev->b_size == bytes) continue; /* ignore markers */ - if (ab->b_spa == 0) + if (hdr->b_spa == 0) continue; /* @@ -1823,34 +1854,34 @@ * the hot code path, so don't sleep. 
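The recycle-type decision added above is a pure policy and can be stated on its own. A sketch under simplified assumptions: the two list tails are reduced to their b_arc_access values (negative meaning that list is empty), and arc_meta_used, arc_meta_limit and arc_meta_min become plain parameters:

/*
 * Restatement of the recycle-type policy: over the metadata limit ->
 * metadata, under the metadata minimum -> data, otherwise whichever
 * tail was least recently accessed.
 */
#include <stdint.h>
#include <stdio.h>

typedef enum { BUFC_DATA, BUFC_METADATA } bufc_t;

static bufc_t
pick_recycle_type(int64_t data_access, int64_t metadata_access,
    uint64_t meta_used, uint64_t meta_limit, uint64_t meta_min)
{
	if (data_access < 0)		/* no evictable data buffers */
		return (BUFC_METADATA);
	if (metadata_access < 0)	/* no evictable metadata buffers */
		return (BUFC_DATA);
	if (meta_used >= meta_limit)
		return (BUFC_METADATA);
	if (meta_used <= meta_min)
		return (BUFC_DATA);
	return (data_access < metadata_access ?
	    BUFC_DATA : BUFC_METADATA);
}

int
main(void)
{
	/* over the limit: metadata is recycled even though data is older */
	(void) printf("%d\n", pick_recycle_type(10, 20, 900, 800, 100));
	/* between min and limit: the older (data) tail wins */
	(void) printf("%d\n", pick_recycle_type(10, 20, 500, 800, 100));
	return (0);
}

As the hunk's own comment notes, when the chosen type differs from the requested one the caller drops back to a non-recycling eviction, because data and metadata buffers come from different kmem caches.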
*/ if (!recycle && count++ > arc_evict_iterations) { - list_insert_after(list, ab, &marker); + list_insert_after(list, hdr, &marker); mutex_exit(&evicted_state->arcs_mtx); mutex_exit(&state->arcs_mtx); kpreempt(KPREEMPT_SYNC); mutex_enter(&state->arcs_mtx); mutex_enter(&evicted_state->arcs_mtx); - ab_prev = list_prev(list, &marker); + hdr_prev = list_prev(list, &marker); list_remove(list, &marker); count = 0; continue; } - hash_lock = HDR_LOCK(ab); + hash_lock = HDR_LOCK(hdr); have_lock = MUTEX_HELD(hash_lock); if (have_lock || mutex_tryenter(hash_lock)) { - ASSERT0(refcount_count(&ab->b_refcnt)); - ASSERT(ab->b_datacnt > 0); - while (ab->b_buf) { - arc_buf_t *buf = ab->b_buf; + ASSERT0(refcount_count(&hdr->b_refcnt)); + ASSERT(hdr->b_datacnt > 0); + while (hdr->b_buf) { + arc_buf_t *buf = hdr->b_buf; if (!mutex_tryenter(&buf->b_evict_lock)) { missed += 1; break; } if (buf->b_data) { - bytes_evicted += ab->b_size; - if (recycle && ab->b_type == type && - ab->b_size == bytes && - !HDR_L2_WRITING(ab)) { + bytes_evicted += hdr->b_size; + if (recycle && hdr->b_type == type && + hdr->b_size == bytes && + !HDR_L2_WRITING(hdr)) { stolen = buf->b_data; recycle = FALSE; } @@ -1859,7 +1890,7 @@ mutex_enter(&arc_eviction_mtx); arc_buf_destroy(buf, buf->b_data == stolen, FALSE); - ab->b_buf = buf->b_next; + hdr->b_buf = buf->b_next; buf->b_hdr = &arc_eviction_hdr; buf->b_next = arc_eviction_list; arc_eviction_list = buf; @@ -1872,26 +1903,26 @@ } } - if (ab->b_l2hdr) { + if (hdr->b_l2hdr) { ARCSTAT_INCR(arcstat_evict_l2_cached, - ab->b_size); + hdr->b_size); } else { - if (l2arc_write_eligible(ab->b_spa, ab)) { + if (l2arc_write_eligible(hdr->b_spa, hdr)) { ARCSTAT_INCR(arcstat_evict_l2_eligible, - ab->b_size); + hdr->b_size); } else { ARCSTAT_INCR( arcstat_evict_l2_ineligible, - ab->b_size); + hdr->b_size); } } - if (ab->b_datacnt == 0) { - arc_change_state(evicted_state, ab, hash_lock); - ASSERT(HDR_IN_HASH_TABLE(ab)); - ab->b_flags |= ARC_IN_HASH_TABLE; - ab->b_flags &= ~ARC_BUF_AVAILABLE; - DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, ab); + if (hdr->b_datacnt == 0) { + arc_change_state(evicted_state, hdr, hash_lock); + ASSERT(HDR_IN_HASH_TABLE(hdr)); + hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE; + hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; + DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr); } if (!have_lock) mutex_exit(hash_lock); @@ -1932,7 +1963,7 @@ static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes) { - arc_buf_hdr_t *ab, *ab_prev; + arc_buf_hdr_t *hdr, *hdr_prev; arc_buf_hdr_t marker = { 0 }; list_t *list = &state->arcs_list[ARC_BUFC_DATA]; kmutex_t *hash_lock; @@ -1943,18 +1974,18 @@ ASSERT(GHOST_STATE(state)); top: mutex_enter(&state->arcs_mtx); - for (ab = list_tail(list); ab; ab = ab_prev) { - ab_prev = list_prev(list, ab); - if (ab->b_type > ARC_BUFC_NUMTYPES) - panic("invalid ab=%p", (void *)ab); - if (spa && ab->b_spa != spa) + for (hdr = list_tail(list); hdr; hdr = hdr_prev) { + hdr_prev = list_prev(list, hdr); + if (hdr->b_type > ARC_BUFC_NUMTYPES) + panic("invalid hdr=%p", (void *)hdr); + if (spa && hdr->b_spa != spa) continue; /* ignore markers */ - if (ab->b_spa == 0) + if (hdr->b_spa == 0) continue; - hash_lock = HDR_LOCK(ab); + hash_lock = HDR_LOCK(hdr); /* caller may be trying to modify this buffer, skip it */ if (MUTEX_HELD(hash_lock)) continue; @@ -1966,35 +1997,35 @@ * before reacquiring the lock. 
*/ if (count++ > arc_evict_iterations) { - list_insert_after(list, ab, &marker); + list_insert_after(list, hdr, &marker); mutex_exit(&state->arcs_mtx); kpreempt(KPREEMPT_SYNC); mutex_enter(&state->arcs_mtx); - ab_prev = list_prev(list, &marker); + hdr_prev = list_prev(list, &marker); list_remove(list, &marker); count = 0; continue; } if (mutex_tryenter(hash_lock)) { - ASSERT(!HDR_IO_IN_PROGRESS(ab)); - ASSERT(ab->b_buf == NULL); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT(hdr->b_buf == NULL); ARCSTAT_BUMP(arcstat_deleted); - bytes_deleted += ab->b_size; - - if (ab->b_l2hdr != NULL) { + bytes_deleted += hdr->b_size; + + if (hdr->b_l2hdr != NULL) { /* * This buffer is cached on the 2nd Level ARC; * don't destroy the header. */ - arc_change_state(arc_l2c_only, ab, hash_lock); + arc_change_state(arc_l2c_only, hdr, hash_lock); mutex_exit(hash_lock); } else { - arc_change_state(arc_anon, ab, hash_lock); + arc_change_state(arc_anon, hdr, hash_lock); mutex_exit(hash_lock); - arc_hdr_destroy(ab); + arc_hdr_destroy(hdr); } - DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, ab); + DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); if (bytes >= 0 && bytes_deleted >= bytes) break; } else if (bytes < 0) { @@ -2003,12 +2034,12 @@ * hash lock to become available. Once its * available, restart from where we left off. */ - list_insert_after(list, ab, &marker); + list_insert_after(list, hdr, &marker); mutex_exit(&state->arcs_mtx); mutex_enter(hash_lock); mutex_exit(hash_lock); mutex_enter(&state->arcs_mtx); - ab_prev = list_prev(list, &marker); + hdr_prev = list_prev(list, &marker); list_remove(list, &marker); } else { bufs_skipped += 1; @@ -2524,7 +2555,8 @@ * will end up on the mru list; so steal space from there. */ if (state == arc_mfu_ghost) - state = buf->b_hdr->b_flags & ARC_PREFETCH ? arc_mru : arc_mfu; + state = buf->b_hdr->b_flags & ARC_FLAG_PREFETCH ? + arc_mru : arc_mfu; else if (state == arc_mru_ghost) state = arc_mru; @@ -2579,25 +2611,25 @@ * NOTE: the hash lock is dropped in this function. */ static void -arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock) +arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) { clock_t now; ASSERT(MUTEX_HELD(hash_lock)); - if (buf->b_state == arc_anon) { + if (hdr->b_state == arc_anon) { /* * This buffer is not in the cache, and does not * appear in our "ghost" list. Add the new buffer * to the MRU state. */ - ASSERT(buf->b_arc_access == 0); - buf->b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); - arc_change_state(arc_mru, buf, hash_lock); - - } else if (buf->b_state == arc_mru) { + ASSERT(hdr->b_arc_access == 0); + hdr->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); + arc_change_state(arc_mru, hdr, hash_lock); + + } else if (hdr->b_state == arc_mru) { now = ddi_get_lbolt(); /* @@ -2608,14 +2640,14 @@ * - move the buffer to the head of the list if this is * another prefetch (to make it less likely to be evicted). */ - if ((buf->b_flags & ARC_PREFETCH) != 0) { - if (refcount_count(&buf->b_refcnt) == 0) { - ASSERT(list_link_active(&buf->b_arc_node)); + if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) { + if (refcount_count(&hdr->b_refcnt) == 0) { + ASSERT(list_link_active(&hdr->b_arc_node)); } else { - buf->b_flags &= ~ARC_PREFETCH; + hdr->b_flags &= ~ARC_FLAG_PREFETCH; ARCSTAT_BUMP(arcstat_mru_hits); } - buf->b_arc_access = now; + hdr->b_arc_access = now; return; } @@ -2624,18 +2656,18 @@ * but it is still in the cache. Move it to the MFU * state. 
*/ - if (now > buf->b_arc_access + ARC_MINTIME) { + if (now > hdr->b_arc_access + ARC_MINTIME) { /* * More than 125ms have passed since we * instantiated this buffer. Move it to the * most frequently used state. */ - buf->b_arc_access = now; - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); - arc_change_state(arc_mfu, buf, hash_lock); + hdr->b_arc_access = now; + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); + arc_change_state(arc_mfu, hdr, hash_lock); } ARCSTAT_BUMP(arcstat_mru_hits); - } else if (buf->b_state == arc_mru_ghost) { + } else if (hdr->b_state == arc_mru_ghost) { arc_state_t *new_state; /* * This buffer has been "accessed" recently, but @@ -2643,21 +2675,21 @@ * MFU state. */ - if (buf->b_flags & ARC_PREFETCH) { + if (hdr->b_flags & ARC_FLAG_PREFETCH) { new_state = arc_mru; - if (refcount_count(&buf->b_refcnt) > 0) - buf->b_flags &= ~ARC_PREFETCH; - DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, buf); + if (refcount_count(&hdr->b_refcnt) > 0) + hdr->b_flags &= ~ARC_FLAG_PREFETCH; + DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); } else { new_state = arc_mfu; - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); } - buf->b_arc_access = ddi_get_lbolt(); - arc_change_state(new_state, buf, hash_lock); + hdr->b_arc_access = ddi_get_lbolt(); + arc_change_state(new_state, hdr, hash_lock); ARCSTAT_BUMP(arcstat_mru_ghost_hits); - } else if (buf->b_state == arc_mfu) { + } else if (hdr->b_state == arc_mfu) { /* * This buffer has been accessed more than once and is * still in the cache. Keep it in the MFU state. @@ -2667,13 +2699,13 @@ * If it was a prefetch, we will explicitly move it to * the head of the list now. */ - if ((buf->b_flags & ARC_PREFETCH) != 0) { - ASSERT(refcount_count(&buf->b_refcnt) == 0); - ASSERT(list_link_active(&buf->b_arc_node)); + if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) { + ASSERT(refcount_count(&hdr->b_refcnt) == 0); + ASSERT(list_link_active(&hdr->b_arc_node)); } ARCSTAT_BUMP(arcstat_mfu_hits); - buf->b_arc_access = ddi_get_lbolt(); - } else if (buf->b_state == arc_mfu_ghost) { + hdr->b_arc_access = ddi_get_lbolt(); + } else if (hdr->b_state == arc_mfu_ghost) { arc_state_t *new_state = arc_mfu; /* * This buffer has been accessed more than once but has @@ -2681,28 +2713,28 @@ * MFU state. */ - if (buf->b_flags & ARC_PREFETCH) { + if (hdr->b_flags & ARC_FLAG_PREFETCH) { /* * This is a prefetch access... * move this block back to the MRU state. */ - ASSERT0(refcount_count(&buf->b_refcnt)); + ASSERT0(refcount_count(&hdr->b_refcnt)); new_state = arc_mru; } - buf->b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); - arc_change_state(new_state, buf, hash_lock); + hdr->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); + arc_change_state(new_state, hdr, hash_lock); ARCSTAT_BUMP(arcstat_mfu_ghost_hits); - } else if (buf->b_state == arc_l2c_only) { + } else if (hdr->b_state == arc_l2c_only) { /* * This buffer is on the 2nd Level ARC. 
*/ - buf->b_arc_access = ddi_get_lbolt(); - DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf); - arc_change_state(arc_mfu, buf, hash_lock); + hdr->b_arc_access = ddi_get_lbolt(); + DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); + arc_change_state(arc_mfu, hdr, hash_lock); } else { ASSERT(!"invalid arc state"); } @@ -2770,9 +2802,9 @@ (found == hdr && HDR_L2_READING(hdr))); } - hdr->b_flags &= ~ARC_L2_EVICTED; - if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH)) - hdr->b_flags &= ~ARC_L2CACHE; + hdr->b_flags &= ~ARC_FLAG_L2_EVICTED; + if (l2arc_noprefetch && (hdr->b_flags & ARC_FLAG_PREFETCH)) + hdr->b_flags &= ~ARC_FLAG_L2CACHE; /* byteswap if necessary */ callback_list = hdr->b_acb; @@ -2812,18 +2844,18 @@ } } hdr->b_acb = NULL; - hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; ASSERT(!HDR_BUF_AVAILABLE(hdr)); if (abuf == buf) { ASSERT(buf->b_efunc == NULL); ASSERT(hdr->b_datacnt == 1); - hdr->b_flags |= ARC_BUF_AVAILABLE; + hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL); if (zio->io_error != 0) { - hdr->b_flags |= ARC_IO_ERROR; + hdr->b_flags |= ARC_FLAG_IO_ERROR; if (hdr->b_state != arc_anon) arc_change_state(arc_anon, hdr, hash_lock); if (HDR_IN_HASH_TABLE(hdr)) @@ -2889,8 +2921,8 @@ */ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, - void *private, zio_priority_t priority, int zio_flags, uint32_t *arc_flags, - const zbookmark_phys_t *zb) + void *private, zio_priority_t priority, int zio_flags, + arc_flags_t *arc_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = NULL; arc_buf_t *buf = NULL; @@ -2912,16 +2944,16 @@ if (hdr != NULL && hdr->b_datacnt > 0) { - *arc_flags |= ARC_CACHED; + *arc_flags |= ARC_FLAG_CACHED; if (HDR_IO_IN_PROGRESS(hdr)) { - if (*arc_flags & ARC_WAIT) { + if (*arc_flags & ARC_FLAG_WAIT) { cv_wait(&hdr->b_cv, hash_lock); mutex_exit(hash_lock); goto top; } - ASSERT(*arc_flags & ARC_NOWAIT); + ASSERT(*arc_flags & ARC_FLAG_NOWAIT); if (done) { arc_callback_t *acb = NULL; @@ -2959,24 +2991,24 @@ ASSERT(buf->b_data); if (HDR_BUF_AVAILABLE(hdr)) { ASSERT(buf->b_efunc == NULL); - hdr->b_flags &= ~ARC_BUF_AVAILABLE; + hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; } else { buf = arc_buf_clone(buf); } - } else if (*arc_flags & ARC_PREFETCH && + } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_refcnt) == 0) { - hdr->b_flags |= ARC_PREFETCH; + hdr->b_flags |= ARC_FLAG_PREFETCH; } DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); arc_access(hdr, hash_lock); - if (*arc_flags & ARC_L2CACHE) - hdr->b_flags |= ARC_L2CACHE; - if (*arc_flags & ARC_L2COMPRESS) - hdr->b_flags |= ARC_L2COMPRESS; + if (*arc_flags & ARC_FLAG_L2CACHE) + hdr->b_flags |= ARC_FLAG_L2CACHE; + if (*arc_flags & ARC_FLAG_L2COMPRESS) + hdr->b_flags |= ARC_FLAG_L2COMPRESS; mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); - ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), + ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH), demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, data, metadata, hits); @@ -3010,18 +3042,19 @@ (void) arc_buf_remove_ref(buf, private); goto top; /* restart the IO request */ } + /* if this is a prefetch, we don't have a reference */ - if (*arc_flags & ARC_PREFETCH) { + if (*arc_flags & ARC_FLAG_PREFETCH) { (void) remove_reference(hdr, hash_lock, private); - hdr->b_flags |= ARC_PREFETCH; + hdr->b_flags |= ARC_FLAG_PREFETCH; } - if (*arc_flags & ARC_L2CACHE) - hdr->b_flags |= ARC_L2CACHE; - if (*arc_flags & ARC_L2COMPRESS) - 
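The arc_access() hunks above are a pure rename (buf to hdr), so the MRU/MFU transition policy is unchanged. A compact restatement of that policy, with simplified state and flag names and an illustrative MINTIME constant (the real ARC_MINTIME value is defined elsewhere and corresponds to roughly 125ms):

/*
 * Policy only: locking, access-time updates, kstats and DTrace probes
 * are omitted.
 */
#include <stdbool.h>
#include <stdio.h>

typedef enum { ANON, MRU, MRU_GHOST, MFU, MFU_GHOST, L2C_ONLY } state_t;

#define	MINTIME	13	/* illustrative lbolt tick count, not ARC_MINTIME */

static state_t
next_state(state_t cur, bool prefetch, long now, long last_access)
{
	switch (cur) {
	case ANON:
		return (MRU);			/* first access */
	case MRU:
		if (prefetch)
			return (MRU);		/* prefetches are not promoted */
		return (now > last_access + MINTIME ? MFU : MRU);
	case MRU_GHOST:
	case MFU_GHOST:
		return (prefetch ? MRU : MFU);	/* revived from a ghost list */
	case MFU:
		return (MFU);
	case L2C_ONLY:
		return (MFU);			/* pulled back in from L2ARC */
	}
	return (cur);
}

int
main(void)
{
	(void) printf("%d\n", next_state(MRU, false, 100, 50));	/* MFU */
	(void) printf("%d\n", next_state(MRU, false, 100, 95));	/* MRU */
	return (0);
}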
hdr->b_flags |= ARC_L2COMPRESS; + if (*arc_flags & ARC_FLAG_L2CACHE) + hdr->b_flags |= ARC_FLAG_L2CACHE; + if (*arc_flags & ARC_FLAG_L2COMPRESS) + hdr->b_flags |= ARC_FLAG_L2COMPRESS; if (BP_GET_LEVEL(bp) > 0) - hdr->b_flags |= ARC_INDIRECT; + hdr->b_flags |= ARC_FLAG_INDIRECT; } else { /* this block is in the ghost cache */ ASSERT(GHOST_STATE(hdr->b_state)); @@ -3030,14 +3063,14 @@ ASSERT(hdr->b_buf == NULL); /* if this is a prefetch, we don't have a reference */ - if (*arc_flags & ARC_PREFETCH) - hdr->b_flags |= ARC_PREFETCH; + if (*arc_flags & ARC_FLAG_PREFETCH) + hdr->b_flags |= ARC_FLAG_PREFETCH; else add_reference(hdr, hash_lock, private); - if (*arc_flags & ARC_L2CACHE) - hdr->b_flags |= ARC_L2CACHE; - if (*arc_flags & ARC_L2COMPRESS) - hdr->b_flags |= ARC_L2COMPRESS; + if (*arc_flags & ARC_FLAG_L2CACHE) + hdr->b_flags |= ARC_FLAG_L2CACHE; + if (*arc_flags & ARC_FLAG_L2COMPRESS) + hdr->b_flags |= ARC_FLAG_L2COMPRESS; buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; buf->b_data = NULL; @@ -3059,7 +3092,7 @@ ASSERT(hdr->b_acb == NULL); hdr->b_acb = acb; - hdr->b_flags |= ARC_IO_IN_PROGRESS; + hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS; if (hdr->b_l2hdr != NULL && (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) { @@ -3086,7 +3119,7 @@ DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp, uint64_t, size, zbookmark_phys_t *, zb); ARCSTAT_BUMP(arcstat_misses); - ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH), + ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH), demand, prefetch, hdr->b_type != ARC_BUFC_METADATA, data, metadata, misses); @@ -3148,12 +3181,12 @@ zio_t *, rzio); ARCSTAT_INCR(arcstat_l2_read_bytes, b_asize); - if (*arc_flags & ARC_NOWAIT) { + if (*arc_flags & ARC_FLAG_NOWAIT) { zio_nowait(rzio); return (0); } - ASSERT(*arc_flags & ARC_WAIT); + ASSERT(*arc_flags & ARC_FLAG_WAIT); if (zio_wait(rzio) == 0) return (0); @@ -3179,10 +3212,10 @@ rzio = zio_read(pio, spa, bp, buf->b_data, size, arc_read_done, buf, priority, zio_flags, zb); - if (*arc_flags & ARC_WAIT) + if (*arc_flags & ARC_FLAG_WAIT) return (zio_wait(rzio)); - ASSERT(*arc_flags & ARC_NOWAIT); + ASSERT(*arc_flags & ARC_FLAG_NOWAIT); zio_nowait(rzio); } return (0); @@ -3219,7 +3252,7 @@ if (HDR_BUF_AVAILABLE(hdr)) { arc_buf_t *buf = hdr->b_buf; add_reference(hdr, hash_lock, FTAG); - hdr->b_flags &= ~ARC_BUF_AVAILABLE; + hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; mutex_exit(hash_lock); arc_release(buf, FTAG); @@ -3286,7 +3319,7 @@ arc_buf_destroy(buf, FALSE, TRUE); } else { ASSERT(buf == hdr->b_buf); - hdr->b_flags |= ARC_BUF_AVAILABLE; + hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; mutex_exit(&buf->b_evict_lock); } @@ -3392,7 +3425,7 @@ nhdr->b_buf = buf; nhdr->b_state = arc_anon; nhdr->b_arc_access = 0; - nhdr->b_flags = flags & ARC_L2_WRITING; + nhdr->b_flags = flags & ARC_FLAG_L2_WRITING; nhdr->b_l2hdr = NULL; nhdr->b_datacnt = 1; nhdr->b_freeze_cksum = NULL; @@ -3476,7 +3509,7 @@ mutex_exit(&hdr->b_freeze_lock); } arc_cksum_compute(buf, B_FALSE); - hdr->b_flags |= ARC_IO_IN_PROGRESS; + hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS; } /* @@ -3557,13 +3590,13 @@ ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); } } - hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; /* if it's not anon, we are doing a scrub */ if (!exists && hdr->b_state == arc_anon) arc_access(hdr, hash_lock); mutex_exit(hash_lock); } else { - hdr->b_flags &= ~ARC_IO_IN_PROGRESS; + hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; } ASSERT(!refcount_is_zero(&hdr->b_refcnt)); @@ -3586,12 +3619,12 @@ ASSERT(ready != NULL); 
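Note that arc_read() now takes an arc_flags_t *arc_flags and the word is in-out: callers pass ARC_FLAG_WAIT or ARC_FLAG_NOWAIT in, and the ARC reports a hit back through ARC_FLAG_CACHED (the dbuf.c hunk below checks it to set DB_RF_CACHED). A toy sketch of that round trip; the enum values here are placeholders, since the real ARC_FLAG_* definitions live in arc.h and are not part of this hunk:

#include <stdio.h>

typedef enum arc_flags {
	ARC_FLAG_WAIT	= 1 << 0,	/* caller blocks for the I/O */
	ARC_FLAG_NOWAIT	= 1 << 1,	/* completion via callback */
	ARC_FLAG_CACHED	= 1 << 2	/* set on return when it was a hit */
} arc_flags_t;

/* Toy stand-in for arc_read(): reports a cache hit back to the caller. */
static int
toy_arc_read(arc_flags_t *flags, int in_cache)
{
	if (in_cache)
		*flags |= ARC_FLAG_CACHED;
	if (*flags & ARC_FLAG_WAIT)
		return (0);		/* synchronous: done before returning */
	return (0);			/* ARC_FLAG_NOWAIT: callback later */
}

int
main(void)
{
	arc_flags_t aflags = ARC_FLAG_WAIT;

	(void) toy_arc_read(&aflags, 1);
	if (aflags & ARC_FLAG_CACHED)
		(void) printf("cache hit\n");
	return (0);
}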
ASSERT(done != NULL); ASSERT(!HDR_IO_ERROR(hdr)); - ASSERT((hdr->b_flags & ARC_IO_IN_PROGRESS) == 0); + ASSERT((hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS) == 0); ASSERT(hdr->b_acb == NULL); if (l2arc) - hdr->b_flags |= ARC_L2CACHE; + hdr->b_flags |= ARC_FLAG_L2CACHE; if (l2arc_compress) - hdr->b_flags |= ARC_L2COMPRESS; + hdr->b_flags |= ARC_FLAG_L2COMPRESS; callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_physdone = physdone; @@ -3755,6 +3788,12 @@ if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0) arc_c_min = arc_meta_limit / 2; + if (zfs_arc_meta_min > 0) { + arc_meta_min = zfs_arc_meta_min; + } else { + arc_meta_min = arc_c_min / 2; + } + if (zfs_arc_grow_retry > 0) arc_grow_retry = zfs_arc_grow_retry; @@ -4032,7 +4071,7 @@ */ static boolean_t -l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab) +l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr) { /* * A buffer is *not* eligible for the L2ARC if it: @@ -4041,8 +4080,8 @@ * 3. has an I/O in progress (it may be an incomplete read). * 4. is flagged not eligible (zfs property). */ - if (ab->b_spa != spa_guid || ab->b_l2hdr != NULL || - HDR_IO_IN_PROGRESS(ab) || !HDR_L2CACHE(ab)) + if (hdr->b_spa != spa_guid || hdr->b_l2hdr != NULL || + HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr)) return (B_FALSE); return (B_TRUE); @@ -4203,7 +4242,7 @@ l2arc_write_callback_t *cb; l2arc_dev_t *dev; list_t *buflist; - arc_buf_hdr_t *head, *ab, *ab_prev; + arc_buf_hdr_t *head, *hdr, *hdr_prev; l2arc_buf_hdr_t *abl2; kmutex_t *hash_lock; int64_t bytes_dropped = 0; @@ -4227,17 +4266,17 @@ /* * All writes completed, or an error was hit. */ - for (ab = list_prev(buflist, head); ab; ab = ab_prev) { - ab_prev = list_prev(buflist, ab); - abl2 = ab->b_l2hdr; + for (hdr = list_prev(buflist, head); hdr; hdr = hdr_prev) { + hdr_prev = list_prev(buflist, hdr); + abl2 = hdr->b_l2hdr; /* * Release the temporary compressed buffer as soon as possible. */ if (abl2->b_compress != ZIO_COMPRESS_OFF) - l2arc_release_cdata_buf(ab); - - hash_lock = HDR_LOCK(ab); + l2arc_release_cdata_buf(hdr); + + hash_lock = HDR_LOCK(hdr); if (!mutex_tryenter(hash_lock)) { /* * This buffer misses out. It may be in a stage @@ -4252,18 +4291,18 @@ /* * Error - drop L2ARC entry. */ - list_remove(buflist, ab); + list_remove(buflist, hdr); ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize); bytes_dropped += abl2->b_asize; - ab->b_l2hdr = NULL; + hdr->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); - ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); + ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); } /* * Allow ARC to begin reads to this L2ARC entry. */ - ab->b_flags &= ~ARC_L2_WRITING; + hdr->b_flags &= ~ARC_FLAG_L2_WRITING; mutex_exit(hash_lock); } @@ -4410,7 +4449,7 @@ { list_t *buflist; l2arc_buf_hdr_t *abl2; - arc_buf_hdr_t *ab, *ab_prev; + arc_buf_hdr_t *hdr, *hdr_prev; kmutex_t *hash_lock; uint64_t taddr; int64_t bytes_evicted = 0; @@ -4442,10 +4481,10 @@ top: mutex_enter(&l2arc_buflist_mtx); - for (ab = list_tail(buflist); ab; ab = ab_prev) { - ab_prev = list_prev(buflist, ab); - - hash_lock = HDR_LOCK(ab); + for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) { + hdr_prev = list_prev(buflist, hdr); + + hash_lock = HDR_LOCK(hdr); if (!mutex_tryenter(hash_lock)) { /* * Missed the hash lock. Retry. @@ -4457,19 +4496,19 @@ goto top; } - if (HDR_L2_WRITE_HEAD(ab)) { + if (HDR_L2_WRITE_HEAD(hdr)) { /* * We hit a write head node. Leave it for * l2arc_write_done(). 
*/ - list_remove(buflist, ab); + list_remove(buflist, hdr); mutex_exit(hash_lock); continue; } - if (!all && ab->b_l2hdr != NULL && - (ab->b_l2hdr->b_daddr > taddr || - ab->b_l2hdr->b_daddr < dev->l2ad_hand)) { + if (!all && hdr->b_l2hdr != NULL && + (hdr->b_l2hdr->b_daddr > taddr || + hdr->b_l2hdr->b_daddr < dev->l2ad_hand)) { /* * We've evicted to the target address, * or the end of the device. @@ -4478,7 +4517,7 @@ break; } - if (HDR_FREE_IN_PROGRESS(ab)) { + if (HDR_FREE_IN_PROGRESS(hdr)) { /* * Already on the path to destruction. */ @@ -4486,44 +4525,44 @@ continue; } - if (ab->b_state == arc_l2c_only) { - ASSERT(!HDR_L2_READING(ab)); + if (hdr->b_state == arc_l2c_only) { + ASSERT(!HDR_L2_READING(hdr)); /* * This doesn't exist in the ARC. Destroy. * arc_hdr_destroy() will call list_remove() * and decrement arcstat_l2_size. */ - arc_change_state(arc_anon, ab, hash_lock); - arc_hdr_destroy(ab); + arc_change_state(arc_anon, hdr, hash_lock); + arc_hdr_destroy(hdr); } else { /* * Invalidate issued or about to be issued * reads, since we may be about to write * over this location. */ - if (HDR_L2_READING(ab)) { + if (HDR_L2_READING(hdr)) { ARCSTAT_BUMP(arcstat_l2_evict_reading); - ab->b_flags |= ARC_L2_EVICTED; + hdr->b_flags |= ARC_FLAG_L2_EVICTED; } /* * Tell ARC this no longer exists in L2ARC. */ - if (ab->b_l2hdr != NULL) { - abl2 = ab->b_l2hdr; + if (hdr->b_l2hdr != NULL) { + abl2 = hdr->b_l2hdr; ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize); bytes_evicted += abl2->b_asize; - ab->b_l2hdr = NULL; + hdr->b_l2hdr = NULL; kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); - ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); + ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); } - list_remove(buflist, ab); + list_remove(buflist, hdr); /* * This may have been leftover after a * failed write. */ - ab->b_flags &= ~ARC_L2_WRITING; + hdr->b_flags &= ~ARC_FLAG_L2_WRITING; } mutex_exit(hash_lock); } @@ -4536,7 +4575,7 @@ /* * Find and write ARC buffers to the L2ARC device. * - * An ARC_L2_WRITING flag is set so that the L2ARC buffers are not valid + * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid * for reading until they have completed writing. * The headroom_boost is an in-out parameter used to maintain headroom boost * state between calls to this function. @@ -4548,7 +4587,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, boolean_t *headroom_boost) { - arc_buf_hdr_t *ab, *ab_prev, *head; + arc_buf_hdr_t *hdr, *hdr_prev, *head; list_t *list; uint64_t write_asize, write_psize, write_sz, headroom, buf_compress_minsz; @@ -4569,7 +4608,7 @@ write_sz = write_asize = write_psize = 0; full = B_FALSE; head = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE); - head->b_flags |= ARC_L2_WRITE_HEAD; + head->b_flags |= ARC_FLAG_L2_WRITE_HEAD; /* * We will want to try to compress buffers that are at least 2x the @@ -4593,25 +4632,25 @@ * head of the ARC lists rather than the tail. 
*/ if (arc_warm == B_FALSE) - ab = list_head(list); + hdr = list_head(list); else - ab = list_tail(list); + hdr = list_tail(list); headroom = target_sz * l2arc_headroom; if (do_headroom_boost) headroom = (headroom * l2arc_headroom_boost) / 100; - for (; ab; ab = ab_prev) { + for (; hdr; hdr = hdr_prev) { l2arc_buf_hdr_t *l2hdr; kmutex_t *hash_lock; uint64_t buf_sz; if (arc_warm == B_FALSE) - ab_prev = list_next(list, ab); + hdr_prev = list_next(list, hdr); else - ab_prev = list_prev(list, ab); - - hash_lock = HDR_LOCK(ab); + hdr_prev = list_prev(list, hdr); + + hash_lock = HDR_LOCK(hdr); if (!mutex_tryenter(hash_lock)) { /* * Skip this buffer rather than waiting. @@ -4619,7 +4658,7 @@ continue; } - passed_sz += ab->b_size; + passed_sz += hdr->b_size; if (passed_sz > headroom) { /* * Searched too far. @@ -4628,12 +4667,12 @@ break; } - if (!l2arc_write_eligible(guid, ab)) { + if (!l2arc_write_eligible(guid, hdr)) { mutex_exit(hash_lock); continue; } - if ((write_sz + ab->b_size) > target_sz) { + if ((write_sz + hdr->b_size) > target_sz) { full = B_TRUE; mutex_exit(hash_lock); break; @@ -4660,31 +4699,31 @@ */ l2hdr = kmem_zalloc(sizeof (l2arc_buf_hdr_t), KM_SLEEP); l2hdr->b_dev = dev; - ab->b_flags |= ARC_L2_WRITING; + hdr->b_flags |= ARC_FLAG_L2_WRITING; /* * Temporarily stash the data buffer in b_tmp_cdata. * The subsequent write step will pick it up from - * there. This is because can't access ab->b_buf + * there. This is because can't access hdr->b_buf * without holding the hash_lock, which we in turn * can't access without holding the ARC list locks * (which we want to avoid during compression/writing). */ l2hdr->b_compress = ZIO_COMPRESS_OFF; - l2hdr->b_asize = ab->b_size; - l2hdr->b_tmp_cdata = ab->b_buf->b_data; - - buf_sz = ab->b_size; - ab->b_l2hdr = l2hdr; - - list_insert_head(dev->l2ad_buflist, ab); + l2hdr->b_asize = hdr->b_size; + l2hdr->b_tmp_cdata = hdr->b_buf->b_data; + + buf_sz = hdr->b_size; + hdr->b_l2hdr = l2hdr; + + list_insert_head(dev->l2ad_buflist, hdr); /* * Compute and store the buffer cksum before * writing. On debug the cksum is verified first. */ - arc_cksum_verify(ab->b_buf); - arc_cksum_compute(ab->b_buf, B_TRUE); + arc_cksum_verify(hdr->b_buf); + arc_cksum_compute(hdr->b_buf, B_TRUE); mutex_exit(hash_lock); @@ -4710,21 +4749,22 @@ * and work backwards, retracing the course of the buffer selector * loop above. */ - for (ab = list_prev(dev->l2ad_buflist, head); ab; - ab = list_prev(dev->l2ad_buflist, ab)) { + for (hdr = list_prev(dev->l2ad_buflist, head); hdr; + hdr = list_prev(dev->l2ad_buflist, hdr)) { l2arc_buf_hdr_t *l2hdr; uint64_t buf_sz; /* * We shouldn't need to lock the buffer here, since we flagged - * it as ARC_L2_WRITING in the previous step, but we must take - * care to only access its L2 cache parameters. In particular, - * ab->b_buf may be invalid by now due to ARC eviction. + * it as ARC_FLAG_L2_WRITING in the previous step, but we must + * take care to only access its L2 cache parameters. In + * particular, hdr->b_buf may be invalid by now due to + * ARC eviction. */ - l2hdr = ab->b_l2hdr; + l2hdr = hdr->b_l2hdr; l2hdr->b_daddr = dev->l2ad_hand; - if ((ab->b_flags & ARC_L2COMPRESS) && + if ((hdr->b_flags & ARC_FLAG_L2COMPRESS) && l2hdr->b_asize >= buf_compress_minsz) { if (l2arc_compress_buf(l2hdr)) { /* @@ -4928,9 +4968,9 @@ * done, we can dispose of it. 
*/ static void -l2arc_release_cdata_buf(arc_buf_hdr_t *ab) +l2arc_release_cdata_buf(arc_buf_hdr_t *hdr) { - l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr; + l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr; if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) { /* @@ -4938,7 +4978,7 @@ * temporary buffer for it, so now we need to release it. */ ASSERT(l2hdr->b_tmp_cdata != NULL); - zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size); + zio_data_buf_free(l2hdr->b_tmp_cdata, hdr->b_size); } l2hdr->b_tmp_cdata = NULL; } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dbuf.c --- a/usr/src/uts/common/fs/zfs/dbuf.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dbuf.c Wed Dec 10 08:46:44 2014 -0800 @@ -223,11 +223,8 @@ if (db->db_level != 0 || db->db_evict_func == NULL) return; - if (db->db_user_data_ptr_ptr) - *db->db_user_data_ptr_ptr = db->db.db_data; db->db_evict_func(&db->db, db->db_user_ptr); db->db_user_ptr = NULL; - db->db_user_data_ptr_ptr = NULL; db->db_evict_func = NULL; } @@ -418,16 +415,6 @@ #endif static void -dbuf_update_data(dmu_buf_impl_t *db) -{ - ASSERT(MUTEX_HELD(&db->db_mtx)); - if (db->db_level == 0 && db->db_user_data_ptr_ptr) { - ASSERT(!refcount_is_zero(&db->db_holds)); - *db->db_user_data_ptr_ptr = db->db.db_data; - } -} - -static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); @@ -437,7 +424,6 @@ db->db.db_data = buf->b_data; if (!arc_released(buf)) arc_set_callback(buf, dbuf_do_evict, db); - dbuf_update_data(db); } else { dbuf_evict_user(db); db->db.db_data = NULL; @@ -521,7 +507,7 @@ { dnode_t *dn; zbookmark_phys_t zb; - uint32_t aflags = ARC_NOWAIT; + arc_flags_t aflags = ARC_FLAG_NOWAIT; DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -543,7 +529,6 @@ if (bonuslen) bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); DB_DNODE_EXIT(db); - dbuf_update_data(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); return; @@ -575,9 +560,9 @@ mutex_exit(&db->db_mtx); if (DBUF_IS_L2CACHEABLE(db)) - aflags |= ARC_L2CACHE; + aflags |= ARC_FLAG_L2CACHE; if (DBUF_IS_L2COMPRESSIBLE(db)) - aflags |= ARC_L2COMPRESS; + aflags |= ARC_FLAG_L2COMPRESS; SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, @@ -589,7 +574,7 @@ dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, &aflags, &zb); - if (aflags & ARC_CACHED) + if (aflags & ARC_FLAG_CACHED) *flags |= DB_RF_CACHED; } @@ -1726,7 +1711,6 @@ db->db_blkptr = blkptr; db->db_user_ptr = NULL; - db->db_user_data_ptr_ptr = NULL; db->db_evict_func = NULL; db->db_immediate_evict = 0; db->db_freed_in_flight = 0; @@ -1879,7 +1863,8 @@ if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; - uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + arc_flags_t aflags = + ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; zbookmark_phys_t zb; SET_BOOKMARK(&zb, ds ? 
ds->ds_object : DMU_META_OBJSET, @@ -1971,7 +1956,6 @@ } (void) refcount_add(&db->db_holds, tag); - dbuf_update_data(db); DBUF_VERIFY(db); mutex_exit(&db->db_mtx); @@ -2182,27 +2166,25 @@ } void * -dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, +dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, dmu_buf_evict_func_t *evict_func) { - return (dmu_buf_update_user(db_fake, NULL, user_ptr, - user_data_ptr_ptr, evict_func)); + return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); } void * -dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, +dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, dmu_buf_evict_func_t *evict_func) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; db->db_immediate_evict = TRUE; - return (dmu_buf_update_user(db_fake, NULL, user_ptr, - user_data_ptr_ptr, evict_func)); + return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); } void * dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, - void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func) + dmu_buf_evict_func_t *evict_func) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; ASSERT(db->db_level == 0); @@ -2213,10 +2195,7 @@ if (db->db_user_ptr == old_user_ptr) { db->db_user_ptr = user_ptr; - db->db_user_data_ptr_ptr = user_data_ptr_ptr; db->db_evict_func = evict_func; - - dbuf_update_data(db); } else { old_user_ptr = db->db_user_ptr; } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dmu_diff.c --- a/usr/src/uts/common/fs/zfs/dmu_diff.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dmu_diff.c Wed Dec 10 08:46:44 2014 -0800 @@ -129,7 +129,7 @@ } else if (zb->zb_level == 0) { dnode_phys_t *blk; arc_buf_t *abuf; - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; int blksz = BP_GET_LSIZE(bp); int i; @@ -194,7 +194,7 @@ return (SET_ERROR(EXDEV)); } - fromtxg = fromsnap->ds_phys->ds_creation_txg; + fromtxg = dsl_dataset_phys(fromsnap)->ds_creation_txg; dsl_dataset_rele(fromsnap, FTAG); dsl_dataset_long_hold(tosnap, FTAG); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dmu_objset.c --- a/usr/src/uts/common/fs/zfs/dmu_objset.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c Wed Dec 10 08:46:44 2014 -0800 @@ -293,15 +293,15 @@ os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; SET_BOOKMARK(&zb, ds ? 
ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) - aflags |= ARC_L2CACHE; + aflags |= ARC_FLAG_L2CACHE; if (DMU_OS_IS_L2COMPRESSIBLE(os)) - aflags |= ARC_L2COMPRESS; + aflags |= ARC_FLAG_L2COMPRESS; dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, @@ -1479,12 +1479,12 @@ dsl_dataset_t *ds = os->os_dsl_dataset; uint64_t ignored; - if (ds->ds_phys->ds_snapnames_zapobj == 0) + if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0) return (SET_ERROR(ENOENT)); return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST, - real, maxlen, conflict)); + dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored, + MT_FIRST, real, maxlen, conflict)); } int @@ -1497,12 +1497,12 @@ ASSERT(dsl_pool_config_held(dmu_objset_pool(os))); - if (ds->ds_phys->ds_snapnames_zapobj == 0) + if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0) return (SET_ERROR(ENOENT)); zap_cursor_init_serialized(&cursor, ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, *offp); + dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp); if (zap_cursor_retrieve(&cursor, &attr) != 0) { zap_cursor_fini(&cursor); @@ -1536,12 +1536,12 @@ /* there is no next dir on a snapshot! */ if (os->os_dsl_dataset->ds_object != - dd->dd_phys->dd_head_dataset_obj) + dsl_dir_phys(dd)->dd_head_dataset_obj) return (SET_ERROR(ENOENT)); zap_cursor_init_serialized(&cursor, dd->dd_pool->dp_meta_objset, - dd->dd_phys->dd_child_dir_zapobj, *offp); + dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp); if (zap_cursor_retrieve(&cursor, &attr) != 0) { zap_cursor_fini(&cursor); @@ -1589,7 +1589,7 @@ return (0); } - thisobj = dd->dd_phys->dd_head_dataset_obj; + thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* @@ -1597,7 +1597,7 @@ */ if (flags & DS_FIND_CHILDREN) { for (zap_cursor_init(&zc, dp->dp_meta_objset, - dd->dd_phys->dd_child_dir_zapobj); + dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { ASSERT3U(attr->za_integer_length, ==, @@ -1626,7 +1626,9 @@ err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); if (err == 0) { - uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + uint64_t snapobj; + + snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; dsl_dataset_rele(ds, FTAG); for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); @@ -1701,7 +1703,7 @@ return (0); } - thisobj = dd->dd_phys->dd_head_dataset_obj; + thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj; attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* @@ -1709,7 +1711,7 @@ */ if (flags & DS_FIND_CHILDREN) { for (zap_cursor_init(&zc, dp->dp_meta_objset, - dd->dd_phys->dd_child_dir_zapobj); + dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, attr) == 0; (void) zap_cursor_advance(&zc)) { ASSERT3U(attr->za_integer_length, ==, @@ -1742,7 +1744,9 @@ err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds); if (err == 0) { - uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + uint64_t snapobj; + + snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; dsl_dataset_rele(ds, FTAG); for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dmu_send.c --- a/usr/src/uts/common/fs/zfs/dmu_send.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dmu_send.c Wed Dec 10 08:46:44 2014 -0800 @@ -458,7 +458,7 @@ dnode_phys_t *blk; int i; 
int blksz = BP_GET_LSIZE(bp); - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, @@ -476,7 +476,7 @@ } (void) arc_buf_remove_ref(abuf, &abuf); } else if (type == DMU_OT_SA) { - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); @@ -493,7 +493,7 @@ err = dump_write_embedded(dsp, zb->zb_object, zb->zb_blkid * blksz, blksz, bp); } else { /* it's a level-0 block of a regular object */ - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; int blksz = BP_GET_LSIZE(bp); uint64_t offset; @@ -598,12 +598,12 @@ featureflags); drr->drr_u.drr_begin.drr_creation_time = - ds->ds_phys->ds_creation_time; + dsl_dataset_phys(ds)->ds_creation_time; drr->drr_u.drr_begin.drr_type = dmu_objset_type(os); if (is_clone) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; - drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; - if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(ds)->ds_guid; + if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA; if (fromzb != NULL) { @@ -624,7 +624,7 @@ dsp->dsa_proc = curproc; dsp->dsa_os = os; dsp->dsa_off = off; - dsp->dsa_toguid = ds->ds_phys->ds_guid; + dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid; ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_incremental = (fromzb != NULL); @@ -710,9 +710,10 @@ } if (!dsl_dataset_is_before(ds, fromds, 0)) err = SET_ERROR(EXDEV); - zb.zbm_creation_time = fromds->ds_phys->ds_creation_time; - zb.zbm_creation_txg = fromds->ds_phys->ds_creation_txg; - zb.zbm_guid = fromds->ds_phys->ds_guid; + zb.zbm_creation_time = + dsl_dataset_phys(fromds)->ds_creation_time; + zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg; + zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, @@ -779,10 +780,10 @@ if (!dsl_dataset_is_before(ds, fromds, 0)) err = SET_ERROR(EXDEV); zb.zbm_creation_time = - fromds->ds_phys->ds_creation_time; + dsl_dataset_phys(fromds)->ds_creation_time; zb.zbm_creation_txg = - fromds->ds_phys->ds_creation_txg; - zb.zbm_guid = fromds->ds_phys->ds_guid; + dsl_dataset_phys(fromds)->ds_creation_txg; + zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; is_clone = (ds->ds_dir != fromds->ds_dir); dsl_dataset_rele(fromds, FTAG); } @@ -829,7 +830,7 @@ /* Get uncompressed size estimate of changed data. */ if (fromds == NULL) { - size = ds->ds_phys->ds_uncompressed_bytes; + size = dsl_dataset_phys(ds)->ds_uncompressed_bytes; } else { uint64_t used, comp; err = dsl_dataset_space_written(fromds, ds, @@ -884,15 +885,15 @@ /* temporary clone name must not exist */ error = zap_lookup(dp->dp_meta_objset, - ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name, + dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, recv_clone_name, 8, 1, &val); if (error != ENOENT) return (error == 0 ? EBUSY : error); /* new snapshot name must not exist */ error = zap_lookup(dp->dp_meta_objset, - ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap, - 8, 1, &val); + dsl_dataset_phys(ds)->ds_snapnames_zapobj, + drba->drba_cookie->drc_tosnap, 8, 1, &val); if (error != ENOENT) return (error == 0 ? 
EEXIST : error); @@ -912,7 +913,7 @@ if (fromguid != 0) { dsl_dataset_t *snap; - uint64_t obj = ds->ds_phys->ds_prev_snap_obj; + uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; /* Find snapshot in this dir that matches fromguid. */ while (obj != 0) { @@ -924,9 +925,9 @@ dsl_dataset_rele(snap, FTAG); return (SET_ERROR(ENODEV)); } - if (snap->ds_phys->ds_guid == fromguid) + if (dsl_dataset_phys(snap)->ds_guid == fromguid) break; - obj = snap->ds_phys->ds_prev_snap_obj; + obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; dsl_dataset_rele(snap, FTAG); } if (obj == 0) @@ -949,9 +950,9 @@ dsl_dataset_rele(snap, FTAG); } else { /* if full, most recent snapshot must be $ORIGIN */ - if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL) + if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= TXG_INITIAL) return (SET_ERROR(ENODEV)); - drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj; + drba->drba_snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } return (0); @@ -1069,7 +1070,7 @@ dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } - if (origin->ds_phys->ds_guid != fromguid) { + if (dsl_dataset_phys(origin)->ds_guid != fromguid) { dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENODEV)); @@ -1140,7 +1141,7 @@ } dmu_buf_will_dirty(newds->ds_dbuf, tx); - newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; /* * If we actually created a non-clone, we need to create the @@ -1777,7 +1778,7 @@ */ VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os)); - ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); + ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); @@ -1940,8 +1941,11 @@ * the snap before drc_ds, because drc_ds can not * have any snaps of its own). */ - uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; - while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { + uint64_t obj; + + obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj; + while (obj != + dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) { dsl_dataset_t *snap; error = dsl_dataset_hold_obj(dp, obj, FTAG, &snap); @@ -1953,7 +1957,7 @@ error = dsl_destroy_snapshot_check_impl( snap, B_FALSE); } - obj = snap->ds_phys->ds_prev_snap_obj; + obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; dsl_dataset_rele(snap, FTAG); if (error != 0) return (error); @@ -1999,13 +2003,16 @@ * Destroy any snapshots of drc_tofs (origin_head) * after the origin (the snap before drc_ds). 
*/ - uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj; - while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) { + uint64_t obj; + + obj = dsl_dataset_phys(origin_head)->ds_prev_snap_obj; + while (obj != + dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj) { dsl_dataset_t *snap; VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &snap)); ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir); - obj = snap->ds_phys->ds_prev_snap_obj; + obj = dsl_dataset_phys(snap)->ds_prev_snap_obj; dsl_destroy_snapshot_sync_impl(snap, B_FALSE, tx); dsl_dataset_rele(snap, FTAG); @@ -2021,15 +2028,16 @@ /* set snapshot's creation time and guid */ dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx); - origin_head->ds_prev->ds_phys->ds_creation_time = + dsl_dataset_phys(origin_head->ds_prev)->ds_creation_time = drc->drc_drrb->drr_creation_time; - origin_head->ds_prev->ds_phys->ds_guid = + dsl_dataset_phys(origin_head->ds_prev)->ds_guid = drc->drc_drrb->drr_toguid; - origin_head->ds_prev->ds_phys->ds_flags &= + dsl_dataset_phys(origin_head->ds_prev)->ds_flags &= ~DS_FLAG_INCONSISTENT; dmu_buf_will_dirty(origin_head->ds_dbuf, tx); - origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + dsl_dataset_phys(origin_head)->ds_flags &= + ~DS_FLAG_INCONSISTENT; dsl_dataset_rele(origin_head, FTAG); dsl_destroy_head_sync_impl(drc->drc_ds, tx); @@ -2043,15 +2051,17 @@ /* set snapshot's creation time and guid */ dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ds->ds_prev->ds_phys->ds_creation_time = + dsl_dataset_phys(ds->ds_prev)->ds_creation_time = drc->drc_drrb->drr_creation_time; - ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid; - ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + dsl_dataset_phys(ds->ds_prev)->ds_guid = + drc->drc_drrb->drr_toguid; + dsl_dataset_phys(ds->ds_prev)->ds_flags &= + ~DS_FLAG_INCONSISTENT; dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; + dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; } - drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj; + drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; /* * Release the hold from dmu_recv_begin. 
This must be done before * we return to open context, so that when we free the dataset's dnode, @@ -2077,7 +2087,7 @@ gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); if (err == 0) { - gmep->guid = snapds->ds_phys->ds_guid; + gmep->guid = dsl_dataset_phys(snapds)->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); dsl_dataset_long_hold(snapds, gmep); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dmu_traverse.c --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c Wed Dec 10 08:46:44 2014 -0800 @@ -178,7 +178,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { - uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; + arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) return; @@ -275,7 +275,7 @@ } if (BP_GET_LEVEL(bp) > 0) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; @@ -303,7 +303,7 @@ break; } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; @@ -326,7 +326,7 @@ break; } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; dnode_phys_t *dnp; @@ -442,7 +442,7 @@ const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { prefetch_data_t *pfd = arg; - uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; + arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; ASSERT(pfd->pd_blks_fetched >= 0); if (pfd->pd_cancel) @@ -533,7 +533,7 @@ /* See comment on ZIL traversal in dsl_scan_visitds. */ if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; @@ -579,7 +579,7 @@ blkptr_cb_t func, void *arg) { return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object, - &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg)); + &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg)); } int @@ -634,8 +634,8 @@ continue; break; } - if (ds->ds_phys->ds_prev_snap_txg > txg) - txg = ds->ds_phys->ds_prev_snap_txg; + if (dsl_dataset_phys(ds)->ds_prev_snap_txg > txg) + txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; err = traverse_dataset(ds, txg, flags, func, arg); dsl_dataset_rele(ds, FTAG); if (err != 0) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dmu_tx.c --- a/usr/src/uts/common/fs/zfs/dmu_tx.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c Wed Dec 10 08:46:44 2014 -0800 @@ -713,6 +713,7 @@ { dmu_tx_hold_t *txh; dnode_t *dn; + dsl_dataset_phys_t *ds_phys; uint64_t nblocks; int epbs, err; @@ -787,8 +788,9 @@ * we'll have to modify an indirect twig for each. 
*/ epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; + ds_phys = dsl_dataset_phys(dn->dn_objset->os_dsl_dataset); for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs) - if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj) + if (ds_phys->ds_prev_snap_obj) txh->txh_space_towrite += 3 << dn->dn_indblkshift; else txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dnode.c --- a/usr/src/uts/common/fs/zfs/dnode.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dnode.c Wed Dec 10 08:46:44 2014 -0800 @@ -1116,7 +1116,7 @@ zrl_init(&dnh[i].dnh_zrlock); dnh[i].dnh_dnode = NULL; } - if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL, + if (winner = dmu_buf_set_user(&db->db, children_dnodes, dnode_buf_pageout)) { for (i = 0; i < epb; i++) { diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_bookmark.c --- a/usr/src/uts/common/fs/zfs/dsl_bookmark.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_bookmark.c Wed Dec 10 08:46:44 2014 -0800 @@ -65,7 +65,7 @@ if (bmark_zapobj == 0) return (SET_ERROR(ESRCH)); - if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; @@ -208,10 +208,11 @@ &bmark_fs->ds_bookmarks, tx)); } - bmark_phys.zbm_guid = snapds->ds_phys->ds_guid; - bmark_phys.zbm_creation_txg = snapds->ds_phys->ds_creation_txg; + bmark_phys.zbm_guid = dsl_dataset_phys(snapds)->ds_guid; + bmark_phys.zbm_creation_txg = + dsl_dataset_phys(snapds)->ds_creation_txg; bmark_phys.zbm_creation_time = - snapds->ds_phys->ds_creation_time; + dsl_dataset_phys(snapds)->ds_creation_time; VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks, shortname, sizeof (uint64_t), @@ -340,7 +341,7 @@ uint64_t bmark_zapobj = ds->ds_bookmarks; matchtype_t mt; - if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_dataset.c --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Wed Dec 10 08:46:44 2014 -0800 @@ -70,6 +70,9 @@ #define DS_REF_MAX (1ULL << 62) +extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); +extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds); + /* * Figure out how much of this delta should be propogated to the dsl_dir * layer. 
If there's a refreservation, that space has already been @@ -78,13 +81,15 @@ static int64_t parent_delta(dsl_dataset_t *ds, int64_t delta) { + dsl_dataset_phys_t *ds_phys; uint64_t old_bytes, new_bytes; if (ds->ds_reserved == 0) return (delta); - old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); - new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); + ds_phys = dsl_dataset_phys(ds); + old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); + new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); return (new_bytes - old_bytes); @@ -115,10 +120,10 @@ dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); - ds->ds_phys->ds_referenced_bytes += used; - ds->ds_phys->ds_compressed_bytes += compressed; - ds->ds_phys->ds_uncompressed_bytes += uncompressed; - ds->ds_phys->ds_unique_bytes += used; + dsl_dataset_phys(ds)->ds_referenced_bytes += used; + dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; + dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; + dsl_dataset_phys(ds)->ds_unique_bytes += used; if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) ds->ds_need_large_blocks = B_TRUE; mutex_exit(&ds->ds_lock); @@ -153,17 +158,17 @@ ASSERT(!dsl_dataset_is_snapshot(ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); - if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { + if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { int64_t delta; dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); dsl_free(tx->tx_pool, tx->tx_txg, bp); mutex_enter(&ds->ds_lock); - ASSERT(ds->ds_phys->ds_unique_bytes >= used || + ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || !DS_UNIQUE_IS_ACCURATE(ds)); delta = parent_delta(ds, -used); - ds->ds_phys->ds_unique_bytes -= used; + dsl_dataset_phys(ds)->ds_unique_bytes -= used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, -compressed, -uncompressed, tx); @@ -184,15 +189,15 @@ dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); } ASSERT3U(ds->ds_prev->ds_object, ==, - ds->ds_phys->ds_prev_snap_obj); - ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); + dsl_dataset_phys(ds)->ds_prev_snap_obj); + ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ - if (ds->ds_prev->ds_phys->ds_next_snap_obj == + if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object && bp->blk_birth > - ds->ds_prev->ds_phys->ds_prev_snap_txg) { + dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); - ds->ds_prev->ds_phys->ds_unique_bytes += used; + dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; mutex_exit(&ds->ds_prev->ds_lock); } if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { @@ -201,12 +206,12 @@ } } mutex_enter(&ds->ds_lock); - ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); - ds->ds_phys->ds_referenced_bytes -= used; - ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); - ds->ds_phys->ds_compressed_bytes -= compressed; - ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); - ds->ds_phys->ds_uncompressed_bytes -= uncompressed; + ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); + dsl_dataset_phys(ds)->ds_referenced_bytes -= used; + ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); + dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; + 
ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); + dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; mutex_exit(&ds->ds_lock); return (used); @@ -232,7 +237,7 @@ if (ds->ds_trysnap_txg > spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) trysnap = ds->ds_trysnap_txg; - return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); + return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap)); } boolean_t @@ -267,7 +272,7 @@ } bplist_destroy(&ds->ds_pending_deadlist); - if (ds->ds_phys->ds_deadlist_obj != 0) + if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0) dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_dir) dsl_dir_rele(ds->ds_dir, ds); @@ -293,10 +298,10 @@ if (ds->ds_snapname[0]) return (0); - if (ds->ds_phys->ds_next_snap_obj == 0) + if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) return (0); - err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, + err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &headdbuf); if (err != 0) return (err); @@ -311,11 +316,11 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; matchtype_t mt; int err; - if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; @@ -332,13 +337,13 @@ boolean_t adj_cnt) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; - uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; + uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; matchtype_t mt; int err; dsl_dir_snap_cmtime_update(ds->ds_dir); - if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) + if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; @@ -384,7 +389,6 @@ ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; - ds->ds_phys = dbuf->db_data; mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); @@ -393,7 +397,7 @@ bplist_create(&ds->ds_pending_deadlist); dsl_deadlist_open(&ds->ds_deadlist, - mos, ds->ds_phys->ds_deadlist_obj); + mos, dsl_dataset_phys(ds)->ds_deadlist_obj); list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), offsetof(dmu_sendarg_t, dsa_link)); @@ -408,7 +412,8 @@ if (err == 0) { err = dsl_dir_hold_obj(dp, - ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); + dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, + &ds->ds_dir); } if (err != 0) { mutex_destroy(&ds->ds_lock); @@ -424,9 +429,9 @@ if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; - if (ds->ds_phys->ds_prev_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, + dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev); } if (doi.doi_type == DMU_OTN_ZAP_METADATA) { @@ -440,10 +445,11 @@ } else { if (zfs_flags & ZFS_DEBUG_SNAPNAMES) err = dsl_dataset_get_snapname(ds); - if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { + if (err == 0 && + dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { err = zap_count( ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_userrefs_obj, + dsl_dataset_phys(ds)->ds_userrefs_obj, &ds->ds_userrefs); } } @@ -462,7 +468,7 @@ } if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, - &ds->ds_phys, dsl_dataset_evict)) != NULL) { + dsl_dataset_evict)) != NULL) { 
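	/*
	 * A minimal sketch, for orientation, of what the dsl_dataset_phys()
	 * accessor used throughout this patch presumably looks like.  It is
	 * inferred from the "extern inline" declarations added earlier in
	 * this file and from the ASSERT3P(dsl_dataset_phys(ds), ==,
	 * dbuf->db_data) a few lines below; the inline definition itself
	 * lives in dsl_dataset.h, outside these hunks:
	 *
	 *	inline dsl_dataset_phys_t *
	 *	dsl_dataset_phys(dsl_dataset_t *ds)
	 *	{
	 *		return (ds->ds_dbuf->db_data);
	 *	}
	 *
	 * Because callers now go through the accessor, dsl_dataset_t no
	 * longer caches a ds_phys pointer, which is why the
	 * dmu_buf_set_user_ie() call above drops the old &ds->ds_phys
	 * argument.
	 */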
bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) @@ -480,12 +486,12 @@ ds = winner; } else { ds->ds_fsid_guid = - unique_insert(ds->ds_phys->ds_fsid_guid); + unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); } } ASSERT3P(ds->ds_dbuf, ==, dbuf); - ASSERT3P(ds->ds_phys, ==, dbuf->db_data); - ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || + ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); + ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); *dsp = ds; @@ -506,7 +512,7 @@ return (err); ASSERT(dsl_pool_config_held(dp)); - obj = dd->dd_phys->dd_head_dataset_obj; + obj = dsl_dir_phys(dd)->dd_head_dataset_obj; if (obj != 0) err = dsl_dataset_hold_obj(dp, obj, tag, dsp); else @@ -673,9 +679,9 @@ origin = dp->dp_origin_snap; ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); - ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); + ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); + ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); @@ -701,55 +707,58 @@ dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = - origin->ds_phys->ds_creation_txg; + dsl_dataset_phys(origin)->ds_creation_txg; dsphys->ds_referenced_bytes = - origin->ds_phys->ds_referenced_bytes; + dsl_dataset_phys(origin)->ds_referenced_bytes; dsphys->ds_compressed_bytes = - origin->ds_phys->ds_compressed_bytes; + dsl_dataset_phys(origin)->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = - origin->ds_phys->ds_uncompressed_bytes; - dsphys->ds_bp = origin->ds_phys->ds_bp; + dsl_dataset_phys(origin)->ds_uncompressed_bytes; + dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; /* * Inherit flags that describe the dataset's contents * (INCONSISTENT) or properties (Case Insensitive). 
*/ - dsphys->ds_flags |= origin->ds_phys->ds_flags & + dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); if (origin->ds_large_blocks) dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); dmu_buf_will_dirty(origin->ds_dbuf, tx); - origin->ds_phys->ds_num_children++; + dsl_dataset_phys(origin)->ds_num_children++; VERIFY0(dsl_dataset_hold_obj(dp, - origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); + dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, + FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); dsl_dataset_rele(ohds, FTAG); if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { - if (origin->ds_phys->ds_next_clones_obj == 0) { - origin->ds_phys->ds_next_clones_obj = + if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { + dsl_dataset_phys(origin)->ds_next_clones_obj = zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(mos, - origin->ds_phys->ds_next_clones_obj, dsobj, tx)); + dsl_dataset_phys(origin)->ds_next_clones_obj, + dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); - dd->dd_phys->dd_origin_obj = origin->ds_object; + dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { - if (origin->ds_dir->dd_phys->dd_clones == 0) { + if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); - origin->ds_dir->dd_phys->dd_clones = + dsl_dir_phys(origin->ds_dir)->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(mos, - origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); + dsl_dir_phys(origin->ds_dir)->dd_clones, + dsobj, tx)); } } @@ -759,7 +768,7 @@ dmu_buf_rele(dbuf, FTAG); dmu_buf_will_dirty(dd->dd_dbuf, tx); - dd->dd_phys->dd_head_dataset_obj = dsobj; + dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; return (dsobj); } @@ -841,20 +850,20 @@ ASSERT(!dsl_dataset_is_snapshot(ds)); - if (ds->ds_phys->ds_prev_snap_obj != 0) - mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; + if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) + mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; else mrs_used = 0; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ASSERT3U(dlused, <=, mrs_used); - ds->ds_phys->ds_unique_bytes = - ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); + dsl_dataset_phys(ds)->ds_unique_bytes = + dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } void @@ -865,8 +874,9 @@ uint64_t count; int err; - ASSERT(ds->ds_phys->ds_num_children >= 2); - err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); + ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); + err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, + obj, tx); /* * The err should not be ENOENT, but a bug in a previous version * of the code could cause upgrade_clones_cb() to not set @@ -879,16 +889,16 @@ */ if (err != ENOENT) VERIFY0(err); - ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, + ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count)); - ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); + ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); } blkptr_t * dsl_dataset_get_blkptr(dsl_dataset_t *ds) { - 
return (&ds->ds_phys->ds_bp); + return (&dsl_dataset_phys(ds)->ds_bp); } void @@ -900,7 +910,7 @@ tx->tx_pool->dp_meta_rootbp = *bp; } else { dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_bp = *bp; + dsl_dataset_phys(ds)->ds_bp = *bp; } } @@ -920,7 +930,7 @@ ASSERT(ds->ds_objset != NULL); - if (ds->ds_phys->ds_next_snap_obj != 0) + if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) panic("dirtying snapshot!"); dp = ds->ds_dir->dd_pool; @@ -956,7 +966,7 @@ * outside of the reservation. */ ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); - asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (SET_ERROR(ENOSPC)); @@ -993,7 +1003,7 @@ * We don't allow multiple snapshots of the same txg. If there * is already one, try again. */ - if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) + if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) return (SET_ERROR(EAGAIN)); /* @@ -1219,35 +1229,38 @@ dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); - dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; - dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; + dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; + dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; dsphys->ds_next_snap_obj = ds->ds_object; dsphys->ds_num_children = 1; dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = crtxg; - dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; - dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; - dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; - dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; - dsphys->ds_flags = ds->ds_phys->ds_flags; - dsphys->ds_bp = ds->ds_phys->ds_bp; + dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; + dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; + dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; + dsphys->ds_uncompressed_bytes = + dsl_dataset_phys(ds)->ds_uncompressed_bytes; + dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; + dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; dmu_buf_rele(dbuf, FTAG); if (ds->ds_large_blocks) dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx); - ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); + ASSERT3U(ds->ds_prev != 0, ==, + dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); if (ds->ds_prev) { uint64_t next_clones_obj = - ds->ds_prev->ds_phys->ds_next_clones_obj; - ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == + dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; + ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object || - ds->ds_prev->ds_phys->ds_num_children > 1); - if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { + dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); + if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == + ds->ds_object) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds->ds_prev->ds_phys->ds_creation_txg); - ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; + ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, + dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); + dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; } else if (next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds->ds_prev, 
dsphys->ds_next_snap_obj, tx); @@ -1264,33 +1277,36 @@ if (ds->ds_reserved) { int64_t delta; ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); - delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); + delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, + ds->ds_reserved); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); } dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, - UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); + dsl_dataset_phys(ds)->ds_deadlist_obj = + dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, + dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + dsl_deadlist_open(&ds->ds_deadlist, mos, + dsl_dataset_phys(ds)->ds_deadlist_obj); dsl_deadlist_add_key(&ds->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, tx); - - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); - ds->ds_phys->ds_prev_snap_obj = dsobj; - ds->ds_phys->ds_prev_snap_txg = crtxg; - ds->ds_phys->ds_unique_bytes = 0; + dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); + + ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); + dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; + dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; + dsl_dataset_phys(ds)->ds_unique_bytes = 0; if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) - ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; - - VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; + + VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, snapname, 8, 1, &dsobj, tx)); if (ds->ds_prev) dsl_dataset_rele(ds->ds_prev, ds); VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); dsl_scan_ds_snapshotted(ds, tx); @@ -1499,14 +1515,14 @@ { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(ds->ds_objset != NULL); - ASSERT(ds->ds_phys->ds_next_snap_obj == 0); + ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); /* * in case we had to change ds_fsid_guid when we opened it, * sync it out now. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; + dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; dmu_objset_sync(ds->ds_objset, zio, tx); @@ -1533,13 +1549,14 @@ * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. */ - if (ds->ds_phys->ds_next_clones_obj != 0) { - VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, + if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { + VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, &count)); } - if (count != ds->ds_phys->ds_num_children - 1) + if (count != dsl_dataset_phys(ds)->ds_num_children - 1) goto fail; - for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); + for (zap_cursor_init(&zc, mos, + dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; @@ -1566,18 +1583,18 @@ ASSERT(dsl_pool_config_held(dp)); - ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : - (ds->ds_phys->ds_uncompressed_bytes * 100 / - ds->ds_phys->ds_compressed_bytes); + ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 
100 : + (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / + dsl_dataset_phys(ds)->ds_compressed_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, - ds->ds_phys->ds_uncompressed_bytes); + dsl_dataset_phys(ds)->ds_uncompressed_bytes); if (dsl_dataset_is_snapshot(ds)) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - ds->ds_phys->ds_unique_bytes); + dsl_dataset_phys(ds)->ds_unique_bytes); get_clones_stat(ds, nv); } else { if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { @@ -1594,17 +1611,17 @@ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, - ds->ds_phys->ds_creation_time); + dsl_dataset_phys(ds)->ds_creation_time); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, - ds->ds_phys->ds_creation_txg); + dsl_dataset_phys(ds)->ds_creation_txg); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, ds->ds_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, ds->ds_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, - ds->ds_phys->ds_guid); + dsl_dataset_phys(ds)->ds_guid); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, - ds->ds_phys->ds_unique_bytes); + dsl_dataset_phys(ds)->ds_unique_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, ds->ds_object); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, @@ -1612,13 +1629,13 @@ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 1 : 0); - if (ds->ds_phys->ds_prev_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { uint64_t written, comp, uncomp; dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_dataset_t *prev; int err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err == 0) { err = dsl_dataset_space_written(prev, ds, &written, &comp, &uncomp); @@ -1637,13 +1654,15 @@ dsl_pool_t *dp = ds->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); - stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; - stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; - stat->dds_guid = ds->ds_phys->ds_guid; + stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; + stat->dds_inconsistent = + dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; + stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; stat->dds_origin[0] = '\0'; if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; - stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; + stat->dds_num_clones = + dsl_dataset_phys(ds)->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; @@ -1652,7 +1671,8 @@ dsl_dataset_t *ods; VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); + dsl_dir_phys(ds->ds_dir)->dd_origin_obj, + FTAG, &ods)); dsl_dataset_name(ods, stat->dds_origin); dsl_dataset_rele(ods, FTAG); } @@ -1670,10 +1690,11 @@ uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { - *refdbytesp = ds->ds_phys->ds_referenced_bytes; + *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); - if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) - *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; + if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) + *availbytesp += + ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; if (ds->ds_quota 
!= 0) { /* * Adjust available bytes according to refquota @@ -1684,7 +1705,7 @@ else *availbytesp = 0; } - *usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp); + *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); *availobjsp = DN_MAX_OBJECT - *usedobjsp; } @@ -1696,8 +1717,8 @@ ASSERT(dsl_pool_config_held(dp)); if (snap == NULL) return (B_FALSE); - if (ds->ds_phys->ds_bp.blk_birth > - snap->ds_phys->ds_creation_txg) { + if (dsl_dataset_phys(ds)->ds_bp.blk_birth > + dsl_dataset_phys(snap)->ds_creation_txg) { objset_t *os, *os_snap; /* * It may be that only the ZIL differs, because it was @@ -1804,7 +1825,8 @@ mutex_enter(&ds->ds_lock); (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); mutex_exit(&ds->ds_lock); - VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, + VERIFY0(zap_add(dp->dp_meta_objset, + dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); @@ -1903,7 +1925,7 @@ } /* must have a most recent snapshot */ - if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { + if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EINVAL)); } @@ -1922,7 +1944,7 @@ fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), zfs_prop_to_name(ZFS_PROP_CREATETXG)); uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); - if (createtxg > ds->ds_phys->ds_prev_snap_txg) { + if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) { fnvlist_free(bookmarks); dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EEXIST)); @@ -1941,7 +1963,7 @@ * the refquota. */ if (ds->ds_quota != 0 && - ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { + dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EDQUOT)); } @@ -1954,7 +1976,7 @@ * this space, but the freeing happens over many txg's. */ unused_refres_delta = (int64_t)MIN(ds->ds_reserved, - ds->ds_phys->ds_unique_bytes); + dsl_dataset_phys(ds)->ds_unique_bytes); if (unused_refres_delta > 0 && unused_refres_delta > @@ -2060,7 +2082,7 @@ hds = ddpa->ddpa_clone; - if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { + if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { promote_rele(ddpa, FTAG); return (SET_ERROR(EXDEV)); } @@ -2079,9 +2101,10 @@ /* compute origin's new unique space */ snap = list_tail(&ddpa->clone_snaps); - ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); + ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, + origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, - origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX, &ddpa->unique, &unused, &unused); /* @@ -2100,9 +2123,9 @@ * uN + kN + kN-1 + ... 
+ kM - uM-1 */ ss_mv_cnt = 0; - ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; - ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; - ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; + ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; + ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; + ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes; for (snap = list_head(&ddpa->shared_snaps); snap; snap = list_next(&ddpa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; @@ -2131,7 +2154,7 @@ goto out; /* The very first snapshot does not have a deadlist */ - if (ds->ds_phys->ds_prev_snap_obj == 0) + if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) continue; dsl_deadlist_space(&ds->ds_deadlist, @@ -2146,10 +2169,13 @@ * so we need to subtract out the clone origin's used space. */ if (ddpa->origin_origin) { - ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; - ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; + ddpa->used -= + dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes; + ddpa->comp -= + dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes; ddpa->uncomp -= - ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; + dsl_dataset_phys(ddpa->origin_origin)-> + ds_uncompressed_bytes; } /* Check that there is enough space and limit headroom here */ @@ -2164,7 +2190,7 @@ * it is the amount of space that will be on all of their * deadlists (that was not born before their new origin). */ - if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { uint64_t space; /* @@ -2186,9 +2212,11 @@ goto out; ddpa->cloneusedsnap += space; } - if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & + DD_FLAG_USED_BREAKDOWN) { err = snaplist_space(&ddpa->origin_snaps, - origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); + dsl_dataset_phys(origin_ds)->ds_creation_txg, + &ddpa->originusedsnap); if (err != 0) goto out; } @@ -2215,7 +2243,7 @@ VERIFY0(promote_hold(ddpa, dp, FTAG)); hds = ddpa->ddpa_clone; - ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); + ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); snap = list_head(&ddpa->shared_snaps); origin_ds = snap->ds; @@ -2233,47 +2261,49 @@ /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); - oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; + oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; snap = list_tail(&ddpa->clone_snaps); - ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); - origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; + ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, + origin_ds->ds_object); + dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ - if (origin_ds->ds_phys->ds_next_clones_obj) { + if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { dsl_dataset_remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); VERIFY0(zap_add_int(dp->dp_meta_objset, - origin_ds->ds_phys->ds_next_clones_obj, + dsl_dataset_phys(origin_ds)->ds_next_clones_obj, oldnext_obj, tx)); } /* change origin */ dmu_buf_will_dirty(dd->dd_dbuf, tx); - ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); - dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; + ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); + dsl_dir_phys(dd)->dd_origin_obj 
= dsl_dir_phys(odd)->dd_origin_obj; dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; dmu_buf_will_dirty(odd->dd_dbuf, tx); - odd->dd_phys->dd_origin_obj = origin_ds->ds_object; + dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; origin_head->ds_dir->dd_origin_txg = - origin_ds->ds_phys->ds_creation_txg; + dsl_dataset_phys(origin_ds)->ds_creation_txg; /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY0(zap_remove_int(dp->dp_meta_objset, - odd->dd_phys->dd_clones, hds->ds_object, tx)); + dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); VERIFY0(zap_add_int(dp->dp_meta_objset, - ddpa->origin_origin->ds_dir->dd_phys->dd_clones, + dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, hds->ds_object, tx)); VERIFY0(zap_remove_int(dp->dp_meta_objset, - ddpa->origin_origin->ds_dir->dd_phys->dd_clones, + dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, origin_head->ds_object, tx)); - if (dd->dd_phys->dd_clones == 0) { - dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, - DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + if (dsl_dir_phys(dd)->dd_clones == 0) { + dsl_dir_phys(dd)->dd_clones = + zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, + DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, - dd->dd_phys->dd_clones, origin_head->ds_object, tx)); + dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx)); } /* move snapshots to this dir */ @@ -2296,28 +2326,28 @@ VERIFY0(dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx, B_TRUE)); VERIFY0(zap_add(dp->dp_meta_objset, - hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, + dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); dsl_fs_ss_count_adjust(hds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); - ds->ds_phys->ds_dir_obj = dd->dd_object; + ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); + dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; ASSERT3P(ds->ds_dir, ==, odd); dsl_dir_rele(ds->ds_dir, ds); VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ - if (ds->ds_phys->ds_next_clones_obj && + if (dsl_dataset_phys(ds)->ds_next_clones_obj && spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, - ds->ds_phys->ds_next_clones_obj); + dsl_dataset_phys(ds)->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *cnds; @@ -2333,12 +2363,13 @@ VERIFY0(dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &cnds)); - o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; + o = dsl_dir_phys(cnds->ds_dir)-> + dd_head_dataset_obj; VERIFY0(zap_remove_int(dp->dp_meta_objset, - odd->dd_phys->dd_clones, o, tx)); + dsl_dir_phys(odd)->dd_clones, o, tx)); VERIFY0(zap_add_int(dp->dp_meta_objset, - dd->dd_phys->dd_clones, o, tx)); + dsl_dir_phys(dd)->dd_clones, o, tx)); dsl_dataset_rele(cnds, FTAG); } zap_cursor_fini(&zc); @@ -2355,7 +2386,7 @@ */ delta = ddpa->cloneusedsnap - - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, >=, 0); ASSERT3U(ddpa->used, >=, delta); dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); @@ -2363,14 +2394,14 @@ ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); delta = ddpa->originusedsnap - - odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; + 
dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, <=, 0); ASSERT3U(ddpa->used, >=, -delta); dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(odd, DD_USED_HEAD, -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); - origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; + dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; /* log history record */ spa_history_log_internal_ds(hds, "promote", tx, ""); @@ -2405,12 +2436,12 @@ return (err); if (first_obj == 0) - first_obj = ds->ds_dir->dd_phys->dd_origin_obj; + first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; snap = kmem_alloc(sizeof (*snap), KM_SLEEP); snap->ds = ds; list_insert_tail(l, snap); - obj = ds->ds_phys->ds_prev_snap_obj; + obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; } return (0); @@ -2466,7 +2497,7 @@ return (SET_ERROR(EINVAL)); } - error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, + error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, &ddpa->shared_snaps, tag); if (error != 0) goto out; @@ -2477,16 +2508,16 @@ goto out; snap = list_head(&ddpa->shared_snaps); - ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); - error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, - snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, + ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); + error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, + dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, &ddpa->origin_snaps, tag); if (error != 0) goto out; - if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { + if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { error = dsl_dataset_hold_obj(dp, - snap->ds->ds_dir->dd_phys->dd_origin_obj, + dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, tag, &ddpa->origin_origin); if (error != 0) goto out; @@ -2530,7 +2561,8 @@ if (error != 0) return (error); error = zap_count(dmu_objset_pool(os)->dp_meta_objset, - dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); + dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, + &numsnaps); dmu_objset_rele(os, FTAG); if (error != 0) return (error); @@ -2581,9 +2613,9 @@ /* check amount of any unconsumed refreservation */ unused_refres_delta = (int64_t)MIN(origin_head->ds_reserved, - origin_head->ds_phys->ds_unique_bytes) - + dsl_dataset_phys(origin_head)->ds_unique_bytes) - (int64_t)MIN(origin_head->ds_reserved, - clone->ds_phys->ds_unique_bytes); + dsl_dataset_phys(clone)->ds_unique_bytes); if (unused_refres_delta > 0 && unused_refres_delta > @@ -2592,7 +2624,8 @@ /* clone can't be over the head's refquota */ if (origin_head->ds_quota != 0 && - clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) + dsl_dataset_phys(clone)->ds_referenced_bytes > + origin_head->ds_quota) return (SET_ERROR(EDQUOT)); return (0); @@ -2607,7 +2640,7 @@ ASSERT(clone->ds_reserved == 0); ASSERT(origin_head->ds_quota == 0 || - clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); + dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota); ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); dmu_buf_will_dirty(clone->ds_dbuf, tx); @@ -2625,9 +2658,9 @@ unused_refres_delta = (int64_t)MIN(origin_head->ds_reserved, - origin_head->ds_phys->ds_unique_bytes) - + dsl_dataset_phys(origin_head)->ds_unique_bytes) - (int64_t)MIN(origin_head->ds_reserved, - clone->ds_phys->ds_unique_bytes); + dsl_dataset_phys(clone)->ds_unique_bytes); /* * Reset origin's unique bytes, if it exists. 
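For the refreservation accounting in the check and sync functions above,
unused_refres_delta is MIN(R, unique_head) - MIN(R, unique_clone) for a
refreservation R.  A small self-contained C sketch with hypothetical sizes
(the 10/7/2 GiB figures below are illustrative only, not from the patch)
shows why a positive delta triggers the space-availability check:

#include <stdio.h>
#include <stdint.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	/* Hypothetical sizes, chosen only to illustrate the formula. */
	uint64_t reserved = 10ULL << 30;	/* refreservation R */
	uint64_t uniq_head = 7ULL << 30;	/* origin_head ds_unique_bytes */
	uint64_t uniq_clone = 2ULL << 30;	/* clone ds_unique_bytes */

	/* Same formula as unused_refres_delta in the hunks above. */
	int64_t delta = (int64_t)MIN(reserved, uniq_head) -
	    (int64_t)MIN(reserved, uniq_clone);

	/*
	 * After the swap the head's unique bytes become the clone's, so a
	 * positive delta is additional unconsumed reservation that must be
	 * charged to DD_USED_REFRSRV; if it exceeds the space available,
	 * the check function returns ENOSPC.
	 */
	printf("unused_refres_delta = %lld bytes\n", (long long)delta);
	return (0);
}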
@@ -2638,16 +2671,17 @@ dmu_buf_will_dirty(origin->ds_dbuf, tx); dsl_deadlist_space_range(&clone->ds_deadlist, - origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, - &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); + dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX, + &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp); } /* swap blkptrs */ { blkptr_t tmp; - tmp = origin_head->ds_phys->ds_bp; - origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; - clone->ds_phys->ds_bp = tmp; + tmp = dsl_dataset_phys(origin_head)->ds_bp; + dsl_dataset_phys(origin_head)->ds_bp = + dsl_dataset_phys(clone)->ds_bp; + dsl_dataset_phys(clone)->ds_bp = tmp; } /* set dd_*_bytes */ @@ -2656,7 +2690,7 @@ uint64_t cdl_used, cdl_comp, cdl_uncomp; uint64_t odl_used, odl_comp, odl_uncomp; - ASSERT3U(clone->ds_dir->dd_phys-> + ASSERT3U(dsl_dir_phys(clone->ds_dir)-> dd_used_breakdown[DD_USED_SNAP], ==, 0); dsl_deadlist_space(&clone->ds_deadlist, @@ -2664,13 +2698,18 @@ dsl_deadlist_space(&origin_head->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp); - dused = clone->ds_phys->ds_referenced_bytes + cdl_used - - (origin_head->ds_phys->ds_referenced_bytes + odl_used); - dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - - (origin_head->ds_phys->ds_compressed_bytes + odl_comp); - duncomp = clone->ds_phys->ds_uncompressed_bytes + + dused = dsl_dataset_phys(clone)->ds_referenced_bytes + + cdl_used - + (dsl_dataset_phys(origin_head)->ds_referenced_bytes + + odl_used); + dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes + + cdl_comp - + (dsl_dataset_phys(origin_head)->ds_compressed_bytes + + odl_comp); + duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes + cdl_uncomp - - (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); + (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes + + odl_uncomp); dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, dused, dcomp, duncomp, tx); @@ -2694,14 +2733,14 @@ } /* swap ds_*_bytes */ - SWITCH64(origin_head->ds_phys->ds_referenced_bytes, - clone->ds_phys->ds_referenced_bytes); - SWITCH64(origin_head->ds_phys->ds_compressed_bytes, - clone->ds_phys->ds_compressed_bytes); - SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, - clone->ds_phys->ds_uncompressed_bytes); - SWITCH64(origin_head->ds_phys->ds_unique_bytes, - clone->ds_phys->ds_unique_bytes); + SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes, + dsl_dataset_phys(clone)->ds_referenced_bytes); + SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes, + dsl_dataset_phys(clone)->ds_compressed_bytes); + SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes, + dsl_dataset_phys(clone)->ds_uncompressed_bytes); + SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes, + dsl_dataset_phys(clone)->ds_unique_bytes); /* apply any parent delta for change in unconsumed refreservation */ dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, @@ -2712,12 +2751,12 @@ */ dsl_deadlist_close(&clone->ds_deadlist); dsl_deadlist_close(&origin_head->ds_deadlist); - SWITCH64(origin_head->ds_phys->ds_deadlist_obj, - clone->ds_phys->ds_deadlist_obj); + SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, + dsl_dataset_phys(clone)->ds_deadlist_obj); dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, - clone->ds_phys->ds_deadlist_obj); + dsl_dataset_phys(clone)->ds_deadlist_obj); dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, - origin_head->ds_phys->ds_deadlist_obj); + dsl_dataset_phys(origin_head)->ds_deadlist_obj); dsl_scan_ds_clone_swapped(origin_head, clone, tx); @@ 
-2768,10 +2807,11 @@ /* * Make a space adjustment for reserved bytes. */ - if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { + if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { ASSERT3U(*used, >=, - ds->ds_reserved - ds->ds_phys->ds_unique_bytes); - *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); + ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); + *used -= + (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); *ref_rsrv = asize - MIN(asize, parent_delta(ds, asize + inflight)); } @@ -2786,9 +2826,10 @@ * on-disk is over quota and there are no pending changes (which * may free up space for us). */ - if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { + if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= + ds->ds_quota) { if (inflight > 0 || - ds->ds_phys->ds_referenced_bytes < ds->ds_quota) + dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) error = SET_ERROR(ERESTART); else error = SET_ERROR(EDQUOT); @@ -2840,7 +2881,7 @@ return (0); } - if (newval < ds->ds_phys->ds_referenced_bytes || + if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || newval < ds->ds_reserved) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOSPC)); @@ -2930,7 +2971,7 @@ mutex_enter(&ds->ds_lock); if (!DS_UNIQUE_IS_ACCURATE(ds)) dsl_dataset_recalc_head_uniq(ds); - unique = ds->ds_phys->ds_unique_bytes; + unique = dsl_dataset_phys(ds)->ds_unique_bytes; mutex_exit(&ds->ds_lock); if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { @@ -2967,7 +3008,7 @@ mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); - unique = ds->ds_phys->ds_unique_bytes; + unique = dsl_dataset_phys(ds)->ds_unique_bytes; delta = MAX(0, (int64_t)(newval - unique)) - MAX(0, (int64_t)(ds->ds_reserved - unique)); ds->ds_reserved = newval; @@ -3033,16 +3074,16 @@ ASSERT(dsl_pool_config_held(dp)); *usedp = 0; - *usedp += new->ds_phys->ds_referenced_bytes; - *usedp -= oldsnap->ds_phys->ds_referenced_bytes; + *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; + *usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes; *compp = 0; - *compp += new->ds_phys->ds_compressed_bytes; - *compp -= oldsnap->ds_phys->ds_compressed_bytes; + *compp += dsl_dataset_phys(new)->ds_compressed_bytes; + *compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes; *uncompp = 0; - *uncompp += new->ds_phys->ds_uncompressed_bytes; - *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; + *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; + *uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes; snapobj = new->ds_object; while (snapobj != oldsnap->ds_object) { @@ -3057,8 +3098,8 @@ break; } - if (snap->ds_phys->ds_prev_snap_txg == - oldsnap->ds_phys->ds_creation_txg) { + if (dsl_dataset_phys(snap)->ds_prev_snap_txg == + dsl_dataset_phys(oldsnap)->ds_creation_txg) { /* * The blocks in the deadlist can not be born after * ds_prev_snap_txg, so get the whole deadlist space, @@ -3071,7 +3112,7 @@ &used, &comp, &uncomp); } else { dsl_deadlist_space_range(&snap->ds_deadlist, - 0, oldsnap->ds_phys->ds_creation_txg, + 0, dsl_dataset_phys(oldsnap)->ds_creation_txg, &used, &comp, &uncomp); } *usedp += used; @@ -3083,7 +3124,7 @@ * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap * was not a snapshot of/before new. */ - snapobj = snap->ds_phys->ds_prev_snap_obj; + snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; if (snap != new) dsl_dataset_rele(snap, FTAG); if (snapobj == 0) { @@ -3127,13 +3168,13 @@ * is before lastsnap. 
*/ if (firstsnap->ds_dir != lastsnap->ds_dir || - firstsnap->ds_phys->ds_creation_txg > - lastsnap->ds_phys->ds_creation_txg) + dsl_dataset_phys(firstsnap)->ds_creation_txg > + dsl_dataset_phys(lastsnap)->ds_creation_txg) return (SET_ERROR(EINVAL)); *usedp = *compp = *uncompp = 0; - snapobj = lastsnap->ds_phys->ds_next_snap_obj; + snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; while (snapobj != firstsnap->ds_object) { dsl_dataset_t *ds; uint64_t used, comp, uncomp; @@ -3143,13 +3184,13 @@ break; dsl_deadlist_space_range(&ds->ds_deadlist, - firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, + dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); *usedp += used; *compp += comp; *uncompp += uncomp; - snapobj = ds->ds_phys->ds_prev_snap_obj; + snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; ASSERT3U(snapobj, !=, 0); dsl_dataset_rele(ds, FTAG); } @@ -3248,10 +3289,10 @@ ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); if (earlier_txg == 0) - earlier_txg = earlier->ds_phys->ds_creation_txg; + earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; if (dsl_dataset_is_snapshot(later) && - earlier_txg >= later->ds_phys->ds_creation_txg) + earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) return (B_FALSE); if (later->ds_dir == earlier->ds_dir) @@ -3259,11 +3300,11 @@ if (!dsl_dir_is_clone(later->ds_dir)) return (B_FALSE); - if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) + if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object) return (B_TRUE); dsl_dataset_t *origin; error = dsl_dataset_hold_obj(dp, - later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); + dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); if (error != 0) return (B_FALSE); ret = dsl_dataset_is_before(origin, earlier, earlier_txg); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_deadlist.c --- a/usr/src/uts/common/fs/zfs/dsl_deadlist.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_deadlist.c Wed Dec 10 08:46:44 2014 -0800 @@ -308,8 +308,9 @@ while (mrs_obj != 0) { dsl_dataset_t *ds; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds)); - dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx); - mrs_obj = ds->ds_phys->ds_prev_snap_obj; + dsl_deadlist_add_key(&dl, + dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); + mrs_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; dsl_dataset_rele(ds, FTAG); } dsl_deadlist_close(&dl); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_deleg.c --- a/usr/src/uts/common/fs/zfs/dsl_deleg.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c Wed Dec 10 08:46:44 2014 -0800 @@ -164,10 +164,10 @@ VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); - zapobj = dd->dd_phys->dd_deleg_zapobj; + zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); - zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, + zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } @@ -208,7 +208,7 @@ uint64_t zapobj; VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL)); - zapobj = dd->dd_phys->dd_deleg_zapobj; + zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; if (zapobj == 0) { dsl_dir_rele(dd, FTAG); return; @@ -332,14 +332,14 @@ uint64_t n; char source[MAXNAMELEN]; - if (dd->dd_phys->dd_deleg_zapobj == 0 || - zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 || - n == 0) + if (dsl_dir_phys(dd)->dd_deleg_zapobj == 0 || + zap_count(mos, + 
dsl_dir_phys(dd)->dd_deleg_zapobj, &n) != 0 || n == 0) continue; sp_nvp = fnvlist_alloc(); for (zap_cursor_init(&basezc, mos, - dd->dd_phys->dd_deleg_zapobj); + dsl_dir_phys(dd)->dd_deleg_zapobj); zap_cursor_retrieve(&basezc, &baseza) == 0; zap_cursor_advance(&basezc)) { zap_cursor_t zc; @@ -594,7 +594,7 @@ if (!zoned) break; } - zapobj = dd->dd_phys->dd_deleg_zapobj; + zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; if (zapobj == 0) continue; @@ -673,7 +673,7 @@ { objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t jumpobj, pjumpobj; - uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; zap_cursor_t zc; zap_attribute_t za; char whokey[ZFS_MAX_DELEG_NAME]; @@ -686,7 +686,7 @@ if (zapobj == 0) { dmu_buf_will_dirty(dd->dd_dbuf, tx); - zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos, + zapobj = dsl_dir_phys(dd)->dd_deleg_zapobj = zap_create(mos, DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx); } @@ -724,7 +724,7 @@ return; for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { - uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; + uint64_t pzapobj = dsl_dir_phys(dd)->dd_deleg_zapobj; if (pzapobj == 0) continue; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_destroy.c --- a/usr/src/uts/common/fs/zfs/dsl_destroy.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c Wed Dec 10 08:46:44 2014 -0800 @@ -78,7 +78,7 @@ /* * Can't delete a branch point. */ - if (ds->ds_phys->ds_num_children > 1) + if (dsl_dataset_phys(ds)->ds_num_children > 1) return (SET_ERROR(EEXIST)); return (0); @@ -147,12 +147,12 @@ ASSERT(!BP_IS_HOLE(bp)); - if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { + if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); if (poa->ds_prev && !poa->after_branch_point && bp->blk_birth > - poa->ds_prev->ds_phys->ds_prev_snap_txg) { - poa->ds_prev->ds_phys->ds_unique_bytes += + dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { + dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += bp_get_dsize_sync(dp->dp_spa, bp); } } else { @@ -183,7 +183,7 @@ VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, process_old_cb, &poa, tx)); VERIFY0(zio_wait(poa.pio)); - ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); + ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes); /* change snapused */ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, @@ -192,12 +192,14 @@ /* swap next's deadlist to our deadlist */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds_next->ds_deadlist); - deadlist_obj = ds->ds_phys->ds_deadlist_obj; - ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj; - ds_next->ds_phys->ds_deadlist_obj = deadlist_obj; - dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; + dsl_dataset_phys(ds)->ds_deadlist_obj = + dsl_dataset_phys(ds_next)->ds_deadlist_obj; + dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj; + dsl_deadlist_open(&ds->ds_deadlist, mos, + dsl_dataset_phys(ds)->ds_deadlist_obj); dsl_deadlist_open(&ds_next->ds_deadlist, mos, - ds_next->ds_phys->ds_deadlist_obj); + dsl_dataset_phys(ds_next)->ds_deadlist_obj); } static void @@ -212,10 +214,10 @@ * find the clones, but dsl_deadlist_remove_key() is a no-op so it * doesn't matter. 
*/ - if (ds->ds_dir->dd_phys->dd_clones == 0) + if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0) return; - for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); + for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; @@ -243,19 +245,20 @@ uint64_t obj; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); - ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); ASSERT(refcount_is_zero(&ds->ds_longholds)); if (defer && - (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) { + (ds->ds_userrefs > 0 || + dsl_dataset_phys(ds)->ds_num_children > 1)) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } - ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); /* We need to log before removing it from the namespace. */ spa_history_log_internal_ds(ds, "destroy", tx, ""); @@ -268,26 +271,28 @@ ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS)); spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx); } - if (ds->ds_phys->ds_prev_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { ASSERT3P(ds->ds_prev, ==, NULL); VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); after_branch_point = - (ds_prev->ds_phys->ds_next_snap_obj != obj); + (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && - ds_prev->ds_phys->ds_next_clones_obj != 0) { + dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); - if (ds->ds_phys->ds_next_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { VERIFY0(zap_add_int(mos, - ds_prev->ds_phys->ds_next_clones_obj, - ds->ds_phys->ds_next_snap_obj, tx)); + dsl_dataset_phys(ds_prev)-> + ds_next_clones_obj, + dsl_dataset_phys(ds)->ds_next_snap_obj, + tx)); } } if (!after_branch_point) { - ds_prev->ds_phys->ds_next_snap_obj = - ds->ds_phys->ds_next_snap_obj; + dsl_dataset_phys(ds_prev)->ds_next_snap_obj = + dsl_dataset_phys(ds)->ds_next_snap_obj; } } @@ -296,18 +301,18 @@ uint64_t used = 0, comp = 0, uncomp = 0; VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); - ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); + dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next)); + ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj); - old_unique = ds_next->ds_phys->ds_unique_bytes; + old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes; dmu_buf_will_dirty(ds_next->ds_dbuf, tx); - ds_next->ds_phys->ds_prev_snap_obj = - ds->ds_phys->ds_prev_snap_obj; - ds_next->ds_phys->ds_prev_snap_txg = - ds->ds_phys->ds_prev_snap_txg; - ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, - ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); + dsl_dataset_phys(ds_next)->ds_prev_snap_obj = + dsl_dataset_phys(ds)->ds_prev_snap_obj; + dsl_dataset_phys(ds_next)->ds_prev_snap_txg = + dsl_dataset_phys(ds)->ds_prev_snap_txg; + ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, + ds_prev ? 
dsl_dataset_phys(ds_prev)->ds_creation_txg : 0); if (ds_next->ds_deadlist.dl_oldfmt) { process_old_deadlist(ds, ds_prev, ds_next, @@ -316,38 +321,38 @@ /* Adjust prev's unique space. */ if (ds_prev && !after_branch_point) { dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds_prev->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_prev_snap_txg, + dsl_dataset_phys(ds_prev)->ds_prev_snap_txg, + dsl_dataset_phys(ds)->ds_prev_snap_txg, &used, &comp, &uncomp); - ds_prev->ds_phys->ds_unique_bytes += used; + dsl_dataset_phys(ds_prev)->ds_unique_bytes += used; } /* Adjust snapused. */ dsl_deadlist_space_range(&ds_next->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, + dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -used, -comp, -uncomp, tx); /* Move blocks to be freed to pool's free list. */ dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, - &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, + &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); /* Merge our deadlist into next's and free it. */ dsl_deadlist_merge(&ds_next->ds_deadlist, - ds->ds_phys->ds_deadlist_obj, tx); + dsl_dataset_phys(ds)->ds_deadlist_obj, tx); } dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_deadlist_obj = 0; + dsl_dataset_phys(ds)->ds_deadlist_obj = 0; /* Collapse range in clone heads */ dsl_dataset_remove_clones_key(ds, - ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_phys(ds)->ds_creation_txg, tx); if (dsl_dataset_is_snapshot(ds_next)) { dsl_dataset_t *ds_nextnext; @@ -362,21 +367,22 @@ * deadlist). */ VERIFY0(dsl_dataset_hold_obj(dp, - ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); + dsl_dataset_phys(ds_next)->ds_next_snap_obj, + FTAG, &ds_nextnext)); dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, - ds->ds_phys->ds_prev_snap_txg, - ds->ds_phys->ds_creation_txg, + dsl_dataset_phys(ds)->ds_prev_snap_txg, + dsl_dataset_phys(ds)->ds_creation_txg, &used, &comp, &uncomp); - ds_next->ds_phys->ds_unique_bytes += used; + dsl_dataset_phys(ds_next)->ds_unique_bytes += used; dsl_dataset_rele(ds_nextnext, FTAG); ASSERT3P(ds_next->ds_prev, ==, NULL); /* Collapse range in this head. 
*/ dsl_dataset_t *hds; VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); + dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds)); dsl_deadlist_remove_key(&hds->ds_deadlist, - ds->ds_phys->ds_creation_txg, tx); + dsl_dataset_phys(ds)->ds_creation_txg, tx); dsl_dataset_rele(hds, FTAG); } else { @@ -385,7 +391,7 @@ ds_next->ds_prev = NULL; if (ds_prev) { VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, + dsl_dataset_phys(ds)->ds_prev_snap_obj, ds_next, &ds_next->ds_prev)); } @@ -399,7 +405,7 @@ if (old_unique < ds_next->ds_reserved) { int64_t mrsdelta; uint64_t new_unique = - ds_next->ds_phys->ds_unique_bytes; + dsl_dataset_phys(ds_next)->ds_unique_bytes; ASSERT(old_unique <= new_unique); mrsdelta = MIN(new_unique - old_unique, @@ -421,9 +427,9 @@ /* remove from snapshot namespace */ dsl_dataset_t *ds_head; - ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); + ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0); VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); + dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head)); VERIFY0(dsl_dataset_get_snapname(ds)); #ifdef ZFS_DEBUG { @@ -443,17 +449,20 @@ spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - if (ds->ds_phys->ds_next_clones_obj != 0) { + if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { uint64_t count; ASSERT0(zap_count(mos, - ds->ds_phys->ds_next_clones_obj, &count) && count == 0); + dsl_dataset_phys(ds)->ds_next_clones_obj, &count) && + count == 0); VERIFY0(dmu_object_free(mos, - ds->ds_phys->ds_next_clones_obj, tx)); + dsl_dataset_phys(ds)->ds_next_clones_obj, tx)); } - if (ds->ds_phys->ds_props_obj != 0) - VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); - if (ds->ds_phys->ds_userrefs_obj != 0) - VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); + if (dsl_dataset_phys(ds)->ds_props_obj != 0) + VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj, + tx)); + if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) + VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, + tx)); dsl_dir_rele(ds->ds_dir, ds); ds->ds_dir = NULL; dmu_object_free_zapified(mos, obj, tx); @@ -555,7 +564,8 @@ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); - ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); + ASSERT3U(bp->blk_birth, >, + dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); } @@ -577,9 +587,10 @@ ka.ds = ds; ka.tx = tx; VERIFY0(traverse_dataset(ds, - ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, + dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, kill_blkptr, &ka)); - ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); + ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || + dsl_dataset_phys(ds)->ds_unique_bytes == 0); } typedef struct dsl_destroy_head_arg { @@ -608,21 +619,21 @@ * from.) */ if (ds->ds_prev != NULL && - ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) + dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object) return (SET_ERROR(EBUSY)); /* * Can't delete if there are children of this fs. 
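* (The child-directory ZAP counted below must be empty; a nonzero count means
* child filesystems still exist, and the check fails with EEXIST.)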
*/ error = zap_count(mos, - ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); + dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count); if (error != 0) return (error); if (count != 0) return (SET_ERROR(EEXIST)); if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && - ds->ds_prev->ds_phys->ds_num_children == 2 && + dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && ds->ds_prev->ds_userrefs == 0) { /* We need to remove the origin snapshot as well. */ if (!refcount_is_zero(&ds->ds_prev->ds_longholds)) @@ -660,7 +671,7 @@ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); - ASSERT0(dd->dd_phys->dd_head_dataset_obj); + ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj); /* * Decrement the filesystem count for all parent filesystems. @@ -679,16 +690,17 @@ */ dsl_dir_set_reservation_sync_impl(dd, 0, tx); - ASSERT0(dd->dd_phys->dd_used_bytes); - ASSERT0(dd->dd_phys->dd_reserved); + ASSERT0(dsl_dir_phys(dd)->dd_used_bytes); + ASSERT0(dsl_dir_phys(dd)->dd_reserved); for (t = 0; t < DD_USED_NUM; t++) - ASSERT0(dd->dd_phys->dd_used_breakdown[t]); + ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); - VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); - VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); - VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); + VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); + VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); + VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); VERIFY0(zap_remove(mos, - dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); + dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, + dd->dd_myname, tx)); dsl_dir_rele(dd, FTAG); dmu_object_free_zapified(mos, ddobj, tx); @@ -702,10 +714,10 @@ uint64_t obj, ddobj, prevobj = 0; boolean_t rmorigin; - ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT(ds->ds_prev == NULL || - ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); - ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); + dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); + ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); /* We need to log before removing it from the namespace. */ @@ -713,7 +725,7 @@ rmorigin = (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && - ds->ds_prev->ds_phys->ds_num_children == 2 && + dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && ds->ds_prev->ds_userrefs == 0); /* Remove our reservation. */ @@ -731,20 +743,21 @@ obj = ds->ds_object; - if (ds->ds_phys->ds_prev_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { /* This is a clone */ ASSERT(ds->ds_prev != NULL); - ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj); - ASSERT0(ds->ds_phys->ds_next_snap_obj); + ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=, + obj); + ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj); dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); - if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) { + if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds->ds_prev, obj, tx); } - ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1); - ds->ds_prev->ds_phys->ds_num_children--; + ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1); + dsl_dataset_phys(ds->ds_prev)->ds_num_children--; } /* @@ -753,9 +766,9 @@ * safe to ignore the deadlist contents.) 
*/ dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); + dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_deadlist_obj = 0; + dsl_dataset_phys(ds)->ds_deadlist_obj = 0; objset_t *os; VERIFY0(dmu_objset_from_ds(ds, &os)); @@ -785,15 +798,16 @@ scn->scn_async_destroying = B_TRUE; } - used = ds->ds_dir->dd_phys->dd_used_bytes; - comp = ds->ds_dir->dd_phys->dd_compressed_bytes; - uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; + used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes; + comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes; + uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes; ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || - ds->ds_phys->ds_unique_bytes == used); + dsl_dataset_phys(ds)->ds_unique_bytes == used); bptree_add(mos, dp->dp_bptree_obj, - &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, + &dsl_dataset_phys(ds)->ds_bp, + dsl_dataset_phys(ds)->ds_prev_snap_txg, used, comp, uncomp, tx); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, -used, -comp, -uncomp, tx); @@ -804,7 +818,7 @@ if (ds->ds_prev != NULL) { if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY0(zap_remove_int(mos, - ds->ds_prev->ds_dir->dd_phys->dd_clones, + dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones, ds->ds_object, tx)); } prevobj = ds->ds_prev->ds_object; @@ -823,22 +837,22 @@ /* Erase the link in the dir */ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); - ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; + dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0; ddobj = ds->ds_dir->dd_object; - ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); - VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx)); + ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0); + VERIFY0(zap_destroy(mos, + dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx)); if (ds->ds_bookmarks != 0) { - VERIFY0(zap_destroy(mos, - ds->ds_bookmarks, tx)); + VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx)); spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); } spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); - ASSERT0(ds->ds_phys->ds_next_clones_obj); - ASSERT0(ds->ds_phys->ds_props_obj); - ASSERT0(ds->ds_phys->ds_userrefs_obj); + ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj); + ASSERT0(dsl_dataset_phys(ds)->ds_props_obj); + ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj); dsl_dir_rele(ds->ds_dir, ds); ds->ds_dir = NULL; dmu_object_free_zapified(mos, obj, tx); @@ -876,7 +890,7 @@ /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; + dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; spa_history_log_internal_ds(ds, "destroy begin", tx, ""); dsl_dataset_rele(ds, FTAG); @@ -919,7 +933,8 @@ error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = - dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg; + dsl_dataset_phys(dmu_objset_ds(os))-> + ds_prev_snap_txg; for (uint64_t obj = 0; error == 0; error = dmu_object_next(os, &obj, FALSE, prev_snap_txg)) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_dir.c --- a/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c Wed Dec 10 08:46:44 2014 -0800 @@ -121,6 +121,8 @@ * such as those created by zfs diff. 
*/ +extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd); + static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); /* ARGSUSED */ @@ -180,7 +182,6 @@ dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; - dd->dd_phys = dbuf->db_data; mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), @@ -188,9 +189,10 @@ dsl_dir_snap_cmtime_update(dd); - if (dd->dd_phys->dd_parent_obj) { - err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj, - NULL, dd, &dd->dd_parent); + if (dsl_dir_phys(dd)->dd_parent_obj) { + err = dsl_dir_hold_obj(dp, + dsl_dir_phys(dd)->dd_parent_obj, NULL, dd, + &dd->dd_parent); if (err != 0) goto errout; if (tail) { @@ -198,14 +200,16 @@ uint64_t foundobj; err = zap_lookup(dp->dp_meta_objset, - dd->dd_parent->dd_phys->dd_child_dir_zapobj, - tail, sizeof (foundobj), 1, &foundobj); + dsl_dir_phys(dd->dd_parent)-> + dd_child_dir_zapobj, tail, + sizeof (foundobj), 1, &foundobj); ASSERT(err || foundobj == ddobj); #endif (void) strcpy(dd->dd_myname, tail); } else { err = zap_value_search(dp->dp_meta_objset, - dd->dd_parent->dd_phys->dd_child_dir_zapobj, + dsl_dir_phys(dd->dd_parent)-> + dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } if (err != 0) @@ -224,7 +228,8 @@ * Just look at its phys directly instead. */ err = dmu_bonus_hold(dp->dp_meta_objset, - dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); + dsl_dir_phys(dd)->dd_origin_obj, FTAG, + &origin_bonus); if (err != 0) goto errout; origin_phys = origin_bonus->db_data; @@ -233,8 +238,7 @@ dmu_buf_rele(origin_bonus, FTAG); } - winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, - dsl_dir_evict); + winner = dmu_buf_set_user_ie(dbuf, dd, dsl_dir_evict); if (winner) { if (dd->dd_parent) dsl_dir_rele(dd->dd_parent, dd); @@ -417,10 +421,10 @@ if (next[0] == '@') break; dprintf("looking up %s in obj%lld\n", - buf, dd->dd_phys->dd_child_dir_zapobj); + buf, dsl_dir_phys(dd)->dd_child_dir_zapobj); err = zap_lookup(dp->dp_meta_objset, - dd->dd_phys->dd_child_dir_zapobj, + dsl_dir_phys(dd)->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); if (err != 0) { if (err == ENOENT) @@ -501,7 +505,7 @@ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* Iterate my child dirs */ - for (zap_cursor_init(zc, os, dd->dd_phys->dd_child_dir_zapobj); + for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { dsl_dir_t *chld_dd; uint64_t count; @@ -535,9 +539,9 @@ zap_cursor_fini(zc); /* Count my snapshots (we counted children's snapshots above) */ VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_head_dataset_obj, FTAG, &ds)); + dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds)); - for (zap_cursor_init(zc, os, ds->ds_phys->ds_snapnames_zapobj); + for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { /* Don't count temporary snapshots */ @@ -686,7 +690,7 @@ return (ENFORCE_NEVER); #endif - if ((obj = dd->dd_phys->dd_head_dataset_obj) == 0) + if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0) return (ENFORCE_ALWAYS); ASSERT(dsl_pool_config_held(dd->dd_pool)); @@ -864,7 +868,7 @@ ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); if (pds) { - VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, + VERIFY(0 == zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj, name, sizeof (uint64_t), 1, &ddobj, tx)); } else { /* it's the root dir */ @@ -896,9 +900,9 @@ boolean_t 
dsl_dir_is_clone(dsl_dir_t *dd) { - return (dd->dd_phys->dd_origin_obj && + return (dsl_dir_phys(dd)->dd_origin_obj && (dd->dd_pool->dp_origin_snap == NULL || - dd->dd_phys->dd_origin_obj != + dsl_dir_phys(dd)->dd_origin_obj != dd->dd_pool->dp_origin_snap->ds_object)); } @@ -907,26 +911,27 @@ { mutex_enter(&dd->dd_lock); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, - dd->dd_phys->dd_used_bytes); - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); + dsl_dir_phys(dd)->dd_used_bytes); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, + dsl_dir_phys(dd)->dd_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, - dd->dd_phys->dd_reserved); + dsl_dir_phys(dd)->dd_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, - dd->dd_phys->dd_compressed_bytes == 0 ? 100 : - (dd->dd_phys->dd_uncompressed_bytes * 100 / - dd->dd_phys->dd_compressed_bytes)); + dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 : + (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / + dsl_dir_phys(dd)->dd_compressed_bytes)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, - dd->dd_phys->dd_uncompressed_bytes); - if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + dsl_dir_phys(dd)->dd_uncompressed_bytes); + if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, - dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, - dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, - dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + - dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); } mutex_exit(&dd->dd_lock); @@ -951,7 +956,7 @@ char buf[MAXNAMELEN]; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_origin_obj, FTAG, &ds)); + dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); @@ -963,7 +968,7 @@ { dsl_pool_t *dp = dd->dd_pool; - ASSERT(dd->dd_phys); + ASSERT(dsl_dir_phys(dd)); if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { /* up the hold count until we can be written out */ @@ -974,8 +979,9 @@ static int64_t parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) { - uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); - uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); + uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved); + uint64_t new_accounted = + MAX(used + delta, dsl_dir_phys(dd)->dd_reserved); return (new_accounted - old_accounted); } @@ -1034,9 +1040,9 @@ } mutex_enter(&dd->dd_lock); - if (dd->dd_phys->dd_quota != 0) - quota = dd->dd_phys->dd_quota; - used = dd->dd_phys->dd_used_bytes; + if (dsl_dir_phys(dd)->dd_quota != 0) + quota = dsl_dir_phys(dd)->dd_quota; + used = dsl_dir_phys(dd)->dd_used_bytes; if (!ondiskonly) used += dsl_dir_space_towrite(dd); @@ -1045,12 +1051,12 @@ quota = MIN(quota, poolsize); } - if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { + if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) { /* * We have some space reserved, in addition to 
what our * parent gave us. */ - parentspace += dd->dd_phys->dd_reserved - used; + parentspace += dsl_dir_phys(dd)->dd_reserved - used; } if (dd == ancestor) { @@ -1109,7 +1115,7 @@ est_inflight = dsl_dir_space_towrite(dd); for (i = 0; i < TXG_SIZE; i++) est_inflight += dd->dd_tempreserved[i]; - used_on_disk = dd->dd_phys->dd_used_bytes; + used_on_disk = dsl_dir_phys(dd)->dd_used_bytes; /* * On the first iteration, fetch the dataset's used-on-disk and @@ -1132,10 +1138,10 @@ * If this transaction will result in a net free of space, * we want to let it through. */ - if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) + if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0) quota = UINT64_MAX; else - quota = dd->dd_phys->dd_quota; + quota = dsl_dir_phys(dd)->dd_quota; /* * Adjust the quota against the actual pool size at the root @@ -1189,7 +1195,7 @@ /* see if it's OK with our parent */ if (dd->dd_parent && parent_rsrv) { - boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); + boolean_t ismos = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0); return (dsl_dir_tempreserve_impl(dd->dd_parent, parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); @@ -1308,7 +1314,7 @@ if (space > 0) dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; - est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; + est_used = dsl_dir_space_towrite(dd) + dsl_dir_phys(dd)->dd_used_bytes; parent_space = parent_delta(dd, est_used, space); mutex_exit(&dd->dd_lock); @@ -1343,26 +1349,27 @@ if (needlock) mutex_enter(&dd->dd_lock); - accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); - ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); + accounted_delta = + parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used); + ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used); ASSERT(compressed >= 0 || - dd->dd_phys->dd_compressed_bytes >= -compressed); + dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || - dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); - dd->dd_phys->dd_used_bytes += used; - dd->dd_phys->dd_uncompressed_bytes += uncompressed; - dd->dd_phys->dd_compressed_bytes += compressed; + dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed); + dsl_dir_phys(dd)->dd_used_bytes += used; + dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed; + dsl_dir_phys(dd)->dd_compressed_bytes += compressed; - if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { + if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(used > 0 || - dd->dd_phys->dd_used_breakdown[type] >= -used); - dd->dd_phys->dd_used_breakdown[type] += used; + dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used); + dsl_dir_phys(dd)->dd_used_breakdown[type] += used; #ifdef DEBUG dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) - u += dd->dd_phys->dd_used_breakdown[t]; - ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); + u += dsl_dir_phys(dd)->dd_used_breakdown[t]; + ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes); #endif } if (needlock) @@ -1385,17 +1392,18 @@ ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); - if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) + if (delta == 0 || + !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? 
- dd->dd_phys->dd_used_breakdown[oldtype] >= delta : - dd->dd_phys->dd_used_breakdown[newtype] >= -delta); - ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); - dd->dd_phys->dd_used_breakdown[oldtype] -= delta; - dd->dd_phys->dd_used_breakdown[newtype] += delta; + dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta : + dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta); + ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta)); + dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta; + dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta; mutex_exit(&dd->dd_lock); } @@ -1439,8 +1447,8 @@ */ towrite = dsl_dir_space_towrite(ds->ds_dir); if ((dmu_tx_is_syncing(tx) || towrite == 0) && - (newval < ds->ds_dir->dd_phys->dd_reserved || - newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) { + (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved || + newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) { error = SET_ERROR(ENOSPC); } mutex_exit(&ds->ds_dir->dd_lock); @@ -1473,7 +1481,7 @@ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); - ds->ds_dir->dd_phys->dd_quota = newval; + dsl_dir_phys(ds->ds_dir)->dd_quota = newval; mutex_exit(&ds->ds_dir->dd_lock); dsl_dataset_rele(ds, FTAG); } @@ -1524,7 +1532,7 @@ } mutex_enter(&dd->dd_lock); - used = dd->dd_phys->dd_used_bytes; + used = dsl_dir_phys(dd)->dd_used_bytes; mutex_exit(&dd->dd_lock); if (dd->dd_parent) { @@ -1534,13 +1542,13 @@ avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; } - if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) { + if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) { uint64_t delta = MAX(used, newval) - - MAX(used, dd->dd_phys->dd_reserved); + MAX(used, dsl_dir_phys(dd)->dd_reserved); if (delta > avail || - (dd->dd_phys->dd_quota > 0 && - newval > dd->dd_phys->dd_quota)) + (dsl_dir_phys(dd)->dd_quota > 0 && + newval > dsl_dir_phys(dd)->dd_quota)) error = SET_ERROR(ENOSPC); } @@ -1557,9 +1565,9 @@ dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); - used = dd->dd_phys->dd_used_bytes; - delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); - dd->dd_phys->dd_reserved = value; + used = dsl_dir_phys(dd)->dd_used_bytes; + delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved); + dsl_dir_phys(dd)->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ @@ -1637,7 +1645,7 @@ return (delta); mutex_enter(&dd->dd_lock); - delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); + delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta); mutex_exit(&dd->dd_lock); return (would_change(dd->dd_parent, delta, ancestor)); } @@ -1728,7 +1736,8 @@ if (newparent != dd->dd_parent) { /* is there enough space? 
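* (myspace is the larger of the bytes actually used and the reservation, so
* the check covers whichever amount the new parent would have to absorb.)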
*/ uint64_t myspace = - MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); + MAX(dsl_dir_phys(dd)->dd_used_bytes, + dsl_dir_phys(dd)->dd_reserved); objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t fs_cnt = 0; uint64_t ss_cnt = 0; @@ -1833,17 +1842,18 @@ DD_FIELD_SNAPSHOT_COUNT, tx); dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, - -dd->dd_phys->dd_used_bytes, - -dd->dd_phys->dd_compressed_bytes, - -dd->dd_phys->dd_uncompressed_bytes, tx); + -dsl_dir_phys(dd)->dd_used_bytes, + -dsl_dir_phys(dd)->dd_compressed_bytes, + -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(newparent, DD_USED_CHILD, - dd->dd_phys->dd_used_bytes, - dd->dd_phys->dd_compressed_bytes, - dd->dd_phys->dd_uncompressed_bytes, tx); + dsl_dir_phys(dd)->dd_used_bytes, + dsl_dir_phys(dd)->dd_compressed_bytes, + dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); - if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { - uint64_t unused_rsrv = dd->dd_phys->dd_reserved - - dd->dd_phys->dd_used_bytes; + if (dsl_dir_phys(dd)->dd_reserved > + dsl_dir_phys(dd)->dd_used_bytes) { + uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved - + dsl_dir_phys(dd)->dd_used_bytes; dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); @@ -1855,18 +1865,19 @@ dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ - error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, + error = zap_remove(mos, + dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, dd->dd_myname, tx); ASSERT0(error); (void) strcpy(dd->dd_myname, mynewname); dsl_dir_rele(dd->dd_parent, dd); - dd->dd_phys->dd_parent_obj = newparent->dd_object; + dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object; VERIFY0(dsl_dir_hold_obj(dp, newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ - VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj, + VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx)); dsl_prop_notify_all(dd); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_pool.c --- a/usr/src/uts/common/fs/zfs/dsl_pool.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c Wed Dec 10 08:46:44 2014 -0800 @@ -135,7 +135,7 @@ int err; err = zap_lookup(dp->dp_meta_objset, - dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, + dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj, name, sizeof (obj), 1, &obj); if (err) return (err); @@ -217,11 +217,11 @@ err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; - err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, - FTAG, &ds); + err = dsl_dataset_hold_obj(dp, + dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } @@ -674,15 +674,15 @@ if (err) return (err); - while (ds->ds_phys->ds_prev_snap_obj != 0) { - err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, - FTAG, &prev); + while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { + err = dsl_dataset_hold_obj(dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } - if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) + if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) break; dsl_dataset_rele(ds, FTAG); ds = prev; @@ -696,7 +696,7 @@ * The $ORIGIN can't have any data, or the accounting * will be 
wrong. */ - ASSERT0(prev->ds_phys->ds_bp.blk_birth); + ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { @@ -705,33 +705,35 @@ } dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_prev_snap_obj = prev->ds_object; - ds->ds_phys->ds_prev_snap_txg = prev->ds_phys->ds_creation_txg; + dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object; + dsl_dataset_phys(ds)->ds_prev_snap_txg = + dsl_dataset_phys(prev)->ds_creation_txg; dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); - ds->ds_dir->dd_phys->dd_origin_obj = prev->ds_object; + dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object; dmu_buf_will_dirty(prev->ds_dbuf, tx); - prev->ds_phys->ds_num_children++; + dsl_dataset_phys(prev)->ds_num_children++; - if (ds->ds_phys->ds_next_snap_obj == 0) { + if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); + dsl_dataset_phys(ds)->ds_prev_snap_obj, + ds, &ds->ds_prev)); } } - ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object); - ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object); + ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object); + ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object); - if (prev->ds_phys->ds_next_clones_obj == 0) { + if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); - prev->ds_phys->ds_next_clones_obj = + dsl_dataset_phys(prev)->ds_next_clones_obj = zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, - prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx)); + dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); if (prev != dp->dp_origin_snap) @@ -756,20 +758,22 @@ dmu_tx_t *tx = arg; objset_t *mos = dp->dp_meta_objset; - if (ds->ds_dir->dd_phys->dd_origin_obj != 0) { + if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) { dsl_dataset_t *origin; VERIFY0(dsl_dataset_hold_obj(dp, - ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin)); + dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin)); - if (origin->ds_dir->dd_phys->dd_clones == 0) { + if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); - origin->ds_dir->dd_phys->dd_clones = zap_create(mos, - DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); + dsl_dir_phys(origin->ds_dir)->dd_clones = + zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, + 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, - origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx)); + dsl_dir_phys(origin->ds_dir)->dd_clones, + ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } @@ -816,7 +820,7 @@ NULL, 0, kcred, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); - VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, + VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_prop.c --- a/usr/src/uts/common/fs/zfs/dsl_prop.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c Wed Dec 10 08:46:44 2014 -0800 @@ -105,8 +105,8 @@ } /* Check for a local value. 
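* (A hit in this directory's props ZAP wins over received or inherited
* values; the loop only moves up to dd_parent when nothing is found at this
* level.)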
*/ - err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, propname, - intsz, numints, buf); + err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, + propname, intsz, numints, buf); if (err != ENOENT) { if (setpoint != NULL && err == 0) dsl_dir_name(dd, setpoint); @@ -117,14 +117,14 @@ * Skip the check for a received value if there is an explicit * inheritance entry. */ - err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, + err = zap_contains(mos, dsl_dir_phys(dd)->dd_props_zapobj, inheritstr); if (err != 0 && err != ENOENT) break; if (err == ENOENT) { /* Check for a received value. */ - err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj, + err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, recvdstr, intsz, numints, buf); if (err != ENOENT) { if (setpoint != NULL && err == 0) { @@ -169,7 +169,7 @@ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); snapshot = dsl_dataset_is_snapshot(ds); - zapobj = ds->ds_phys->ds_props_obj; + zapobj = dsl_dataset_phys(ds)->ds_props_obj; if (zapobj != 0) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; @@ -327,7 +327,7 @@ } mos = dd->dd_pool->dp_meta_objset; - zapobj = dd->dd_phys->dd_props_zapobj; + zapobj = dsl_dir_phys(dd)->dd_props_zapobj; recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); version = spa_version(dd->dd_pool->dp_spa); @@ -486,7 +486,8 @@ * If the prop is set here, then this change is not * being inherited here or below; stop the recursion. */ - err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname); + err = zap_contains(mos, dsl_dir_phys(dd)->dd_props_zapobj, + propname); if (err == 0) { dsl_dir_rele(dd, FTAG); return; @@ -497,7 +498,7 @@ mutex_enter(&dd->dd_lock); for (cbr = list_head(&dd->dd_prop_cbs); cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { - uint64_t propobj = cbr->cbr_ds->ds_phys->ds_props_obj; + uint64_t propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj; if (strcmp(cbr->cbr_propname, propname) != 0) continue; @@ -515,7 +516,7 @@ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, mos, - dd->dd_phys->dd_child_dir_zapobj); + dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { dsl_prop_changed_notify(dp, za->za_first_integer, @@ -546,15 +547,15 @@ if (dsl_dataset_is_snapshot(ds)) { ASSERT(version >= SPA_VERSION_SNAP_PROPS); - if (ds->ds_phys->ds_props_obj == 0) { + if (dsl_dataset_phys(ds)->ds_props_obj == 0) { dmu_buf_will_dirty(ds->ds_dbuf, tx); - ds->ds_phys->ds_props_obj = + dsl_dataset_phys(ds)->ds_props_obj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); } - zapobj = ds->ds_phys->ds_props_obj; + zapobj = dsl_dataset_phys(ds)->ds_props_obj; } else { - zapobj = ds->ds_dir->dd_phys->dd_props_zapobj; + zapobj = dsl_dir_phys(ds->ds_dir)->dd_props_zapobj; } if (version < SPA_VERSION_RECVD_PROPS) { @@ -987,11 +988,11 @@ ASSERT(dsl_pool_config_held(dp)); - if (ds->ds_phys->ds_props_obj != 0) { + if (dsl_dataset_phys(ds)->ds_props_obj != 0) { ASSERT(flags & DSL_PROP_GET_SNAPSHOT); dsl_dataset_name(ds, setpoint); - err = dsl_prop_get_all_impl(mos, ds->ds_phys->ds_props_obj, - setpoint, flags, *nvp); + err = dsl_prop_get_all_impl(mos, + dsl_dataset_phys(ds)->ds_props_obj, setpoint, flags, *nvp); if (err) goto out; } @@ -1004,8 +1005,8 @@ flags |= DSL_PROP_GET_INHERITING; } dsl_dir_name(dd, setpoint); - err = dsl_prop_get_all_impl(mos, dd->dd_phys->dd_props_zapobj, - setpoint, flags, *nvp); + err = dsl_prop_get_all_impl(mos, + 
dsl_dir_phys(dd)->dd_props_zapobj, setpoint, flags, *nvp); if (err) break; } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_scan.c --- a/usr/src/uts/common/fs/zfs/dsl_scan.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c Wed Dec 10 08:46:44 2014 -0800 @@ -376,7 +376,7 @@ { uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; if (dsl_dataset_is_snapshot(ds)) - return (MIN(smt, ds->ds_phys->ds_creation_txg)); + return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg)); return (smt); } @@ -546,7 +546,7 @@ uint64_t objset, uint64_t object, uint64_t blkid) { zbookmark_phys_t czb; - uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; + arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; if (zfs_no_scrub_prefetch) return; @@ -611,7 +611,7 @@ int err; if (BP_GET_LEVEL(bp) > 0) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; @@ -638,7 +638,7 @@ } (void) arc_buf_remove_ref(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; dnode_phys_t *cdnp; int i, j; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; @@ -664,7 +664,7 @@ (void) arc_buf_remove_ref(buf, &buf); } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { - uint32_t flags = ARC_WAIT; + arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; @@ -814,11 +814,12 @@ if (dsl_dataset_is_snapshot(ds)) { /* Note, scn_cur_{min,max}_txg stays the same. */ scn->scn_phys.scn_bookmark.zb_objset = - ds->ds_phys->ds_next_snap_obj; + dsl_dataset_phys(ds)->ds_next_snap_obj; zfs_dbgmsg("destroying ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, - (u_longlong_t)ds->ds_phys->ds_next_snap_obj); + (u_longlong_t)dsl_dataset_phys(ds)-> + ds_next_snap_obj); scn->scn_phys.scn_flags |= DSF_VISIT_DS_AGAIN; } else { SET_BOOKMARK(&scn->scn_phys.scn_bookmark, @@ -829,7 +830,7 @@ } } else if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { - ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); + ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); if (dsl_dataset_is_snapshot(ds)) { @@ -840,11 +841,13 @@ */ VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds->ds_phys->ds_next_snap_obj, mintxg, tx) == 0); + dsl_dataset_phys(ds)->ds_next_snap_obj, + mintxg, tx) == 0); zfs_dbgmsg("destroying ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, - (u_longlong_t)ds->ds_phys->ds_next_snap_obj); + (u_longlong_t)dsl_dataset_phys(ds)-> + ds_next_snap_obj); } else { zfs_dbgmsg("destroying ds %llu; in queue; removing", (u_longlong_t)ds->ds_object); @@ -871,26 +874,26 @@ if (scn->scn_phys.scn_state != DSS_SCANNING) return; - ASSERT(ds->ds_phys->ds_prev_snap_obj != 0); + ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { scn->scn_phys.scn_bookmark.zb_objset = - ds->ds_phys->ds_prev_snap_obj; + dsl_dataset_phys(ds)->ds_prev_snap_obj; zfs_dbgmsg("snapshotting ds %llu; currently traversing; " "reset zb_objset to %llu", (u_longlong_t)ds->ds_object, - (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); + (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } else if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, &mintxg) == 0) { VERIFY3U(0, ==, 
zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds->ds_phys->ds_prev_snap_obj, mintxg, tx) == 0); + dsl_dataset_phys(ds)->ds_prev_snap_obj, mintxg, tx) == 0); zfs_dbgmsg("snapshotting ds %llu; in queue; " "replacing with %llu", (u_longlong_t)ds->ds_object, - (u_longlong_t)ds->ds_phys->ds_prev_snap_obj); + (u_longlong_t)dsl_dataset_phys(ds)->ds_prev_snap_obj); } dsl_scan_sync_state(scn, tx); } @@ -923,8 +926,8 @@ ds1->ds_object, &mintxg) == 0) { int err; - ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); err = zap_add_int_key(dp->dp_meta_objset, @@ -942,8 +945,8 @@ (u_longlong_t)ds2->ds_object); } else if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg) == 0) { - ASSERT3U(mintxg, ==, ds1->ds_phys->ds_prev_snap_txg); - ASSERT3U(mintxg, ==, ds2->ds_phys->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); + ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, @@ -971,17 +974,17 @@ int err; dsl_scan_t *scn = dp->dp_scan; - if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj) + if (dsl_dir_phys(hds->ds_dir)->dd_origin_obj != eca->originobj) return (0); err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); - while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) { + while (dsl_dataset_phys(ds)->ds_prev_snap_obj != eca->originobj) { dsl_dataset_t *prev; err = dsl_dataset_hold_obj(dp, - ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); dsl_dataset_rele(ds, FTAG); if (err) @@ -990,7 +993,7 @@ } VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, ds->ds_object, - ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0); + dsl_dataset_phys(ds)->ds_prev_snap_txg, eca->tx) == 0); dsl_dataset_rele(ds, FTAG); return (0); } @@ -1021,7 +1024,7 @@ * Iterate over the bps in this ds. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); - dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx); + dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx); char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); dsl_dataset_name(ds, dsname); @@ -1055,14 +1058,15 @@ /* * Add descendent datasets to work queue. */ - if (ds->ds_phys->ds_next_snap_obj != 0) { + if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { VERIFY(zap_add_int_key(dp->dp_meta_objset, - scn->scn_phys.scn_queue_obj, ds->ds_phys->ds_next_snap_obj, - ds->ds_phys->ds_creation_txg, tx) == 0); + scn->scn_phys.scn_queue_obj, + dsl_dataset_phys(ds)->ds_next_snap_obj, + dsl_dataset_phys(ds)->ds_creation_txg, tx) == 0); } - if (ds->ds_phys->ds_num_children > 1) { + if (dsl_dataset_phys(ds)->ds_num_children > 1) { boolean_t usenext = B_FALSE; - if (ds->ds_phys->ds_next_clones_obj != 0) { + if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { uint64_t count; /* * A bug in a previous version of the code could @@ -1072,17 +1076,17 @@ * next_clones_obj when its count is correct. 
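* (Correct means ds_num_children - 1 entries: every clone of this snapshot
* except the next snapshot itself, which is counted in ds_num_children but
* never listed in next_clones_obj.)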
*/ int err = zap_count(dp->dp_meta_objset, - ds->ds_phys->ds_next_clones_obj, &count); + dsl_dataset_phys(ds)->ds_next_clones_obj, &count); if (err == 0 && - count == ds->ds_phys->ds_num_children - 1) + count == dsl_dataset_phys(ds)->ds_num_children - 1) usenext = B_TRUE; } if (usenext) { VERIFY0(zap_join_key(dp->dp_meta_objset, - ds->ds_phys->ds_next_clones_obj, + dsl_dataset_phys(ds)->ds_next_clones_obj, scn->scn_phys.scn_queue_obj, - ds->ds_phys->ds_creation_txg, tx)); + dsl_dataset_phys(ds)->ds_creation_txg, tx)); } else { struct enqueue_clones_arg eca; eca.tx = tx; @@ -1110,10 +1114,10 @@ if (err) return (err); - while (ds->ds_phys->ds_prev_snap_obj != 0) { + while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { dsl_dataset_t *prev; - err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, - FTAG, &prev); + err = dsl_dataset_hold_obj(dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); @@ -1122,7 +1126,7 @@ /* * If this is a clone, we don't need to worry about it for now. */ - if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { + if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) { dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(prev, FTAG); return (0); @@ -1132,7 +1136,7 @@ } VERIFY(zap_add_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, - ds->ds_object, ds->ds_phys->ds_prev_snap_txg, tx) == 0); + ds->ds_object, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx) == 0); dsl_dataset_rele(ds, FTAG); return (0); } @@ -1307,7 +1311,7 @@ } else { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, - ds->ds_phys->ds_prev_snap_txg); + dsl_dataset_phys(ds)->ds_prev_snap_txg); } scn->scn_phys.scn_cur_max_txg = dsl_scan_ds_maxtxg(ds); dsl_dataset_rele(ds, FTAG); @@ -1449,13 +1453,6 @@ "traverse_dataset_destroyed()", err); } - /* - * If we didn't make progress, mark the async destroy as - * stalled, so that we will not initiate a spa_sync() on - * its behalf. - */ - scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); - if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { /* finished; deactivate async destroy feature */ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); @@ -1468,6 +1465,18 @@ dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; + scn->scn_async_stalled = B_FALSE; + } else { + /* + * If we didn't make progress, mark the async + * destroy as stalled, so that we will not initiate + * a spa_sync() on its behalf. Note that we only + * check this if we are not finished, because if the + * bptree had no blocks for us to visit, we can + * finish without "making progress". + */ + scn->scn_async_stalled = + (scn->scn_visited_this_txg == 0); } } if (scn->scn_visited_this_txg) { @@ -1489,9 +1498,9 @@ if (err != 0) return; if (!scn->scn_async_destroying && zfs_free_leak_on_eio && - (dp->dp_free_dir->dd_phys->dd_used_bytes != 0 || - dp->dp_free_dir->dd_phys->dd_compressed_bytes != 0 || - dp->dp_free_dir->dd_phys->dd_uncompressed_bytes != 0)) { + (dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes != 0 || + dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes != 0 || + dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes != 0)) { /* * We have finished background destroying, but there is still * some space left in the dp_free_dir. 
Transfer this leaked @@ -1506,19 +1515,19 @@ rrw_exit(&dp->dp_config_rwlock, FTAG); } dsl_dir_diduse_space(dp->dp_leak_dir, DD_USED_HEAD, - dp->dp_free_dir->dd_phys->dd_used_bytes, - dp->dp_free_dir->dd_phys->dd_compressed_bytes, - dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx); + dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, + dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, + dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, - -dp->dp_free_dir->dd_phys->dd_used_bytes, - -dp->dp_free_dir->dd_phys->dd_compressed_bytes, - -dp->dp_free_dir->dd_phys->dd_uncompressed_bytes, tx); + -dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes, + -dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes, + -dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx); } if (!scn->scn_async_destroying) { /* finished; verify that space accounting went to zero */ - ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes); - ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes); - ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes); + ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes); + ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes); + ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes); } if (scn->scn_phys.scn_state != DSS_SCANNING) diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_synctask.c --- a/usr/src/uts/common/fs/zfs/dsl_synctask.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_synctask.c Wed Dec 10 08:46:44 2014 -0800 @@ -163,7 +163,7 @@ uint64_t quota = dsl_pool_adjustedsize(dp, dst->dst_space_check == ZFS_SPACE_CHECK_RESERVED) - metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); - uint64_t used = dp->dp_root_dir->dd_phys->dd_used_bytes; + uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes; /* MOS space is triple-dittoed, so we multiply by 3. */ if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) { dst->dst_error = SET_ERROR(ENOSPC); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/dsl_userhold.c --- a/usr/src/uts/common/fs/zfs/dsl_userhold.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/dsl_userhold.c Wed Dec 10 08:46:44 2014 -0800 @@ -64,10 +64,10 @@ return (SET_ERROR(E2BIG)); /* tags must be unique (if ds already exists) */ - if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) { + if (ds != NULL && dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { uint64_t value; - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, + error = zap_lookup(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, htag, 8, 1, &value); if (error == 0) error = SET_ERROR(EEXIST); @@ -140,16 +140,16 @@ ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); - if (ds->ds_phys->ds_userrefs_obj == 0) { + if (dsl_dataset_phys(ds)->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create * the userrefs zap object. 
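* (Subsequent holds land in this same ZAP keyed by tag, which is why the
* check above requires each tag to be unique per dataset.)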
*/ dmu_buf_will_dirty(ds->ds_dbuf, tx); - zapobj = ds->ds_phys->ds_userrefs_obj = + zapobj = dsl_dataset_phys(ds)->ds_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); } else { - zapobj = ds->ds_phys->ds_userrefs_obj; + zapobj = dsl_dataset_phys(ds)->ds_userrefs_obj; } ds->ds_userrefs++; @@ -360,7 +360,7 @@ numholds = 0; mos = ds->ds_dir->dd_pool->dp_meta_objset; - zapobj = ds->ds_phys->ds_userrefs_obj; + zapobj = dsl_dataset_phys(ds)->ds_userrefs_obj; holds_found = fnvlist_alloc(); for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL; @@ -398,7 +398,8 @@ numholds++; } - if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 && + if (DS_IS_DEFER_DESTROY(ds) && + dsl_dataset_phys(ds)->ds_num_children == 1 && ds->ds_userrefs == numholds) { /* we need to destroy the snapshot as well */ if (dsl_dataset_long_held(ds)) { @@ -484,8 +485,8 @@ error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx); VERIFY(error == 0 || error == ENOENT); - VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname, - tx)); + VERIFY0(zap_remove(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, + holdname, tx)); ds->ds_userrefs--; spa_history_log_internal_ds(ds, "release", tx, @@ -514,7 +515,7 @@ fnvpair_value_nvlist(pair), tx); if (nvlist_exists(ddura->ddura_todelete, name)) { ASSERT(ds->ds_userrefs == 0 && - ds->ds_phys->ds_num_children == 1 && + dsl_dataset_phys(ds)->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)); dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx); } @@ -644,13 +645,13 @@ return (err); } - if (ds->ds_phys->ds_userrefs_obj != 0) { + if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { zap_attribute_t *za; zap_cursor_t zc; za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, - ds->ds_phys->ds_userrefs_obj); + dsl_dataset_phys(ds)->ds_userrefs_obj); zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { fnvlist_add_uint64(nvl, za->za_name, diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sa.c --- a/usr/src/uts/common/fs/zfs/sa.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sa.c Wed Dec 10 08:46:44 2014 -0800 @@ -1345,7 +1345,7 @@ { mutex_enter(&hdl->sa_lock); (void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl, - NULL, NULL, NULL); + NULL, NULL); if (hdl->sa_bonus_tab) { sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); @@ -1392,8 +1392,7 @@ error = sa_build_index(handle, SA_BONUS); newhandle = (hdl_type == SA_HDL_SHARED) ? 
- dmu_buf_set_user_ie(db, handle, - NULL, sa_evict) : NULL; + dmu_buf_set_user_ie(db, handle, sa_evict) : NULL; if (newhandle != NULL) { kmem_cache_free(sa_cache, handle); @@ -1911,7 +1910,7 @@ sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl) { (void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus, - oldhdl, newhdl, NULL, sa_evict); + oldhdl, newhdl, sa_evict); oldhdl->sa_bonus = NULL; } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/spa.c --- a/usr/src/uts/common/fs/zfs/spa.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/spa.c Wed Dec 10 08:46:44 2014 -0800 @@ -241,7 +241,8 @@ */ if (pool->dp_free_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, - pool->dp_free_dir->dd_phys->dd_used_bytes, src); + dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, + src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 0, src); @@ -249,7 +250,8 @@ if (pool->dp_leak_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, - pool->dp_leak_dir->dd_phys->dd_used_bytes, src); + dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, + src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, 0, src); @@ -6262,21 +6264,6 @@ } /* - * If anything has changed in this txg, or if someone is waiting - * for this txg to sync (eg, spa_vdev_remove()), push the - * deferred frees from the previous txg. If not, leave them - * alone so that we don't generate work on an otherwise idle - * system. - */ - if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || - !txg_list_empty(&dp->dp_dirty_dirs, txg) || - !txg_list_empty(&dp->dp_sync_tasks, txg) || - ((dsl_scan_active(dp->dp_scan) || - txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { - spa_sync_deferred_frees(spa, tx); - } - - /* * Iterate to convergence. */ do { @@ -6293,6 +6280,11 @@ if (pass < zfs_sync_pass_deferred_free) { spa_sync_frees(spa, free_bpl, tx); } else { + /* + * We can not defer frees in pass 1, because + * we sync the deferred frees later in pass 1. + */ + ASSERT3U(pass, >, 1); bplist_iterate(free_bpl, bpobj_enqueue_cb, &spa->spa_deferred_bpobj, tx); } @@ -6303,8 +6295,37 @@ while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) vdev_sync(vd, txg); - if (pass == 1) + if (pass == 1) { spa_sync_upgrades(spa, tx); + ASSERT3U(txg, >=, + spa->spa_uberblock.ub_rootbp.blk_birth); + /* + * Note: We need to check if the MOS is dirty + * because we could have marked the MOS dirty + * without updating the uberblock (e.g. if we + * have sync tasks but no dirty user data). We + * need to check the uberblock's rootbp because + * it is updated if we have synced out dirty + * data (though in this case the MOS will most + * likely also be dirty due to second order + * effects, we don't want to rely on that here). + */ + if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && + !dmu_objset_is_dirty(mos, txg)) { + /* + * Nothing changed on the first pass, + * therefore this TXG is a no-op. Avoid + * syncing deferred frees, so that we + * can keep this TXG as a no-op. 
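* (The ASSERTs that follow back this up: no dirty datasets, dirty dirs, or
* pending sync tasks may be queued for this TXG when we take the early
* break.)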
+ */ + ASSERT(txg_list_empty(&dp->dp_dirty_datasets, + txg)); + ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); + ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); + break; + } + spa_sync_deferred_frees(spa, tx); + } } while (dmu_objset_is_dirty(mos, txg)); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/spa_history.c --- a/usr/src/uts/common/fs/zfs/spa_history.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/spa_history.c Wed Dec 10 08:46:44 2014 -0800 @@ -520,7 +520,7 @@ dsl_dir_name(dd, namebuf); fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, - dd->dd_phys->dd_head_dataset_obj); + dsl_dir_phys(dd)->dd_head_dataset_obj); va_start(adx, fmt); log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/arc.h --- a/usr/src/uts/common/fs/zfs/sys/arc.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/arc.h Wed Dec 10 08:46:44 2014 -0800 @@ -46,6 +46,36 @@ arc_done_func_t arc_bcopy_func; arc_done_func_t arc_getbuf_func; +typedef enum arc_flags +{ + /* + * Public flags that can be passed into the ARC by external consumers. + */ + ARC_FLAG_NONE = 1 << 0, /* No flags set */ + ARC_FLAG_WAIT = 1 << 1, /* perform sync I/O */ + ARC_FLAG_NOWAIT = 1 << 2, /* perform async I/O */ + ARC_FLAG_PREFETCH = 1 << 3, /* I/O is a prefetch */ + ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */ + ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */ + ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */ + + /* + * Private ARC flags. These flags are private ARC only flags that + * will show up in b_flags in the arc_hdr_buf_t. These flags should + * only be set by ARC code. + */ + ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */ + ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */ + ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */ + ARC_FLAG_FREED_IN_READ = 1 << 10, /* freed during read */ + ARC_FLAG_BUF_AVAILABLE = 1 << 11, /* block not in use */ + ARC_FLAG_INDIRECT = 1 << 12, /* indirect block */ + ARC_FLAG_FREE_IN_PROGRESS = 1 << 13, /* about to be freed */ + ARC_FLAG_L2_WRITING = 1 << 14, /* write in progress */ + ARC_FLAG_L2_EVICTED = 1 << 15, /* evicted during I/O */ + ARC_FLAG_L2_WRITE_HEAD = 1 << 16, /* head of write list */ +} arc_flags_t; + struct arc_buf { arc_buf_hdr_t *b_hdr; arc_buf_t *b_next; @@ -60,15 +90,6 @@ ARC_BUFC_METADATA, /* buffer contains metadata */ ARC_BUFC_NUMTYPES } arc_buf_contents_t; -/* - * These are the flags we pass into calls to the arc - */ -#define ARC_WAIT (1 << 1) /* perform I/O synchronously */ -#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */ -#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */ -#define ARC_CACHED (1 << 4) /* I/O was already in cache */ -#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */ -#define ARC_L2COMPRESS (1 << 6) /* compress in L2ARC */ /* * The following breakdows of arc_size exist for kstat only. 
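/*
 * Usage sketch for the consolidated arc_flags_t type (the wrapper function
 * and its arguments are hypothetical; arc_read() and arc_getbuf_func are the
 * declarations in this header, with arc_read()'s prototype updated in the
 * next hunk):
 */
static int
example_sync_arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
    const zbookmark_phys_t *zb, arc_buf_t **bufp)
{
	/* One enum now carries what the removed ARC_* defines expressed. */
	arc_flags_t flags = ARC_FLAG_WAIT;

	/* arc_getbuf_func stores the resulting buffer through bufp. */
	return (arc_read(pio, spa, bp, arc_getbuf_func, bufp,
	    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb));
}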
@@ -102,7 +123,7 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, void *private, zio_priority_t priority, int flags, - uint32_t *arc_flags, const zbookmark_phys_t *zb); + arc_flags_t *arc_flags, const zbookmark_phys_t *zb); zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress, const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone, diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/dbuf.h --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h Wed Dec 10 08:46:44 2014 -0800 @@ -228,7 +228,6 @@ /* stuff we store for the user (see dmu_buf_set_user) */ void *db_user_ptr; - void **db_user_data_ptr_ptr; dmu_buf_evict_func_t *db_evict_func; uint8_t db_immediate_evict; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/dmu.h --- a/usr/src/uts/common/fs/zfs/sys/dmu.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h Wed Dec 10 08:46:44 2014 -0800 @@ -484,12 +484,6 @@ * * user_ptr is for use by the user and can be obtained via dmu_buf_get_user(). * - * user_data_ptr_ptr should be NULL, or a pointer to a pointer which - * will be set to db->db_data when you are allowed to access it. Note - * that db->db_data (the pointer) can change when you do dmu_buf_read(), - * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill(). - * *user_data_ptr_ptr will be set to the new value when it changes. - * * If non-NULL, pageout func will be called when this buffer is being * excised from the cache, so that you can clean up the data structure * pointed to by user_ptr. @@ -497,17 +491,16 @@ * dmu_evict_user() will call the pageout func for all buffers in a * objset with a given pageout func. */ -void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, +void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, dmu_buf_evict_func_t *pageout_func); /* * set_user_ie is the same as set_user, but request immediate eviction * when hold count goes to zero. */ void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr, - void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); + dmu_buf_evict_func_t *pageout_func); void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, - void *user_ptr, void *user_data_ptr_ptr, - dmu_buf_evict_func_t *pageout_func); + void *user_ptr, dmu_buf_evict_func_t *pageout_func); void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); /* diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/dsl_dataset.h --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Wed Dec 10 08:46:44 2014 -0800 @@ -48,7 +48,7 @@ #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ - ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) + (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT) /* * Do not allow this dataset to be promoted. @@ -68,7 +68,7 @@ */ #define DS_FLAG_DEFER_DESTROY (1ULL<<3) #define DS_IS_DEFER_DESTROY(ds) \ - ((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY) + (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_DEFER_DESTROY) /* * DS_FIELD_* are strings that are used in the "extensified" dataset zap object. 
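/*
 * Usage sketch for the phys accessors (the two helpers below are
 * hypothetical; dsl_dataset_phys() itself is added in the next hunk, and the
 * dmu_buf_set_user() change above dropped the user_data_ptr_ptr argument
 * that used to keep a cached *_phys pointer in sync with db_data):
 */
static uint64_t
example_creation_txg(dsl_dataset_t *ds)
{
	/* Old code: ds->ds_phys->ds_creation_txg */
	return (dsl_dataset_phys(ds)->ds_creation_txg);
}

static void
example_mark_inconsistent(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	/* Writers still dirty the bonus buffer before touching phys data. */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
}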
@@ -134,7 +134,6 @@ typedef struct dsl_dataset { /* Immutable: */ struct dsl_dir *ds_dir; - dsl_dataset_phys_t *ds_phys; dmu_buf_t *ds_dbuf; uint64_t ds_object; uint64_t ds_fsid_guid; @@ -186,17 +185,26 @@ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; +inline dsl_dataset_phys_t * +dsl_dataset_phys(dsl_dataset_t *ds) +{ + return (ds->ds_dbuf->db_data); +} + /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. */ #define MAX_TAG_PREFIX_LEN 17 -#define dsl_dataset_is_snapshot(ds) \ - ((ds)->ds_phys->ds_num_children != 0) +inline boolean_t +dsl_dataset_is_snapshot(dsl_dataset_t *ds) +{ + return (dsl_dataset_phys(ds)->ds_num_children != 0); +} #define DS_UNIQUE_IS_ACCURATE(ds) \ - (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) + ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/dsl_dir.h --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h Wed Dec 10 08:46:44 2014 -0800 @@ -86,9 +86,10 @@ struct dsl_dir { /* These are immutable; no lock needed: */ uint64_t dd_object; - dsl_dir_phys_t *dd_phys; + dsl_pool_t *dd_pool; + + /* Stable until user eviction; no lock needed: */ dmu_buf_t *dd_dbuf; - dsl_pool_t *dd_pool; /* protected by lock on pool's dp_dirty_dirs list */ txg_node_t dd_dirty_link; @@ -111,6 +112,12 @@ char dd_myname[MAXNAMELEN]; }; +inline dsl_dir_phys_t * +dsl_dir_phys(dsl_dir_t *dd) +{ + return (dd->dd_dbuf->db_data); +} + void dsl_dir_rele(dsl_dir_t *dd, void *tag); int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t **, const char **tail); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/uberblock.h --- a/usr/src/uts/common/fs/zfs/sys/uberblock.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/uberblock.h Wed Dec 10 08:46:44 2014 -0800 @@ -22,6 +22,9 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2014 by Delphix. All rights reserved. + */ #ifndef _SYS_UBERBLOCK_H #define _SYS_UBERBLOCK_H @@ -36,8 +39,8 @@ typedef struct uberblock uberblock_t; -extern int uberblock_verify(uberblock_t *ub); -extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg); +extern int uberblock_verify(uberblock_t *); +extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t); #ifdef __cplusplus } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/zap_impl.h --- a/usr/src/uts/common/fs/zfs/sys/zap_impl.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/zap_impl.h Wed Dec 10 08:46:44 2014 -0800 @@ -70,7 +70,7 @@ } mzap_ent_t; #define MZE_PHYS(zap, mze) \ - (&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid]) + (&zap_m_phys(zap)->mz_chunk[(mze)->mze_chunkid]) /* * The (fat) zap is stored in one object. 
It is an array of @@ -104,7 +104,7 @@ * word number (1<zap_f.zap_phys) \ + ((uint64_t *)zap_f_phys(zap)) \ [(idx) + (1<zap_dbuf->db_data); +} + +inline mzap_phys_t * +zap_m_phys(zap_t *zap) +{ + return (zap->zap_dbuf->db_data); +} + typedef struct zap_name { zap_t *zn_zap; int zn_key_intlen; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/sys/zap_leaf.h --- a/usr/src/uts/common/fs/zfs/sys/zap_leaf.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/zap_leaf.h Wed Dec 10 08:46:44 2014 -0800 @@ -83,7 +83,7 @@ */ #define ZAP_LEAF_CHUNK(l, idx) \ ((zap_leaf_chunk_t *) \ - ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] + (zap_leaf_phys(l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] #define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry) typedef enum zap_chunk_type { @@ -156,9 +156,13 @@ uint64_t l_blkid; /* 1<l_dbuf->db_data); +} typedef struct zap_entry_handle { /* Set by zap_leaf and public to ZAP */ diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/uberblock.c --- a/usr/src/uts/common/fs/zfs/uberblock.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/uberblock.c Wed Dec 10 08:46:44 2014 -0800 @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ #include @@ -40,10 +40,10 @@ } /* - * Update the uberblock and return a boolean value indicating whether - * anything changed in this transaction group. + * Update the uberblock and return TRUE if anything changed in this + * transaction group. */ -int +boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg) { ASSERT(ub->ub_txg < txg); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zap.c --- a/usr/src/uts/common/fs/zfs/zap.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zap.c Wed Dec 10 08:46:44 2014 -0800 @@ -50,10 +50,11 @@ int fzap_default_block_shift = 14; /* 16k blocksize */ +extern inline zap_phys_t *zap_f_phys(zap_t *zap); + static void zap_leaf_pageout(dmu_buf_t *db, void *vl); static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks); - void fzap_byteswap(void *vbuf, size_t size) { @@ -80,13 +81,12 @@ ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); zap->zap_ismicro = FALSE; - (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, - &zap->zap_f.zap_phys, zap_evict); + (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, zap_evict); mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1; - zp = zap->zap_f.zap_phys; + zp = zap_f_phys(zap); /* * explicitly zero it since it might be coming from an * initialized microzap @@ -117,7 +117,6 @@ l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); l->l_dbuf = db; - l->l_phys = db->db_data; zap_leaf_init(l, zp->zap_normflags != 0); @@ -325,10 +324,10 @@ * If we are within 2 bits of running out, stop growing, since * this is already an aberrant condition. */ - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2) + if (zap_f_phys(zap)->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2) return (SET_ERROR(ENOSPC)); - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { + if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) { /* * We are outgrowing the "embedded" ptrtbl (the one * stored in the header block). 
Give it its own entire @@ -338,9 +337,9 @@ dmu_buf_t *db_new; int err; - ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, + ASSERT3U(zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==, ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); - ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk); + ASSERT0(zap_f_phys(zap)->zap_ptrtbl.zt_blk); newblk = zap_allocate_blocks(zap, 1); err = dmu_buf_hold(zap->zap_objset, zap->zap_object, @@ -353,17 +352,17 @@ db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); dmu_buf_rele(db_new, FTAG); - zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; + zap_f_phys(zap)->zap_ptrtbl.zt_blk = newblk; + zap_f_phys(zap)->zap_ptrtbl.zt_numblks = 1; + zap_f_phys(zap)->zap_ptrtbl.zt_shift++; - ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << + ASSERT3U(1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==, + zap_f_phys(zap)->zap_ptrtbl.zt_numblks << (FZAP_BLOCK_SHIFT(zap)-3)); return (0); } else { - return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + return (zap_table_grow(zap, &zap_f_phys(zap)->zap_ptrtbl, zap_ptrtbl_transfer, tx)); } } @@ -373,8 +372,8 @@ { dmu_buf_will_dirty(zap->zap_dbuf, tx); mutex_enter(&zap->zap_f.zap_num_entries_mtx); - ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); - zap->zap_f.zap_phys->zap_num_entries += delta; + ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta); + zap_f_phys(zap)->zap_num_entries += delta; mutex_exit(&zap->zap_f.zap_num_entries_mtx); } @@ -383,8 +382,8 @@ { uint64_t newblk; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - newblk = zap->zap_f.zap_phys->zap_freeblk; - zap->zap_f.zap_phys->zap_freeblk += nblocks; + newblk = zap_f_phys(zap)->zap_freeblk; + zap_f_phys(zap)->zap_freeblk += nblocks; return (newblk); } @@ -400,18 +399,17 @@ rw_enter(&l->l_rwlock, RW_WRITER); l->l_blkid = zap_allocate_blocks(zap, 1); l->l_dbuf = NULL; - l->l_phys = NULL; VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, DMU_READ_NO_PREFETCH)); - winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); + winner = dmu_buf_set_user(l->l_dbuf, l, zap_leaf_pageout); ASSERT(winner == NULL); dmu_buf_will_dirty(l->l_dbuf, tx); zap_leaf_init(l, zap->zap_normflags != 0); - zap->zap_f.zap_phys->zap_num_leafs++; + zap_f_phys(zap)->zap_num_leafs++; return (l); } @@ -421,7 +419,7 @@ { ASSERT(!zap->zap_ismicro); mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ - *count = zap->zap_f.zap_phys->zap_num_entries; + *count = zap_f_phys(zap)->zap_num_entries; mutex_exit(&zap->zap_f.zap_num_entries_mtx); return (0); } @@ -460,9 +458,8 @@ l->l_blkid = blkid; l->l_bs = highbit64(db->db_size) - 1; l->l_dbuf = db; - l->l_phys = NULL; - winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); + winner = dmu_buf_set_user(db, l, zap_leaf_pageout); rw_exit(&l->l_rwlock); if (winner != NULL) { @@ -476,7 +473,7 @@ * chain. There should be no chained leafs (as we have removed * support for them). 
*/ - ASSERT0(l->l_phys->l_hdr.lh_pad1); + ASSERT0(zap_leaf_phys(l)->l_hdr.lh_pad1); /* * There should be more hash entries than there can be @@ -486,11 +483,11 @@ /* The chunks should begin at the end of the hash table */ ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, - &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); + &zap_leaf_phys(l)->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); /* The chunks should end at the end of the block */ ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) - - (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size); + (uintptr_t)zap_leaf_phys(l), ==, l->l_dbuf->db_size); return (l); } @@ -523,16 +520,15 @@ rw_enter(&l->l_rwlock, lt); /* - * Must lock before dirtying, otherwise l->l_phys could change, + * Must lock before dirtying, otherwise zap_leaf_phys(l) could change, * causing ASSERT below to fail. */ if (lt == RW_WRITER) dmu_buf_will_dirty(db, tx); ASSERT3U(l->l_blkid, ==, blkid); ASSERT3P(l->l_dbuf, ==, db); - ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); - ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF); - ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_block_type, ==, ZBT_LEAF); + ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); *lp = l; return (0); @@ -543,13 +539,13 @@ { ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { + if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) { ASSERT3U(idx, <, - (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); + (1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift)); *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx); return (0); } else { - return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + return (zap_table_load(zap, &zap_f_phys(zap)->zap_ptrtbl, idx, valp)); } } @@ -560,11 +556,11 @@ ASSERT(tx != NULL); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { + if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) { ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk; return (0); } else { - return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, + return (zap_table_store(zap, &zap_f_phys(zap)->zap_ptrtbl, idx, blk, tx)); } } @@ -576,16 +572,17 @@ int err; ASSERT(zap->zap_dbuf == NULL || - zap->zap_f.zap_phys == zap->zap_dbuf->db_data); - ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); - idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + zap_f_phys(zap) == zap->zap_dbuf->db_data); + ASSERT3U(zap_f_phys(zap)->zap_magic, ==, ZAP_MAGIC); + idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift); err = zap_idx_to_blk(zap, idx, &blk); if (err != 0) return (err); err = zap_get_leaf_byblk(zap, blk, tx, lt, lp); - ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) == - (*lp)->l_phys->l_hdr.lh_prefix); + ASSERT(err || + ZAP_HASH_IDX(h, zap_leaf_phys(*lp)->l_hdr.lh_prefix_len) == + zap_leaf_phys(*lp)->l_hdr.lh_prefix); return (err); } @@ -597,16 +594,16 @@ zap_leaf_t *nl; int prefix_diff, i, err; uint64_t sibling; - int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len; + int old_prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len; - ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + ASSERT3U(old_prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift); ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, - l->l_phys->l_hdr.lh_prefix); + zap_leaf_phys(l)->l_hdr.lh_prefix); if (zap_tryupgradedir(zap, tx) == 0 || - old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { + old_prefix_len == 
zap_f_phys(zap)->zap_ptrtbl.zt_shift) { /* We failed to upgrade, or need to grow the pointer table */ objset_t *os = zap->zap_objset; uint64_t object = zap->zap_object; @@ -621,7 +618,7 @@ ASSERT(!zap->zap_ismicro); while (old_prefix_len == - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { + zap_f_phys(zap)->zap_ptrtbl.zt_shift) { err = zap_grow_ptrtbl(zap, tx); if (err) return (err); @@ -631,18 +628,18 @@ if (err) return (err); - if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) { + if (zap_leaf_phys(l)->l_hdr.lh_prefix_len != old_prefix_len) { /* it split while our locks were down */ *lp = l; return (0); } } ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + ASSERT3U(old_prefix_len, <, zap_f_phys(zap)->zap_ptrtbl.zt_shift); ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, - l->l_phys->l_hdr.lh_prefix); + zap_leaf_phys(l)->l_hdr.lh_prefix); - prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - + prefix_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift - (old_prefix_len + 1); sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff; @@ -664,7 +661,7 @@ ASSERT0(err); /* we checked for i/o errors above */ } - if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) { + if (hash & (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) { /* we want the sibling */ zap_put_leaf(l); *lp = nl; @@ -680,13 +677,13 @@ zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) { zap_t *zap = zn->zn_zap; - int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; - int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift && - l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER); + int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift; + int leaffull = (zap_leaf_phys(l)->l_hdr.lh_prefix_len == shift && + zap_leaf_phys(l)->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER); zap_put_leaf(l); - if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) { + if (leaffull || zap_f_phys(zap)->zap_ptrtbl.zt_nextblk) { int err; /* @@ -706,7 +703,7 @@ } /* could have finished growing while our locks were down */ - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift) + if (zap_f_phys(zap)->zap_ptrtbl.zt_shift == shift) (void) zap_grow_ptrtbl(zap, tx); } } @@ -937,7 +934,7 @@ int bs; idx = ZAP_HASH_IDX(zn->zn_hash, - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); + zap_f_phys(zap)->zap_ptrtbl.zt_shift); if (zap_idx_to_blk(zap, idx, &blk) != 0) return; bs = FZAP_BLOCK_SHIFT(zap); @@ -1169,8 +1166,8 @@ if (zc->zc_leaf && (ZAP_HASH_IDX(zc->zc_hash, - zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) != - zc->zc_leaf->l_phys->l_hdr.lh_prefix)) { + zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) != + zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix)) { rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); zap_put_leaf(zc->zc_leaf); zc->zc_leaf = NULL; @@ -1191,10 +1188,11 @@ if (err == ENOENT) { uint64_t nocare = - (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1; + (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len)) - 1; zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; zc->zc_cd = 0; - if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) { + if (zap_leaf_phys(l)->l_hdr.lh_prefix_len == 0 || + zc->zc_hash == 0) { zc->zc_hash = -1ULL; } else { zap_put_leaf(zc->zc_leaf); @@ -1261,25 +1259,25 @@ /* * Set zap_phys_t fields */ - zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; - zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; - zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; - zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type; - 
zs->zs_magic = zap->zap_f.zap_phys->zap_magic; - zs->zs_salt = zap->zap_f.zap_phys->zap_salt; + zs->zs_num_leafs = zap_f_phys(zap)->zap_num_leafs; + zs->zs_num_entries = zap_f_phys(zap)->zap_num_entries; + zs->zs_num_blocks = zap_f_phys(zap)->zap_freeblk; + zs->zs_block_type = zap_f_phys(zap)->zap_block_type; + zs->zs_magic = zap_f_phys(zap)->zap_magic; + zs->zs_salt = zap_f_phys(zap)->zap_salt; /* * Set zap_ptrtbl fields */ - zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; - zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk; + zs->zs_ptrtbl_len = 1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift; + zs->zs_ptrtbl_nextblk = zap_f_phys(zap)->zap_ptrtbl.zt_nextblk; zs->zs_ptrtbl_blks_copied = - zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied; - zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk; - zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; - zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; + zap_f_phys(zap)->zap_ptrtbl.zt_blks_copied; + zs->zs_ptrtbl_zt_blk = zap_f_phys(zap)->zap_ptrtbl.zt_blk; + zs->zs_ptrtbl_zt_numblks = zap_f_phys(zap)->zap_ptrtbl.zt_numblks; + zs->zs_ptrtbl_zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift; - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { + if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) { /* the ptrtbl is entirely in the header block. */ zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs); @@ -1287,16 +1285,16 @@ int b; dmu_prefetch(zap->zap_objset, zap->zap_object, - zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs, - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs); + zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs, + zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs); - for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; + for (b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks; b++) { dmu_buf_t *db; int err; err = dmu_buf_hold(zap->zap_objset, zap->zap_object, - (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs, + (zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs, FTAG, &db, DMU_READ_NO_PREFETCH); if (err == 0) { zap_stats_ptrtbl(zap, db->db_data, @@ -1333,7 +1331,7 @@ * could extend the table. 
*/ if (add) { - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) + if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) *towrite += zap->zap_dbuf->db_size; else *towrite += (zap->zap_dbuf->db_size * 3); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zap_leaf.c --- a/usr/src/uts/common/fs/zfs/zap_leaf.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zap_leaf.c Wed Dec 10 08:46:44 2014 -0800 @@ -48,10 +48,12 @@ #define LEAF_HASH(l, h) \ ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ - ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len))) + ((h) >> \ + (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) -#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) +#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) +extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l); static void zap_memset(void *a, int c, size_t n) @@ -105,8 +107,11 @@ { int i; zap_leaf_t l; + dmu_buf_t l_dbuf; + + l_dbuf.db_data = buf; l.l_bs = highbit64(size) - 1; - l.l_phys = buf; + l.l_dbuf = &l_dbuf; buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type); buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix); @@ -158,18 +163,20 @@ int i; l->l_bs = highbit64(l->l_dbuf->db_size) - 1; - zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header)); - zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); + zap_memset(&zap_leaf_phys(l)->l_hdr, 0, + sizeof (struct zap_leaf_header)); + zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, + 2*ZAP_LEAF_HASH_NUMENTRIES(l)); for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE; ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1; } ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END; - l->l_phys->l_hdr.lh_block_type = ZBT_LEAF; - l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC; - l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); + zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF; + zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC; + zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); if (sort) - l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; + zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; } /* @@ -181,15 +188,16 @@ { int chunk; - ASSERT(l->l_phys->l_hdr.lh_nfree > 0); + ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0); - chunk = l->l_phys->l_hdr.lh_freelist; + chunk = zap_leaf_phys(l)->l_hdr.lh_freelist; ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE); - l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; + zap_leaf_phys(l)->l_hdr.lh_freelist = + ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next; - l->l_phys->l_hdr.lh_nfree--; + zap_leaf_phys(l)->l_hdr.lh_nfree--; return (chunk); } @@ -198,16 +206,16 @@ zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk) { struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free; - ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); + ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l)); ASSERT(zlf->lf_type != ZAP_CHUNK_FREE); zlf->lf_type = ZAP_CHUNK_FREE; - zlf->lf_next = l->l_phys->l_hdr.lh_freelist; + zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist; bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */ - l->l_phys->l_hdr.lh_freelist = chunk; + zap_leaf_phys(l)->l_hdr.lh_freelist = chunk; - l->l_phys->l_hdr.lh_nfree++; + zap_leaf_phys(l)->l_hdr.lh_nfree++; } /* @@ -393,7 +401,7 @@ uint16_t 
*chunkp; struct zap_leaf_entry *le; - ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); again: for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash); @@ -413,7 +421,7 @@ * lowest-cd match for MT_FIRST. */ ASSERT(zn->zn_matchtype == MT_EXACT || - (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); + (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED)); if (zap_leaf_array_match(l, zn, le->le_name_chunk, le->le_name_numints)) { zeh->zeh_num_integers = le->le_value_numints; @@ -453,10 +461,10 @@ uint16_t lh; struct zap_leaf_entry *le; - ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); + ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) { - for (chunk = l->l_phys->l_hash[lh]; + for (chunk = zap_leaf_phys(l)->l_hash[lh]; chunk != CHAIN_END; chunk = le->le_next) { le = ZAP_LEAF_ENTRY(l, chunk); @@ -536,7 +544,7 @@ delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) - ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen); - if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks) + if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks) return (SET_ERROR(EAGAIN)); zap_leaf_array_free(l, &le->le_value_chunk); @@ -566,7 +574,7 @@ *zeh->zeh_chunkp = le->le_next; zap_leaf_chunk_free(l, entry_chunk); - l->l_phys->l_hdr.lh_nentries--; + zap_leaf_phys(l)->l_hdr.lh_nentries--; } int @@ -590,7 +598,7 @@ if (cd == ZAP_NEED_CD) { /* find the lowest unused cd */ - if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { + if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) { cd = 0; for (chunk = *LEAF_HASH_ENTPTR(l, h); @@ -626,7 +634,7 @@ ASSERT3U(cd, <, zap_maxcd(zn->zn_zap)); } - if (l->l_phys->l_hdr.lh_nfree < numchunks) + if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks) return (SET_ERROR(EAGAIN)); /* make the entry */ @@ -647,7 +655,7 @@ /* XXX if we did the search above, we could just use that */ chunkp = zap_leaf_rehash_entry(l, chunk); - l->l_phys->l_hdr.lh_nentries++; + zap_leaf_phys(l)->l_hdr.lh_nentries++; zeh->zeh_leaf = l; zeh->zeh_num_integers = num_integers; @@ -781,8 +789,8 @@ zap_leaf_chunk_free(l, entry); - l->l_phys->l_hdr.lh_nentries--; - nl->l_phys->l_hdr.lh_nentries++; + zap_leaf_phys(l)->l_hdr.lh_nentries--; + zap_leaf_phys(nl)->l_hdr.lh_nentries++; } /* @@ -792,19 +800,22 @@ zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort) { int i; - int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len; + int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len; /* set new prefix and prefix_len */ - l->l_phys->l_hdr.lh_prefix <<= 1; - l->l_phys->l_hdr.lh_prefix_len++; - nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1; - nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len; + zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1; + zap_leaf_phys(l)->l_hdr.lh_prefix_len++; + zap_leaf_phys(nl)->l_hdr.lh_prefix = + zap_leaf_phys(l)->l_hdr.lh_prefix | 1; + zap_leaf_phys(nl)->l_hdr.lh_prefix_len = + zap_leaf_phys(l)->l_hdr.lh_prefix_len; /* break existing hash chains */ - zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l)); + zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END, + 2*ZAP_LEAF_HASH_NUMENTRIES(l)); if (sort) - l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; + zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED; /* * Transfer entries whose hash bit 'bit' is set to nl; rehash @@ -832,25 +843,25 @@ { int i, n; - n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - - 
l->l_phys->l_hdr.lh_prefix_len; + n = zap_f_phys(zap)->zap_ptrtbl.zt_shift - + zap_leaf_phys(l)->l_hdr.lh_prefix_len; n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_leafs_with_2n_pointers[n]++; - n = l->l_phys->l_hdr.lh_nentries/5; + n = zap_leaf_phys(l)->l_hdr.lh_nentries/5; n = MIN(n, ZAP_HISTOGRAM_SIZE-1); zs->zs_blocks_with_n5_entries[n]++; n = ((1<l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / + zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 / (1<zs_blocks_n_tenths_full[n]++; for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) { int nentries = 0; - int chunk = l->l_phys->l_hash[i]; + int chunk = zap_leaf_phys(l)->l_hash[i]; while (chunk != CHAIN_END) { struct zap_leaf_entry *le = diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zap_micro.c --- a/usr/src/uts/common/fs/zfs/zap_micro.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zap_micro.c Wed Dec 10 08:46:44 2014 -0800 @@ -39,6 +39,8 @@ #include #endif +extern inline mzap_phys_t *zap_m_phys(zap_t *zap); + static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); uint64_t @@ -46,7 +48,7 @@ { if (zap->zap_ismicro) return (0); - return (zap->zap_u.zap_fat.zap_phys->zap_flags); + return (zap_f_phys(zap)->zap_flags); } int @@ -385,7 +387,7 @@ * it, because zap_lockdir() checks zap_ismicro without the lock * held. */ - winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); + winner = dmu_buf_set_user(db, zap, zap_evict); if (winner != NULL) { rw_exit(&zap->zap_rwlock); @@ -397,15 +399,15 @@ } if (zap->zap_ismicro) { - zap->zap_salt = zap->zap_m.zap_phys->mz_salt; - zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; + zap->zap_salt = zap_m_phys(zap)->mz_salt; + zap->zap_normflags = zap_m_phys(zap)->mz_normflags; zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; avl_create(&zap->zap_m.zap_avl, mze_compare, sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = - &zap->zap_m.zap_phys->mz_chunk[i]; + &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0]) { zap_name_t *zn; @@ -417,8 +419,8 @@ } } } else { - zap->zap_salt = zap->zap_f.zap_phys->zap_salt; - zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; + zap->zap_salt = zap_f_phys(zap)->zap_salt; + zap->zap_normflags = zap_f_phys(zap)->zap_normflags; ASSERT3U(sizeof (struct zap_leaf_header), ==, 2*ZAP_LEAF_CHUNKSIZE); @@ -428,7 +430,7 @@ * other members. 
*/ ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, - &zap->zap_f.zap_phys->zap_salt); + &zap_f_phys(zap)->zap_salt); /* * The embedded pointer table should end at the end of @@ -436,7 +438,7 @@ */ ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 1<zap_f.zap_phys, ==, + (uintptr_t)zap_f_phys(zap), ==, zap->zap_dbuf->db_size); } rw_exit(&zap->zap_rwlock); @@ -939,7 +941,7 @@ #ifdef ZFS_DEBUG for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { - mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; + mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); } #endif @@ -950,7 +952,7 @@ again: for (i = start; i < zap->zap_m.zap_num_chunks; i++) { - mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; + mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; if (mze->mze_name[0] == 0) { mze->mze_value = value; mze->mze_cd = cd; @@ -1151,7 +1153,7 @@ err = SET_ERROR(ENOENT); } else { zap->zap_m.zap_num_entries--; - bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], + bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], sizeof (mzap_ent_phys_t)); mze_remove(zap, mze); } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zfs_ioctl.c --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Wed Dec 10 08:46:44 2014 -0800 @@ -916,7 +916,7 @@ dd = clone->ds_dir; error = dsl_dataset_hold_obj(dd->dd_pool, - dd->dd_phys->dd_origin_obj, FTAG, &origin); + dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin); if (error != 0) { dsl_dataset_rele(clone, FTAG); dsl_pool_rele(dp, FTAG); @@ -4309,7 +4309,8 @@ } if (dsl_dir_is_clone(tosnap->ds_dir)) - zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj; + zc->zc_fromobj = + dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj; dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } @@ -4873,7 +4874,7 @@ return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, - os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); + dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg); dmu_objset_rele(os, FTAG); return (error); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zil.c --- a/usr/src/uts/common/fs/zfs/zil.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zil.c Wed Dec 10 08:46:44 2014 -0800 @@ -181,7 +181,7 @@ char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; @@ -257,7 +257,7 @@ { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; const blkptr_t *bp = &lr->lr_blkptr; - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf = NULL; zbookmark_phys_t zb; int error; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/fs/zfs/zio.c --- a/usr/src/uts/common/fs/zfs/zio.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/fs/zfs/zio.c Wed Dec 10 08:46:44 2014 -0800 @@ -2159,7 +2159,7 @@ if (ddp->ddp_phys_birth != 0) { arc_buf_t *abuf = NULL; - uint32_t aflags = ARC_WAIT; + arc_flags_t aflags = ARC_FLAG_WAIT; blkptr_t blk = *zio->io_bp; int error; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c --- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c Wed Dec 10 08:46:44 2014 -0800 @@ -500,7 +500,7 @@ 0xffffffffull, /* max segment size (DMA boundary) */ MPTSAS_MAX_DMA_SEGS, /* scatter/gather list length */ 512, /* granularity - device 
transfer size */ - DDI_DMA_RELAXED_ORDERING /* flags, enable relaxed ordering */ + 0 /* flags, set to 0 */ }; ddi_device_acc_attr_t mptsas_dev_attr = { @@ -614,6 +614,16 @@ int mptsas_extreq_sense_bufsize = 256*64; /* + * We believe that all software resrictions of having to run with DMA + * attributes to limit allocation to the first 4G are removed. + * However, this flag remains to enable quick switchback should suspicious + * problems emerge. + * Note that scsi_alloc_consistent_buf() does still adhere to allocating + * 32 bit addressable memory, but we can cope if that is changed now. + */ +int mptsas_use_64bit_msgaddr = 1; + +/* * SM - HBA statics */ char *mptsas_driver_rev = MPTSAS_MOD_STRING; @@ -1182,7 +1192,11 @@ /* Make a per-instance copy of the structures */ mpt->m_io_dma_attr = mptsas_dma_attrs64; - mpt->m_msg_dma_attr = mptsas_dma_attrs; + if (mptsas_use_64bit_msgaddr) { + mpt->m_msg_dma_attr = mptsas_dma_attrs64; + } else { + mpt->m_msg_dma_attr = mptsas_dma_attrs; + } mpt->m_reg_acc_attr = mptsas_dev_attr; mpt->m_dev_acc_attr = mptsas_dev_attr; @@ -1334,6 +1348,12 @@ goto fail; } + mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT, + mptsas_target_addr_hash, mptsas_target_addr_cmp, + mptsas_target_free, sizeof (mptsas_target_t), + offsetof(mptsas_target_t, m_link), + offsetof(mptsas_target_t, m_addr), KM_SLEEP); + /* * Fill in the phy_info structure and get the base WWID */ @@ -4032,7 +4052,7 @@ * address to dma to and from the driver. The second * address is the address mpt uses to fill in the SGL. */ - p->m_phys_addr = cookie.dmac_address; + p->m_phys_addr = cookie.dmac_laddress; return (DDI_SUCCESS); } @@ -4273,6 +4293,7 @@ { pMpi2SGESimple64_t sge; pMpi2SGEChain64_t sgechain; + uint64_t nframe_phys_addr; uint_t cookiec; mptti_t *dmap; uint32_t flags; @@ -4396,10 +4417,8 @@ p = cmd->cmd_extra_frames; ddi_put16(acc_hdl, &sgechain->Length, chainlength); - ddi_put32(acc_hdl, &sgechain->Address.Low, - p->m_phys_addr); - /* SGL is allocated in the first 4G mem range */ - ddi_put32(acc_hdl, &sgechain->Address.High, 0); + ddi_put32(acc_hdl, &sgechain->Address.Low, p->m_phys_addr); + ddi_put32(acc_hdl, &sgechain->Address.High, p->m_phys_addr >> 32); /* * If there are more than 2 frames left we have to @@ -4457,12 +4476,14 @@ * Note that frames are in contiguous * memory space. */ + nframe_phys_addr = p->m_phys_addr + + (mpt->m_req_frame_size * k); ddi_put32(p->m_acc_hdl, &sgechain->Address.Low, - (p->m_phys_addr + - (mpt->m_req_frame_size * k))); + nframe_phys_addr); ddi_put32(p->m_acc_hdl, - &sgechain->Address.High, 0); + &sgechain->Address.High, + nframe_phys_addr >> 32); /* * If there are more than 2 frames left @@ -4612,6 +4633,7 @@ { pMpi2IeeeSgeSimple64_t ieeesge; pMpi25IeeeSgeChain64_t ieeesgechain; + uint64_t nframe_phys_addr; uint_t cookiec; mptti_t *dmap; uint8_t flags; @@ -4731,10 +4753,8 @@ p = cmd->cmd_extra_frames; ddi_put32(acc_hdl, &ieeesgechain->Length, chainlength); - ddi_put32(acc_hdl, &ieeesgechain->Address.Low, - p->m_phys_addr); - /* SGL is allocated in the first 4G mem range */ - ddi_put32(acc_hdl, &ieeesgechain->Address.High, 0); + ddi_put32(acc_hdl, &ieeesgechain->Address.Low, p->m_phys_addr); + ddi_put32(acc_hdl, &ieeesgechain->Address.High, p->m_phys_addr >> 32); /* * If there are more than 2 frames left we have to @@ -4791,12 +4811,14 @@ * Note that frames are in contiguous * memory space. 
*/ + nframe_phys_addr = p->m_phys_addr + + (mpt->m_req_frame_size * k); ddi_put32(p->m_acc_hdl, &ieeesgechain->Address.Low, - (p->m_phys_addr + - (mpt->m_req_frame_size * k))); + nframe_phys_addr); ddi_put32(p->m_acc_hdl, - &ieeesgechain->Address.High, 0); + &ieeesgechain->Address.High, + nframe_phys_addr >> 32); /* * If there are more than 2 frames left @@ -5147,7 +5169,7 @@ pMpi2AddressReplyDescriptor_t address_reply; pMPI2DefaultReply_t reply; mptsas_fw_diagnostic_buffer_t *pBuffer; - uint32_t reply_addr; + uint32_t reply_addr, reply_frame_dma_baseaddr; uint16_t SMID, iocstatus; mptsas_slots_t *slots = mpt->m_active; mptsas_cmd_t *cmd = NULL; @@ -5166,10 +5188,11 @@ * If reply frame is not in the proper range we should ignore this * message and exit the interrupt handler. */ - if ((reply_addr < mpt->m_reply_frame_dma_addr) || - (reply_addr >= (mpt->m_reply_frame_dma_addr + + reply_frame_dma_baseaddr = mpt->m_reply_frame_dma_addr & 0xffffffffu; + if ((reply_addr < reply_frame_dma_baseaddr) || + (reply_addr >= (reply_frame_dma_baseaddr + (mpt->m_reply_frame_size * mpt->m_max_replies))) || - ((reply_addr - mpt->m_reply_frame_dma_addr) % + ((reply_addr - reply_frame_dma_baseaddr) % mpt->m_reply_frame_size != 0)) { mptsas_log(mpt, CE_WARN, "?Received invalid reply frame " "address 0x%x\n", reply_addr); @@ -5180,7 +5203,7 @@ (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0, DDI_DMA_SYNC_FORCPU); reply = (pMPI2DefaultReply_t)(mpt->m_reply_frame + (reply_addr - - mpt->m_reply_frame_dma_addr)); + reply_frame_dma_baseaddr)); function = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->Function); NDBG31(("mptsas_handle_address_reply: function 0x%x, reply_addr=0x%x", @@ -5245,7 +5268,7 @@ cv_signal(&mpt->m_fw_cv); break; case MPI2_FUNCTION_EVENT_NOTIFICATION: - reply_frame_no = (reply_addr - mpt->m_reply_frame_dma_addr) / + reply_frame_no = (reply_addr - reply_frame_dma_baseaddr) / mpt->m_reply_frame_size; args = &mpt->m_replyh_args[reply_frame_no]; args->mpt = (void *)mpt; @@ -6718,7 +6741,8 @@ mpt = replyh_arg->mpt; eventreply = (pMpi2EventNotificationReply_t) - (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr)); + (mpt->m_reply_frame + (rfm - + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event); @@ -6806,7 +6830,8 @@ ASSERT(mutex_owned(&mpt->m_mutex)); eventreply = (pMpi2EventNotificationReply_t) - (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr)); + (mpt->m_reply_frame + (rfm - + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event); if (iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl, @@ -7510,7 +7535,8 @@ } eventreply = (pMpi2EventNotificationReply_t) - (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr)); + (mpt->m_reply_frame + (rfm - + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event); if (iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl, @@ -8302,7 +8328,7 @@ mptsas_target_t *ptgt = cmd->cmd_tgt_addr; uint16_t SMID, io_flags = 0; uint8_t ars_size; - uint32_t request_desc_low, request_desc_high; + uint64_t request_desc; uint32_t ars_dmaaddrlow; mptsas_cmd_t *c; @@ -8413,9 +8439,9 @@ if (mptsas_use_fastpath && ptgt->m_io_flags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) { io_flags |= MPI25_SCSIIO_IOFLAGS_FAST_PATH; - request_desc_low = MPI25_REQ_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO; - } else { - request_desc_low = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO; + request_desc = 
MPI25_REQ_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO; + } else { + request_desc = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO; } ddi_put16(acc_hdl, &io_request->IoFlags, io_flags); /* @@ -8449,9 +8475,9 @@ /* * Build request descriptor and write it to the request desc post reg. */ - request_desc_low |= (SMID << 16); - request_desc_high = ptgt->m_devhdl << 16; - MPTSAS_START_CMD(mpt, request_desc_low, request_desc_high); + request_desc |= (SMID << 16); + request_desc |= (uint64_t)ptgt->m_devhdl << 48; + MPTSAS_START_CMD(mpt, request_desc); /* * Start timeout. @@ -10210,8 +10236,8 @@ struct scsi_pkt *pkt = cmd->cmd_pkt; mptsas_pt_request_t *pt = pkt->pkt_ha_private; uint32_t request_size; - uint32_t request_desc_low, request_desc_high = 0; uint32_t i; + uint64_t request_desc = 0; uint8_t desc_type; uint16_t SMID; uint8_t *request, function; @@ -10307,8 +10333,8 @@ */ if (function == MPI2_FUNCTION_SCSI_IO_REQUEST) { desc_type = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO; - request_desc_high = (ddi_get16(acc_hdl, - &scsi_io_req->DevHandle) << 16); + request_desc = ((uint64_t)ddi_get16(acc_hdl, + &scsi_io_req->DevHandle) << 48); } (void) ddi_dma_sync(mpt->m_dma_req_sense_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); @@ -10320,9 +10346,9 @@ * finish. */ (void) ddi_dma_sync(dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (SMID << 16) + desc_type; + request_desc |= (SMID << 16) + desc_type; cmd->cmd_rfm = NULL; - MPTSAS_START_CMD(mpt, request_desc_low, request_desc_high); + MPTSAS_START_CMD(mpt, request_desc); if ((mptsas_check_dma_handle(dma_hdl) != DDI_SUCCESS) || (mptsas_check_acc_handle(acc_hdl) != DDI_SUCCESS)) { ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED); @@ -10911,7 +10937,7 @@ DDI_DMA_SYNC_FORCPU); reply_msg = (pMPI2DefaultReply_t) (mpt->m_reply_frame + (cmd->cmd_rfm - - mpt->m_reply_frame_dma_addr)); + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); } mptsas_fma_check(mpt, cmd); @@ -11105,7 +11131,8 @@ pMpi2DiagReleaseRequest_t pDiag_release_msg; struct scsi_pkt *pkt = cmd->cmd_pkt; mptsas_diag_request_t *diag = pkt->pkt_ha_private; - uint32_t request_desc_low, i; + uint32_t i; + uint64_t request_desc; ASSERT(mutex_owned(&mpt->m_mutex)); @@ -11158,10 +11185,10 @@ */ (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (cmd->cmd_slot << 16) + + request_desc = (cmd->cmd_slot << 16) + MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cmd->cmd_rfm = NULL; - MPTSAS_START_CMD(mpt, request_desc_low, 0); + MPTSAS_START_CMD(mpt, request_desc); if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl) != DDI_SUCCESS) || (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl) != @@ -11259,7 +11286,8 @@ (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0, DDI_DMA_SYNC_FORCPU); reply = (pMpi2DiagBufferPostReply_t)(mpt->m_reply_frame + - (cmd->cmd_rfm - mpt->m_reply_frame_dma_addr)); + (cmd->cmd_rfm - + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); /* * Get the reply message data @@ -11413,7 +11441,8 @@ (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0, DDI_DMA_SYNC_FORCPU); reply = (pMpi2DiagReleaseReply_t)(mpt->m_reply_frame + - (cmd->cmd_rfm - mpt->m_reply_frame_dma_addr)); + (cmd->cmd_rfm - + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); /* * Get the reply message data @@ -12802,12 +12831,6 @@ goto fail; } - mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT, - mptsas_target_addr_hash, mptsas_target_addr_cmp, - mptsas_target_free, sizeof (mptsas_target_t), - offsetof(mptsas_target_t, m_link), - offsetof(mptsas_target_t, m_addr), KM_SLEEP); - if (mptsas_alloc_active_slots(mpt, 
KM_SLEEP)) { goto fail; } @@ -12857,7 +12880,7 @@ * Initialize the Reply Free Queue with the physical addresses of our * reply frames. */ - cookie.dmac_address = mpt->m_reply_frame_dma_addr; + cookie.dmac_address = mpt->m_reply_frame_dma_addr & 0xffffffffu; for (i = 0; i < mpt->m_max_replies; i++) { ddi_put32(mpt->m_acc_free_queue_hdl, &((uint32_t *)(void *)mpt->m_free_queue)[i], @@ -14412,14 +14435,14 @@ * 3. call sas_device_page/expander_page to update hash table */ mptsas_update_phymask(mpt); - /* - * Invalid the existing entries - * - * XXX - It seems like we should just delete everything here. We are - * holding the lock and are about to refresh all the targets in both - * hashes anyway. Given the path we're in, what outstanding async - * event could possibly be trying to reference one of these things - * without taking the lock, and how would that be useful anyway? + + /* + * Remove all the devhdls for existing entries but leave their + * addresses alone. In update_hashtab() below, we'll find all + * targets that are still present and reassociate them with + * their potentially new devhdls. Leaving the targets around in + * this fashion allows them to be used on the tx waitq even + * while IOC reset is occurring. */ for (tp = refhash_first(mpt->m_targets); tp != NULL; tp = refhash_next(mpt->m_targets, tp)) { diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_impl.c --- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_impl.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_impl.c Wed Dec 10 08:46:44 2014 -0800 @@ -203,7 +203,8 @@ struct scsi_pkt *pkt = cmd->cmd_pkt; mptsas_config_request_t *config = pkt->pkt_ha_private; uint8_t direction; - uint32_t length, flagslength, request_desc_low; + uint32_t length, flagslength; + uint64_t request_desc; ASSERT(mutex_owned(&mpt->m_mutex)); @@ -277,10 +278,10 @@ (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (cmd->cmd_slot << 16) + + request_desc = (cmd->cmd_slot << 16) + MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cmd->cmd_rfm = NULL; - MPTSAS_START_CMD(mpt, request_desc_low, 0); + MPTSAS_START_CMD(mpt, request_desc); if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl) != DDI_SUCCESS) || (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl) != @@ -390,7 +391,7 @@ (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0, DDI_DMA_SYNC_FORCPU); reply = (pMpi2ConfigReply_t)(mpt->m_reply_frame + (cmd->cmd_rfm - - mpt->m_reply_frame_dma_addr)); + - (mpt->m_reply_frame_dma_addr & 0xffffffffu))); config.page_type = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->Header.PageType); config.page_number = ddi_get8(mpt->m_acc_reply_frame_hdl, @@ -535,7 +536,7 @@ (void) ddi_dma_sync(cmd->cmd_dmahandle, 0, 0, DDI_DMA_SYNC_FORCPU); reply = (pMpi2ConfigReply_t)(mpt->m_reply_frame + (cmd->cmd_rfm - - mpt->m_reply_frame_dma_addr)); + - (mpt->m_reply_frame_dma_addr & 0xffffffffu))); iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl, &reply->IOCStatus); iocstatus = MPTSAS_IOCSTATUS(iocstatus); @@ -612,7 +613,7 @@ int mptsas_send_config_request_msg(mptsas_t *mpt, uint8_t action, uint8_t pagetype, uint32_t pageaddress, uint8_t pagenumber, uint8_t pageversion, - uint8_t pagelength, uint32_t SGEflagslength, uint32_t SGEaddress32) + uint8_t pagelength, uint32_t SGEflagslength, uint64_t SGEaddress) { pMpi2ConfigRequest_t config; int send_numbytes; @@ -629,7 +630,10 @@ ddi_put32(mpt->m_hshk_acc_hdl, &config->PageBufferSGE.MpiSimple.FlagsLength, SGEflagslength); 
ddi_put32(mpt->m_hshk_acc_hdl, - &config->PageBufferSGE.MpiSimple.u.Address32, SGEaddress32); + &config->PageBufferSGE.MpiSimple.u.Address64.Low, SGEaddress); + ddi_put32(mpt->m_hshk_acc_hdl, + &config->PageBufferSGE.MpiSimple.u.Address64.High, + SGEaddress >> 32); send_numbytes = sizeof (MPI2_CONFIG_REQUEST); /* @@ -646,7 +650,7 @@ mptsas_send_extended_config_request_msg(mptsas_t *mpt, uint8_t action, uint8_t extpagetype, uint32_t pageaddress, uint8_t pagenumber, uint8_t pageversion, uint16_t extpagelength, - uint32_t SGEflagslength, uint32_t SGEaddress32) + uint32_t SGEflagslength, uint64_t SGEaddress) { pMpi2ConfigRequest_t config; int send_numbytes; @@ -665,7 +669,10 @@ ddi_put32(mpt->m_hshk_acc_hdl, &config->PageBufferSGE.MpiSimple.FlagsLength, SGEflagslength); ddi_put32(mpt->m_hshk_acc_hdl, - &config->PageBufferSGE.MpiSimple.u.Address32, SGEaddress32); + &config->PageBufferSGE.MpiSimple.u.Address64.Low, SGEaddress); + ddi_put32(mpt->m_hshk_acc_hdl, + &config->PageBufferSGE.MpiSimple.u.Address64.High, + SGEaddress >> 32); send_numbytes = sizeof (MPI2_CONFIG_REQUEST); /* @@ -1093,7 +1100,7 @@ mptsas_cmd_t *cmd; struct scsi_pkt *pkt; mptsas_slots_t *slots = mpt->m_active; - uint32_t request_desc_low, i; + uint64_t request_desc, i; pMPI2DefaultReply_t reply_msg; /* @@ -1149,9 +1156,9 @@ */ (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (cmd->cmd_slot << 16) + + request_desc = (cmd->cmd_slot << 16) + MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY; - MPTSAS_START_CMD(mpt, request_desc_low, 0); + MPTSAS_START_CMD(mpt, request_desc); rval = mptsas_poll(mpt, cmd, MPTSAS_POLL_TIME); if (pkt->pkt_reason == CMD_INCOMPLETE) @@ -1167,7 +1174,7 @@ DDI_DMA_SYNC_FORCPU); reply_msg = (pMPI2DefaultReply_t) (mpt->m_reply_frame + (cmd->cmd_rfm - - mpt->m_reply_frame_dma_addr)); + (mpt->m_reply_frame_dma_addr & 0xffffffffu))); if (reply_size > sizeof (MPI2_SCSI_TASK_MANAGE_REPLY)) { reply_size = sizeof (MPI2_SCSI_TASK_MANAGE_REPLY); } @@ -1303,7 +1310,7 @@ struct scsi_pkt *pkt; int i; int rvalue = 0; - uint32_t request_desc_low; + uint64_t request_desc; if (mpt->m_MPI25 && !mptsas_enable_mpi25_flashupdate) { /* @@ -1389,10 +1396,10 @@ */ (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (cmd->cmd_slot << 16) + + request_desc = (cmd->cmd_slot << 16) + MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; cmd->cmd_rfm = NULL; - MPTSAS_START_CMD(mpt, request_desc_low, 0); + MPTSAS_START_CMD(mpt, request_desc); rvalue = 0; (void) cv_reltimedwait(&mpt->m_fw_cv, &mpt->m_mutex, @@ -2004,7 +2011,7 @@ MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_SYSTEM_ADDRESS | - MPI2_SGE_FLAGS_32_BIT_ADDRESSING | + MPI2_SGE_FLAGS_64_BIT_ADDRESSING | MPI2_SGE_FLAGS_IOC_TO_HOST | MPI2_SGE_FLAGS_END_OF_LIST) << MPI2_SGE_FLAGS_SHIFT); @@ -2020,7 +2027,7 @@ MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_SIMPLE_ELEMENT | MPI2_SGE_FLAGS_SYSTEM_ADDRESS | - MPI2_SGE_FLAGS_32_BIT_ADDRESSING | + MPI2_SGE_FLAGS_64_BIT_ADDRESSING | MPI2_SGE_FLAGS_IOC_TO_HOST | MPI2_SGE_FLAGS_END_OF_LIST) << MPI2_SGE_FLAGS_SHIFT); @@ -2064,7 +2071,7 @@ MPI2_CONFIG_EXTPAGETYPE_SAS_IO_UNIT, 0, page_number, ddi_get8(recv_accessp, &configreply->Header.PageVersion), ddi_get16(recv_accessp, &configreply->ExtPageLength), - flags_length, page_cookie.dmac_address)) { + flags_length, page_cookie.dmac_laddress)) { goto cleanup; } @@ -2314,6 +2321,8 @@ bzero(page_memp, sizeof (MPI2_CONFIG_PAGE_MAN_5)); m5 = (pMpi2ManufacturingPage5_t)page_memp; + 
NDBG20(("mptsas_get_manufacture_page5: paddr 0x%p", + (void *)(uintptr_t)page_cookie.dmac_laddress)); /* * Give reply address to IOC to store config page in and send @@ -2323,7 +2332,7 @@ flagslength = sizeof (MPI2_CONFIG_PAGE_MAN_5); flagslength |= ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_SIMPLE_ELEMENT | - MPI2_SGE_FLAGS_SYSTEM_ADDRESS | MPI2_SGE_FLAGS_32_BIT_ADDRESSING | + MPI2_SGE_FLAGS_SYSTEM_ADDRESS | MPI2_SGE_FLAGS_64_BIT_ADDRESSING | MPI2_SGE_FLAGS_IOC_TO_HOST | MPI2_SGE_FLAGS_END_OF_LIST) << MPI2_SGE_FLAGS_SHIFT); @@ -2332,7 +2341,7 @@ MPI2_CONFIG_PAGETYPE_MANUFACTURING, 0, 5, ddi_get8(recv_accessp, &configreply->Header.PageVersion), ddi_get8(recv_accessp, &configreply->Header.PageLength), - flagslength, page_cookie.dmac_address)) { + flagslength, page_cookie.dmac_laddress)) { rval = DDI_FAILURE; goto done; } @@ -2699,7 +2708,7 @@ flagslength = sizeof (MPI2_CONFIG_PAGE_MAN_0); flagslength |= ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_SIMPLE_ELEMENT | - MPI2_SGE_FLAGS_SYSTEM_ADDRESS | MPI2_SGE_FLAGS_32_BIT_ADDRESSING | + MPI2_SGE_FLAGS_SYSTEM_ADDRESS | MPI2_SGE_FLAGS_64_BIT_ADDRESSING | MPI2_SGE_FLAGS_IOC_TO_HOST | MPI2_SGE_FLAGS_END_OF_LIST) << MPI2_SGE_FLAGS_SHIFT); @@ -2708,7 +2717,7 @@ MPI2_CONFIG_PAGETYPE_MANUFACTURING, 0, 0, ddi_get8(recv_accessp, &configreply->Header.PageVersion), ddi_get8(recv_accessp, &configreply->Header.PageLength), - flagslength, page_cookie.dmac_address)) { + flagslength, page_cookie.dmac_laddress)) { rval = DDI_FAILURE; goto done; } diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_init.c --- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_init.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_init.c Wed Dec 10 08:46:44 2014 -0800 @@ -656,7 +656,8 @@ * These addresses are set using the DMA cookie addresses from when the * memory was allocated. Sense buffer hi address should be 0. */ - ddi_put32(accessp, &init->SenseBufferAddressHigh, 0); + ddi_put32(accessp, &init->SenseBufferAddressHigh, + (uint32_t)(mpt->m_req_sense_dma_addr >> 32)); ddi_put32(accessp, &init->SystemReplyAddressHigh, (uint32_t)(mpt->m_reply_frame_dma_addr >> 32)); ddi_put32(accessp, &init->SystemRequestFrameBaseAddress.High, diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_raid.c --- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_raid.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_raid.c Wed Dec 10 08:46:44 2014 -0800 @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2014, Tegile Systems Inc. All rights reserved. 
*/ /* @@ -572,7 +573,8 @@ mptsas_slots_t *slots = mpt->m_active; int config, vol; mptsas_cmd_t *cmd; - uint32_t request_desc_low, reply_addr; + uint32_t reply_addr; + uint64_t request_desc; int cnt; pMpi2ReplyDescriptorsUnion_t reply_desc_union; pMPI2DefaultReply_t reply; @@ -631,9 +633,9 @@ */ (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0, DDI_DMA_SYNC_FORDEV); - request_desc_low = (cmd->cmd_slot << 16) + + request_desc = (cmd->cmd_slot << 16) + MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; - MPTSAS_START_CMD(mpt, request_desc_low, 0); + MPTSAS_START_CMD(mpt, request_desc); /* * Even though reply does not matter because the system is shutting diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/os/exit.c --- a/usr/src/uts/common/os/exit.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/os/exit.c Wed Dec 10 08:46:44 2014 -0800 @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -388,10 +389,16 @@ if (p->p_pid == z->zone_proc_initpid) { if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && - zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && - z->zone_restart_init == B_TRUE && - restart_init(what, why) == 0) - return (0); + zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) { + if (z->zone_restart_init == B_TRUE) { + if (restart_init(what, why) == 0) + return (0); + } else { + (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, + CRED()); + } + } + /* * Since we didn't or couldn't restart init, we clear * the zone's init state and proceed with exit diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/os/fork.c --- a/usr/src/uts/common/os/fork.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/os/fork.c Wed Dec 10 08:46:44 2014 -0800 @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -149,6 +150,7 @@ */ if ((flags & ~(FORK_NOSIGCHLD | FORK_WAITPID)) != 0) { error = EINVAL; + atomic_inc_32(&curproc->p_zone->zone_ffmisc); goto forkerr; } @@ -157,11 +159,14 @@ */ if (curthread == p->p_agenttp) { error = ENOTSUP; + atomic_inc_32(&curproc->p_zone->zone_ffmisc); goto forkerr; } - if ((error = secpolicy_basic_fork(CRED())) != 0) + if ((error = secpolicy_basic_fork(CRED())) != 0) { + atomic_inc_32(&p->p_zone->zone_ffmisc); goto forkerr; + } /* * If the calling lwp is doing a fork1() then the @@ -175,6 +180,7 @@ if (!holdlwps(isfork1 ? SHOLDFORK1 : SHOLDFORK)) { aston(curthread); error = EINTR; + atomic_inc_32(&p->p_zone->zone_ffmisc); goto forkerr; } @@ -290,6 +296,7 @@ * map all others to EAGAIN. */ error = (error == ENOMEM) ? ENOMEM : EAGAIN; + atomic_inc_32(&p->p_zone->zone_ffnomem); goto forkerr; } @@ -424,8 +431,10 @@ * fork event (if requested) to whatever contract the child is * a member of. Fails if the parent has been SIGKILLed. */ - if (contract_process_fork(NULL, cp, p, B_TRUE) == NULL) + if (contract_process_fork(NULL, cp, p, B_TRUE) == NULL) { + atomic_inc_32(&p->p_zone->zone_ffmisc); goto forklwperr; + } /* * No fork failures occur beyond this point. 
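The os/fork.c and os/lwp.c hunks in this region tag each distinct fork/lwp-creation failure path with an atomic bump of a per-zone counter, and the os/zone.c hunks that follow export those counters as forkfail_cap, forkfail_noproc, forkfail_nomem and forkfail_misc in the zone_misc kstat. A schematic of the two halves, reduced to a single counter; the names match the patch and the surrounding locking and error handling are omitted.

    /* Failure path (fork.c / lwp.c): charge the event to the caller's zone. */
    error = (error == ENOMEM) ? ENOMEM : EAGAIN;
    atomic_inc_32(&p->p_zone->zone_ffnomem);	/* as_dup()/memory failure bucket */
    goto forkerr;

    /* zone_misc_kstat_update() (zone.c): publish the counter for kstat(1M). */
    zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem;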
@@ -960,6 +969,7 @@ if (rctlfail) { mutex_exit(&zone->zone_nlwps_lock); mutex_exit(&pp->p_lock); + atomic_inc_32(&zone->zone_ffcap); goto punish; } } @@ -1233,6 +1243,7 @@ proj->kpj_nprocs--; zone->zone_nprocs--; mutex_exit(&zone->zone_nlwps_lock); + atomic_inc_32(&zone->zone_ffnoproc); punish: /* diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/os/lwp.c --- a/usr/src/uts/common/os/lwp.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/os/lwp.c Wed Dec 10 08:46:44 2014 -0800 @@ -146,6 +146,7 @@ if (rctlfail) { mutex_exit(&p->p_zone->zone_nlwps_lock); mutex_exit(&p->p_lock); + atomic_inc_32(&p->p_zone->zone_ffcap); return (NULL); } p->p_task->tk_nlwps++; @@ -204,6 +205,7 @@ p->p_zone->zone_nlwps--; mutex_exit(&p->p_zone->zone_nlwps_lock); mutex_exit(&p->p_lock); + atomic_inc_32(&p->p_zone->zone_ffnomem); return (NULL); } } else { @@ -217,6 +219,7 @@ p->p_zone->zone_nlwps--; mutex_exit(&p->p_zone->zone_nlwps_lock); mutex_exit(&p->p_lock); + atomic_inc_32(&p->p_zone->zone_ffnomem); return (NULL); } } @@ -582,10 +585,12 @@ err = CL_FORK(curthread, t, bufp); t->t_cid = cid; } - if (err) + if (err) { + atomic_inc_32(&p->p_zone->zone_ffmisc); goto error; - else + } else { bufp = NULL; + } } /* @@ -612,6 +617,7 @@ * All lwpids are allocated; fail the request. */ err = 1; + atomic_inc_32(&p->p_zone->zone_ffnoproc); goto error; } /* @@ -631,6 +637,7 @@ if (PROC_IS_BRANDED(p)) { if (BROP(p)->b_initlwp(lwp)) { err = 1; + atomic_inc_32(&p->p_zone->zone_ffmisc); goto error; } branded = 1; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/os/zone.c --- a/usr/src/uts/common/os/zone.c Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/os/zone.c Wed Dec 10 08:46:44 2014 -0800 @@ -1845,6 +1845,11 @@ zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1]; zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2]; + zmp->zm_ffcap.value.ui32 = zone->zone_ffcap; + zmp->zm_ffnoproc.value.ui32 = zone->zone_ffnoproc; + zmp->zm_ffnomem.value.ui32 = zone->zone_ffnomem; + zmp->zm_ffmisc.value.ui32 = zone->zone_ffmisc; + return (0); } @@ -1878,6 +1883,12 @@ kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32); kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_ffcap, "forkfail_cap", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_ffnoproc, "forkfail_noproc", + KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_ffnomem, "forkfail_nomem", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_ffmisc, "forkfail_misc", KSTAT_DATA_UINT32); + ksp->ks_update = zone_misc_kstat_update; ksp->ks_private = zone; @@ -5514,6 +5525,10 @@ case ZONE_ATTR_INITNAME: err = zone_set_initname(zone, (const char *)buf); break; + case ZONE_ATTR_INITNORESTART: + zone->zone_restart_init = B_FALSE; + err = 0; + break; case ZONE_ATTR_BOOTARGS: err = zone_set_bootargs(zone, (const char *)buf); break; diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h --- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h Wed Dec 10 08:46:44 2014 -0800 @@ -253,7 +253,7 @@ ddi_dma_handle_t m_dma_hdl; ddi_acc_handle_t m_acc_hdl; caddr_t m_frames_addr; - uint32_t m_phys_addr; + uint64_t m_phys_addr; } mptsas_cache_frames_t; typedef struct mptsas_cmd { @@ -1128,11 +1128,11 @@ (uint32_t *)(mpt->m_devaddr + NREG_DSPS))) -#define MPTSAS_START_CMD(mpt, req_desc_lo, req_desc_hi) \ - ddi_put32(mpt->m_datap, &mpt->m_reg->RequestDescriptorPostLow,\ - 
req_desc_lo);\ - ddi_put32(mpt->m_datap, &mpt->m_reg->RequestDescriptorPostHigh,\ - req_desc_hi); +#define MPTSAS_START_CMD(mpt, req_desc) \ + ddi_put32(mpt->m_datap, &mpt->m_reg->RequestDescriptorPostLow, \ + req_desc & 0xffffffffu); \ + ddi_put32(mpt->m_datap, &mpt->m_reg->RequestDescriptorPostHigh, \ + (req_desc >> 32) & 0xffffffffu); #define INTPENDING(mpt) \ (MPTSAS_GET_ISTAT(mpt) & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) @@ -1275,14 +1275,6 @@ void mptsas_log(struct mptsas *mpt, int level, char *fmt, ...); int mptsas_poll(mptsas_t *mpt, mptsas_cmd_t *poll_cmd, int polltime); int mptsas_do_dma(mptsas_t *mpt, uint32_t size, int var, int (*callback)()); -int mptsas_send_config_request_msg(mptsas_t *mpt, uint8_t action, - uint8_t pagetype, uint32_t pageaddress, uint8_t pagenumber, - uint8_t pageversion, uint8_t pagelength, uint32_t - SGEflagslength, uint32_t SGEaddress32); -int mptsas_send_extended_config_request_msg(mptsas_t *mpt, uint8_t action, - uint8_t extpagetype, uint32_t pageaddress, uint8_t pagenumber, - uint8_t pageversion, uint16_t extpagelength, - uint32_t SGEflagslength, uint32_t SGEaddress32); int mptsas_update_flash(mptsas_t *mpt, caddr_t ptrbuffer, uint32_t size, uint8_t type, int mode); int mptsas_check_flash(mptsas_t *mpt, caddr_t origfile, uint32_t size, @@ -1314,11 +1306,11 @@ int mptsas_send_config_request_msg(mptsas_t *mpt, uint8_t action, uint8_t pagetype, uint32_t pageaddress, uint8_t pagenumber, uint8_t pageversion, uint8_t pagelength, uint32_t SGEflagslength, - uint32_t SGEaddress32); + uint64_t SGEaddress); int mptsas_send_extended_config_request_msg(mptsas_t *mpt, uint8_t action, uint8_t extpagetype, uint32_t pageaddress, uint8_t pagenumber, uint8_t pageversion, uint16_t extpagelength, - uint32_t SGEflagslength, uint32_t SGEaddress32); + uint32_t SGEflagslength, uint64_t SGEaddress); int mptsas_request_from_pool(mptsas_t *mpt, mptsas_cmd_t **cmd, struct scsi_pkt **pkt); diff -r 61550c9ec412 -r 014608f1fae0 usr/src/uts/common/sys/zone.h --- a/usr/src/uts/common/sys/zone.h Mon Dec 08 06:19:08 2014 -0800 +++ b/usr/src/uts/common/sys/zone.h Wed Dec 10 08:46:44 2014 -0800 @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, Joyent, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright 2014 Igor Kozhukhov . */ @@ -102,6 +103,7 @@ #define ZONE_ATTR_HOSTID 15 #define ZONE_ATTR_FS_ALLOWED 16 #define ZONE_ATTR_NETWORK 17 +#define ZONE_ATTR_INITNORESTART 20 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -393,6 +395,10 @@ kstat_named_t zm_fss_shr_pct; kstat_named_t zm_fss_pri_hi; kstat_named_t zm_fss_pri_avg; + kstat_named_t zm_ffcap; + kstat_named_t zm_ffnoproc; + kstat_named_t zm_ffnomem; + kstat_named_t zm_ffmisc; } zone_misc_kstat_t; typedef struct zone { @@ -574,6 +580,11 @@ uint64_t zone_stime; /* total system time */ uint64_t zone_utime; /* total user time */ uint64_t zone_wtime; /* total time waiting in runq */ + /* fork-fail kstat tracking */ + uint32_t zone_ffcap; /* hit an rctl cap */ + uint32_t zone_ffnoproc; /* get proc/lwp error */ + uint32_t zone_ffnomem; /* as_dup/memory error */ + uint32_t zone_ffmisc; /* misc. other error */ struct loadavg_s zone_loadavg; /* loadavg for this zone */ uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */
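The mpt_sas hunks above collapse the old request_desc_low/request_desc_high pair into a single uint64_t: callers build the whole descriptor (type in the low byte, SMID in bits 16-31 and, for SCSI I/O, the target devhdl in bits 48-63) and the reworked MPTSAS_START_CMD splits it across the Low/High post registers. The same low/high split is applied to the 64-bit SGE chain-frame addresses earlier in mptsas.c, which in turn allows message frames to live above 4 GiB under the mptsas_use_64bit_msgaddr tunable. A minimal arithmetic sketch, where smid and devhdl stand in for the SMID and ptgt->m_devhdl values taken from the command.

    uint64_t request_desc = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;	/* descriptor type */
    request_desc |= (uint64_t)smid << 16;			/* system message ID */
    request_desc |= (uint64_t)devhdl << 48;			/* target device handle */

    /* MPTSAS_START_CMD() then posts the two 32-bit halves: */
    uint32_t desc_low = request_desc & 0xffffffffu;
    uint32_t desc_high = (request_desc >> 32) & 0xffffffffu;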