Mercurial > illumos > illumos-gate
changeset 10407:34e10c4af053
6875779 zfs user accounting callbacks can be simplified
6771468 ::blkptr prints incorrectly on 32-bit
6832861 zcmd_alloc_dst_nvlist's default size is too small
6876808 want ::refcount to print refcount_t details
author | Matthew Ahrens <Matthew.Ahrens@Sun.COM> |
---|---|
date | Fri, 28 Aug 2009 13:57:58 -0700 |
parents | fd30909fc9f8 |
children | 64c355043d3b |
files | usr/src/cmd/mdb/common/modules/zfs/zfs.c usr/src/lib/libzfs/common/libzfs_util.c usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/dmu_objset.c usr/src/uts/common/fs/zfs/sys/arc.h usr/src/uts/common/fs/zfs/sys/dmu.h usr/src/uts/common/fs/zfs/sys/zap.h usr/src/uts/common/fs/zfs/zap.c usr/src/uts/common/fs/zfs/zfs_vfsops.c |
diffstat | 9 files changed, 180 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Fri Aug 28 13:57:58 2009 -0700
@@ -36,6 +36,7 @@
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/zio_compress.h>
+#include <ctype.h>
 
 #ifndef _KERNEL
 #include "../genunix/list.h"
@@ -450,7 +451,7 @@
 		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
 		mdb_printf("DVA[%d]: GANG: %-5s GRID: %04x\t"
 		    "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
-		    DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
+		    (int)DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
 		mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
 		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
 		    BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
@@ -464,7 +465,7 @@
 	    BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
 	    doti[BP_GET_TYPE(&bp)].ot_name);
 	mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n",
-	    bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
+	    bp.blk_birth, (int)BP_GET_LEVEL(&bp), bp.blk_fill);
 	mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n",
 	    zci[BP_GET_CHECKSUM(&bp)].ci_name,
 	    zct[BP_GET_COMPRESS(&bp)].ci_name);
@@ -2145,6 +2146,114 @@
 	return (DCMD_OK);
 }
 
+/* ARGSUSED */
+static int
+reference_cb(uintptr_t addr, const void *ignored, void *arg)
+{
+	static int gotid;
+	static mdb_ctf_id_t ref_id;
+	uintptr_t ref_holder;
+	uintptr_t ref_removed;
+	uint64_t ref_number;
+	boolean_t holder_is_str;
+	char holder_str[128];
+	boolean_t removed = (boolean_t)arg;
+
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct reference", &ref_id) == -1) {
+			mdb_warn("couldn't find struct reference");
+			return (WALK_ERR);
+		}
+		gotid = TRUE;
+	}
+
+	if (GETMEMBID(addr, &ref_id, ref_holder, ref_holder) ||
+	    GETMEMBID(addr, &ref_id, ref_removed, ref_removed) ||
+	    GETMEMBID(addr, &ref_id, ref_number, ref_number))
+		return (WALK_ERR);
+
+	if (mdb_readstr(holder_str, sizeof (holder_str), ref_holder) != -1) {
+		char *cp;
+		holder_is_str = B_TRUE;
+		for (cp = holder_str; *cp; cp++) {
+			if (!isprint(*cp)) {
+				holder_is_str = B_FALSE;
+				break;
+			}
+		}
+	} else {
+		holder_is_str = B_FALSE;
+	}
+
+	if (removed)
+		mdb_printf("removed ");
+	mdb_printf("reference ");
+	if (ref_number != 1)
+		mdb_printf("with count=%llu ", ref_number);
+	mdb_printf("with tag %p", (void*)ref_holder);
+	if (holder_is_str)
+		mdb_printf(" \"%s\"", holder_str);
+	mdb_printf(", held at:\n");
+
+	(void) mdb_call_dcmd("whatis", addr, DCMD_ADDRSPEC, 0, NULL);
+
+	if (removed) {
+		mdb_printf("removed at:\n");
+		(void) mdb_call_dcmd("whatis", ref_removed,
+		    DCMD_ADDRSPEC, 0, NULL);
+	}
+
+	mdb_printf("\n");
+
+	return (WALK_NEXT);
+}
+
+/* ARGSUSED */
+static int
+refcount(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	uint64_t rc_count, rc_removed_count;
+	uintptr_t rc_list, rc_removed;
+	static int gotid;
+	static mdb_ctf_id_t rc_id;
+	ulong_t off;
+
+	if (!(flags & DCMD_ADDRSPEC))
+		return (DCMD_USAGE);
+
+	if (!gotid) {
+		if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
+			mdb_warn("couldn't find struct refcount");
+			return (DCMD_ERR);
+		}
+		gotid = TRUE;
+	}
+
+	if (GETMEMBID(addr, &rc_id, rc_count, rc_count) ||
+	    GETMEMBID(addr, &rc_id, rc_removed_count, rc_removed_count))
+		return (DCMD_ERR);
+
+	mdb_printf("refcount_t at %p has %llu current holds, "
+	    "%llu recently released holds\n",
+	    addr, (longlong_t)rc_count, (longlong_t)rc_removed_count);
+
+	if (rc_count > 0)
+		mdb_printf("current holds:\n");
+	if (mdb_ctf_offsetof(rc_id, "rc_list", &off) == -1)
+		return (DCMD_ERR);
+	rc_list = addr + off/NBBY;
+	mdb_pwalk("list", reference_cb, (void*)B_FALSE, rc_list);
+
+	if (rc_removed_count > 0)
+		mdb_printf("released holds:\n");
+	if (mdb_ctf_offsetof(rc_id, "rc_removed", &off) == -1)
+		return (DCMD_ERR);
+	rc_removed = addr + off/NBBY;
+	mdb_pwalk("list", reference_cb, (void*)B_TRUE, rc_removed);
+
+	return (DCMD_OK);
+}
+
 /*
  * MDB module linkage information:
  *
@@ -2186,6 +2295,7 @@
 	    "given a spa_t, print block type stats from last scrub",
 	    zfs_blkstats },
 	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
+	{ "refcount", "", "print refcount_t holders", refcount },
 	{ NULL }
 };
 
--- a/usr/src/lib/libzfs/common/libzfs_util.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/lib/libzfs/common/libzfs_util.c	Fri Aug 28 13:57:58 2009 -0700
@@ -688,7 +688,7 @@
 zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
 {
 	if (len == 0)
-		len = 2048;
+		len = 4*1024;
 	zc->zc_nvlist_dst_size = len;
 	if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
 	    zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
--- a/usr/src/uts/common/fs/zfs/arc.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/arc.c	Fri Aug 28 13:57:58 2009 -0700
@@ -2842,41 +2842,6 @@
 	return (0);
 }
 
-/*
- * arc_read() variant to support pool traversal. If the block is already
- * in the ARC, make a copy of it; otherwise, the caller will do the I/O.
- * The idea is that we don't want pool traversal filling up memory, but
- * if the ARC already has the data anyway, we shouldn't pay for the I/O.
- */
-int
-arc_tryread(spa_t *spa, blkptr_t *bp, void *data)
-{
-	arc_buf_hdr_t *hdr;
-	kmutex_t *hash_mtx;
-	uint64_t guid = spa_guid(spa);
-	int rc = 0;
-
-	hdr = buf_hash_find(guid, BP_IDENTITY(bp), bp->blk_birth, &hash_mtx);
-
-	if (hdr && hdr->b_datacnt > 0 && !HDR_IO_IN_PROGRESS(hdr)) {
-		arc_buf_t *buf = hdr->b_buf;
-
-		ASSERT(buf);
-		while (buf->b_data == NULL) {
-			buf = buf->b_next;
-			ASSERT(buf);
-		}
-		bcopy(buf->b_data, data, hdr->b_size);
-	} else {
-		rc = ENOENT;
-	}
-
-	if (hash_mtx)
-		mutex_exit(hash_mtx);
-
-	return (rc);
-}
-
 void
 arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
 {
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c	Fri Aug 28 13:57:58 2009 -0700
@@ -1017,18 +1017,39 @@
 	    os->os_userused_dnode);
 }
 
+static void
+do_userquota_callback(objset_t *os, dnode_phys_t *dnp,
+    boolean_t subtract, dmu_tx_t *tx)
+{
+	static const char zerobuf[DN_MAX_BONUSLEN] = {0};
+	uint64_t user, group;
+
+	ASSERT(dnp->dn_type != 0 ||
+	    (bcmp(DN_BONUS(dnp), zerobuf, DN_MAX_BONUSLEN) == 0 &&
+	    DN_USED_BYTES(dnp) == 0));
+
+	if ((dnp->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) &&
+	    0 == used_cbs[os->os_phys->os_type](dnp->dn_bonustype,
+	    DN_BONUS(dnp), &user, &group)) {
+		int64_t delta = DNODE_SIZE + DN_USED_BYTES(dnp);
+		if (subtract)
+			delta = -delta;
+		VERIFY(0 == zap_increment_int(os, DMU_USERUSED_OBJECT,
+		    user, delta, tx));
+		VERIFY(0 == zap_increment_int(os, DMU_GROUPUSED_OBJECT,
+		    group, delta, tx));
+	}
+}
+
 void
 dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	list_t *list = &os->os_synced_dnodes;
-	static const char zerobuf[DN_MAX_BONUSLEN] = {0};
 
 	ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
 
 	while (dn = list_head(list)) {
-		dmu_object_type_t bonustype;
-
 		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
 		ASSERT(dn->dn_oldphys);
 		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
@@ -1046,31 +1067,14 @@
 		}
 
 		/*
-		 * If the object was not previously
-		 * accounted, pretend that it was free.
-		 */
-		if (!(dn->dn_oldphys->dn_flags &
-		    DNODE_FLAG_USERUSED_ACCOUNTED)) {
-			bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
-		}
-
-		/*
-		 * If the object was freed, use the previous bonustype.
+		 * We intentionally modify the zap object even if the
+		 * net delta (due to phys-oldphys) is zero. Otherwise
+		 * the block of the zap obj could be shared between
+		 * datasets but need to be different between them after
+		 * a bprewrite.
 		 */
-		bonustype = dn->dn_phys->dn_bonustype ?
-		    dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
-		ASSERT(dn->dn_phys->dn_type != 0 ||
-		    (bcmp(DN_BONUS(dn->dn_phys), zerobuf,
-		    DN_MAX_BONUSLEN) == 0 &&
-		    DN_USED_BYTES(dn->dn_phys) == 0));
-		ASSERT(dn->dn_oldphys->dn_type != 0 ||
-		    (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
-		    DN_MAX_BONUSLEN) == 0 &&
-		    DN_USED_BYTES(dn->dn_oldphys) == 0));
-		used_cbs[os->os_phys->os_type](os, bonustype,
-		    DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
-		    DN_USED_BYTES(dn->dn_oldphys),
-		    DN_USED_BYTES(dn->dn_phys), tx);
+		do_userquota_callback(os, dn->dn_oldphys, B_TRUE, tx);
+		do_userquota_callback(os, dn->dn_phys, B_FALSE, tx);
 
 		/*
 		 * The mutex is needed here for interlock with dnode_allocate.
--- a/usr/src/uts/common/fs/zfs/sys/arc.h	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h	Fri Aug 28 13:57:58 2009 -0700
@@ -120,7 +120,6 @@
     int zio_flags, const zbookmark_t *zb);
 int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     zio_done_func_t *done, void *private, uint32_t arc_flags);
-int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
 
 void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
 int arc_buf_evict(arc_buf_t *buf);
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h	Fri Aug 28 13:57:58 2009 -0700
@@ -581,9 +581,8 @@
 extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
     uint64_t *idp, uint64_t *offp);
 
-typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
-    void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
-    dmu_tx_t *tx);
+typedef int objset_used_cb_t(dmu_object_type_t bonustype,
+    void *bonus, uint64_t *userp, uint64_t *groupp);
 extern void dmu_objset_register_type(dmu_objset_type_t ost,
     objset_used_cb_t *cb);
 extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
--- a/usr/src/uts/common/fs/zfs/sys/zap.h	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/sys/zap.h	Fri Aug 28 13:57:58 2009 -0700
@@ -255,6 +255,8 @@
 int zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
 int zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx);
 int zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value);
+int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx);
 
 struct zap;
 struct zap_leaf;
--- a/usr/src/uts/common/fs/zfs/zap.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zap.c	Fri Aug 28 13:57:58 2009 -0700
@@ -978,6 +978,30 @@
 	return (zap_lookup(os, obj, name, 8, 1, &value));
 }
 
+int
+zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
+    dmu_tx_t *tx)
+{
+	char name[20];
+	uint64_t value = 0;
+	int err;
+
+	if (delta == 0)
+		return (0);
+
+	(void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
+	err = zap_lookup(os, obj, name, 8, 1, &value);
+	if (err != 0 && err != ENOENT)
+		return (err);
+	value += delta;
+	if (value == 0)
+		err = zap_remove(os, obj, name, tx);
+	else
+		err = zap_update(os, obj, name, 8, 1, &value, tx);
+	return (err);
+}
+
+
 /*
  * Routines for iterating over the attributes.
  */
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Fri Aug 28 11:22:11 2009 -0700
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c	Fri Aug 28 13:57:58 2009 -0700
@@ -594,36 +594,18 @@
 	ASSERT(err == 0);
 }
 
-static void
-zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
-    void *oldbonus, void *newbonus,
-    uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
+static int
+zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus,
+    uint64_t *userp, uint64_t *groupp)
 {
-	znode_phys_t *oldznp = oldbonus;
-	znode_phys_t *newznp = newbonus;
+	znode_phys_t *znp = bonus;
 
 	if (bonustype != DMU_OT_ZNODE)
-		return;
-
-	/* We charge 512 for the dnode (if it's allocated). */
-	if (oldznp->zp_gen != 0)
-		oldused += DNODE_SIZE;
-	if (newznp->zp_gen != 0)
-		newused += DNODE_SIZE;
+		return (ENOENT);
 
-	if (oldznp->zp_uid == newznp->zp_uid) {
-		uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
-	} else {
-		uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
-		uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
-	}
-
-	if (oldznp->zp_gid == newznp->zp_gid) {
-		uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
-	} else {
-		uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
-		uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
-	}
+	*userp = znp->zp_uid;
+	*groupp = znp->zp_gid;
+	return (0);
 }
 
 static void