Mercurial > illumos > illumos-gate
changeset 11149:8bad7424e2c2
6903731 need userland dedup stats
author | George Wilson <George.Wilson@Sun.COM> |
---|---|
date | Sun, 22 Nov 2009 11:04:51 -0800 |
parents | 68adfb531269 |
children | 3b3a70d343cc |
files | usr/src/cmd/zdb/zdb.c usr/src/cmd/zpool/zpool_main.c usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h usr/src/lib/libzfs/common/libzfs.h usr/src/lib/libzfs/common/libzfs_status.c usr/src/lib/libzfs/common/mapfile-vers usr/src/uts/common/fs/zfs/ddt.c usr/src/uts/common/fs/zfs/spa.c usr/src/uts/common/fs/zfs/spa_config.c usr/src/uts/common/fs/zfs/sys/ddt.h usr/src/uts/common/sys/fs/zfs.h |
diffstat | 11 files changed, 219 insertions(+), 99 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/zdb/zdb.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/cmd/zdb/zdb.c Sun Nov 22 11:04:51 2009 -0800 @@ -585,70 +585,6 @@ } static void -dump_ddt_stat(const ddt_stat_t *dds, int h) -{ - char refcnt[6]; - char blocks[6], lsize[6], psize[6], dsize[6]; - char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; - - if (dds->dds_blocks == 0) - return; - - if (h == -1) - (void) strcpy(refcnt, "Total"); - else - nicenum(1ULL << h, refcnt); - - nicenum(dds->dds_blocks, blocks); - nicenum(dds->dds_lsize, lsize); - nicenum(dds->dds_psize, psize); - nicenum(dds->dds_dsize, dsize); - nicenum(dds->dds_ref_blocks, ref_blocks); - nicenum(dds->dds_ref_lsize, ref_lsize); - nicenum(dds->dds_ref_psize, ref_psize); - nicenum(dds->dds_ref_dsize, ref_dsize); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - refcnt, - blocks, lsize, psize, dsize, - ref_blocks, ref_lsize, ref_psize, ref_dsize); -} - -static void -dump_ddt_histogram(const ddt_histogram_t *ddh) -{ - ddt_stat_t dds_total = { 0 }; - - ddt_histogram_stat(&dds_total, ddh); - - (void) printf("\n"); - - (void) printf("bucket " - " allocated " - " referenced \n"); - (void) printf("______ " - "______________________________ " - "______________________________\n"); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - "refcnt", - "blocks", "LSIZE", "PSIZE", "DSIZE", - "blocks", "LSIZE", "PSIZE", "DSIZE"); - - (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", - "------", - "------", "-----", "-----", "-----", - "------", "-----", "-----", "-----"); - - for (int h = 0; h < 64; h++) - dump_ddt_stat(&ddh->ddh_stat[h], h); - - dump_ddt_stat(&dds_total, -1); - - (void) printf("\n"); -} - -static void dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) { char name[DDT_NAMELEN]; @@ -681,7 +617,7 @@ if (dump_opt['D'] < 3) return; - dump_ddt_histogram(&ddt->ddt_histogram[type][class]); + zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); if (dump_opt['D'] < 4) return; @@ -710,14 
+646,12 @@ for (enum ddt_type type = 0; type < DDT_TYPES; type++) { for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { - ddt_histogram_add(&ddh_total, - &ddt->ddt_histogram[type][class]); dump_ddt(ddt, type, class); } } } - ddt_histogram_stat(&dds_total, &ddh_total); + ddt_get_dedup_stats(spa, &dds_total); if (dds_total.dds_blocks == 0) { (void) printf("All DDTs are empty\n"); @@ -728,7 +662,8 @@ if (dump_opt['D'] > 1) { (void) printf("DDT histogram (aggregated over all DDTs):\n"); - dump_ddt_histogram(&ddh_total); + ddt_get_dedup_histogram(spa, &ddh_total); + zpool_dump_ddt(&dds_total, &ddh_total); } dump_dedup_ratio(&dds_total); @@ -2245,7 +2180,7 @@ (void) printf("Simulated DDT histogram:\n"); - dump_ddt_histogram(&ddh_total); + zpool_dump_ddt(&dds_total, &ddh_total); dump_dedup_ratio(&dds_total); }
--- a/usr/src/cmd/zpool/zpool_main.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/cmd/zpool/zpool_main.c Sun Nov 22 11:04:51 2009 -0800 @@ -2984,6 +2984,7 @@ boolean_t cb_verbose; boolean_t cb_explain; boolean_t cb_first; + boolean_t cb_dedup_stats; } status_cbdata_t; /* @@ -3123,6 +3124,36 @@ } } +static void +print_dedup_stats(nvlist_t *config) +{ + ddt_histogram_t *ddh; + ddt_stat_t *dds; + ddt_object_t *ddo; + uint_t c; + + /* + * If the pool was faulted then we may not have been able to + * obtain the config. Otherwise, if we have anything in the dedup + * table, continue processing the stats. + */ + if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, + (uint64_t **)&ddo, &c) != 0 || ddo->ddo_count == 0) + return; + + (void) printf("\n"); + (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n", + (u_longlong_t)ddo->ddo_count, + (u_longlong_t)ddo->ddo_dspace, + (u_longlong_t)ddo->ddo_mspace); + + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, + (uint64_t **)&dds, &c) == 0); + verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, + (uint64_t **)&ddh, &c) == 0); + zpool_dump_ddt(dds, ddh); +} + /* * Display a summary of pool status. Displays a summary such as: * @@ -3405,6 +3436,9 @@ else print_error_log(zhp); } + + if (cbp->cb_dedup_stats) + print_dedup_stats(config); } else { (void) printf(gettext("config: The configuration cannot be " "determined.\n")); @@ -3418,6 +3452,7 @@ * * -v Display complete error logs * -x Display only pools with potential problems + * -D Display dedup status (undocumented) * * Describes the health status of all pools or some subset. 
*/ @@ -3429,7 +3464,7 @@ status_cbdata_t cb = { 0 }; /* check options */ - while ((c = getopt(argc, argv, "vx")) != -1) { + while ((c = getopt(argc, argv, "vxD")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; @@ -3437,6 +3472,9 @@ case 'x': cb.cb_explain = B_TRUE; break; + case 'D': + cb.cb_dedup_stats = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt);
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h Sun Nov 22 11:04:51 2009 -0800 @@ -64,6 +64,9 @@ #define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" #define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" #define ZPOOL_CONFIG_IS_HOLE "is_hole" +#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" +#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" +#define ZPOOL_CONFIG_DDT_STATS "ddt_stats" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such
--- a/usr/src/lib/libzfs/common/libzfs.h Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/lib/libzfs/common/libzfs.h Sun Nov 22 11:04:51 2009 -0800 @@ -299,6 +299,7 @@ extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); +extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions.
--- a/usr/src/lib/libzfs/common/libzfs_status.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/lib/libzfs/common/libzfs_status.c Sun Nov 22 11:04:51 2009 -0800 @@ -328,3 +328,68 @@ return (ret); } + +static void +dump_ddt_stat(const ddt_stat_t *dds, int h) +{ + char refcnt[6]; + char blocks[6], lsize[6], psize[6], dsize[6]; + char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6]; + + if (dds == NULL || dds->dds_blocks == 0) + return; + + if (h == -1) + (void) strcpy(refcnt, "Total"); + else + zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt)); + + zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks)); + zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize)); + zfs_nicenum(dds->dds_psize, psize, sizeof (psize)); + zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize)); + zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks)); + zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize)); + zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize)); + zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize)); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + refcnt, + blocks, lsize, psize, dsize, + ref_blocks, ref_lsize, ref_psize, ref_dsize); +} + +/* + * Print the DDT histogram and the column totals. + */ +void +zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh) +{ + int h; + + (void) printf("\n"); + + (void) printf("bucket " + " allocated " + " referenced \n"); + (void) printf("______ " + "______________________________ " + "______________________________\n"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "refcnt", + "blocks", "LSIZE", "PSIZE", "DSIZE", + "blocks", "LSIZE", "PSIZE", "DSIZE"); + + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n", + "------", + "------", "-----", "-----", "-----", + "------", "-----", "-----", "-----"); + + for (h = 0; h < 64; h++) + dump_ddt_stat(&ddh->ddh_stat[h], h); + + dump_ddt_stat(dds_total, -1); + + (void) printf("\n"); +}
--- a/usr/src/lib/libzfs/common/mapfile-vers Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/lib/libzfs/common/mapfile-vers Sun Nov 22 11:04:51 2009 -0800 @@ -157,6 +157,7 @@ zpool_create; zpool_destroy; zpool_disable_datasets; + zpool_dump_ddt; zpool_enable_datasets; zpool_expand_proplist; zpool_explain_recover;
--- a/usr/src/uts/common/fs/zfs/ddt.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/uts/common/fs/zfs/ddt.c Sun Nov 22 11:04:51 2009 -0800 @@ -399,23 +399,58 @@ return (B_TRUE); } -static void -ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) +void +ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo) { - ddt_histogram_t ddh_total = { 0 }; + dmu_object_info_t doi; + uint64_t count; + int error; for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ddt_t *ddt = spa->spa_ddt[c]; for (enum ddt_type type = 0; type < DDT_TYPES; type++) { for (enum ddt_class class = 0; class < DDT_CLASSES; class++) { - ddt_histogram_add(&ddh_total, + error = ddt_object_info(ddt, type, class, &doi); + if (error == ENOENT) + continue; + ASSERT3U(error, ==, 0); + + count = ddt_object_count(ddt, type, class); + ddo->ddo_count += count; + ddo->ddo_dspace += + (doi.doi_physical_blocks_512 << 9) / count; + ddo->ddo_mspace += doi.doi_fill_count * + doi.doi_data_block_size / count; + } + } + } +} + +void +ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh) +{ + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { + ddt_t *ddt = spa->spa_ddt[c]; + for (enum ddt_type type = 0; type < DDT_TYPES; type++) { + for (enum ddt_class class = 0; class < DDT_CLASSES; + class++) { + ddt_histogram_add(ddh, &ddt->ddt_histogram[type][class]); } } } +} - ddt_histogram_stat(dds_total, &ddh_total); +void +ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) +{ + ddt_histogram_t *ddh_total; + + ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); + ddt_get_dedup_histogram(spa, ddh_total); + ddt_histogram_stat(dds_total, ddh_total); + kmem_free(ddh_total, sizeof (ddt_histogram_t)); } uint64_t
--- a/usr/src/uts/common/fs/zfs/spa.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/uts/common/fs/zfs/spa.c Sun Nov 22 11:04:51 2009 -0800 @@ -1803,7 +1803,8 @@ spa->spa_minref = refcount_count(&spa->spa_refcount); if (error && error != EBADF) zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); - spa->spa_load_state = SPA_LOAD_NONE; + + spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; spa->spa_ena = 0; return (error);
--- a/usr/src/uts/common/fs/zfs/spa_config.c Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/uts/common/fs/zfs/spa_config.c Sun Nov 22 11:04:51 2009 -0800 @@ -404,6 +404,33 @@ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); + if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { + ddt_histogram_t *ddh; + ddt_stat_t *dds; + ddt_object_t *ddo; + + ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); + ddt_get_dedup_histogram(spa, ddh); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_HISTOGRAM, + (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); + kmem_free(ddh, sizeof (ddt_histogram_t)); + + ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); + ddt_get_dedup_object_stats(spa, ddo); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_OBJ_STATS, + (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); + kmem_free(ddo, sizeof (ddt_object_t)); + + dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); + ddt_get_dedup_stats(spa, dds); + VERIFY(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_DDT_STATS, + (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); + kmem_free(dds, sizeof (ddt_stat_t)); + } + spa_rewind_data_to_nvlist(spa, config); if (locked)
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h Sun Nov 22 11:04:51 2009 -0800 @@ -60,24 +60,6 @@ #define DDT_COMPRESS_FUNCTION_MASK 0x7f /* - * DDT statistics. - */ -typedef struct ddt_stat { - uint64_t dds_blocks; /* blocks */ - uint64_t dds_lsize; /* logical size */ - uint64_t dds_psize; /* physical size */ - uint64_t dds_dsize; /* deflated allocated size */ - uint64_t dds_ref_blocks; /* referenced blocks */ - uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ - uint64_t dds_ref_psize; /* referenced psize * refcnt */ - uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ -} ddt_stat_t; - -typedef struct ddt_histogram { - ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ -} ddt_histogram_t; - -/* * On-disk ddt entry: key (name) and physical storage (value). */ typedef struct ddt_key { @@ -215,6 +197,9 @@ extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src); extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh); extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh); +extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo); +extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh); +extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total); extern uint64_t ddt_get_dedup_dspace(spa_t *spa); extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
--- a/usr/src/uts/common/sys/fs/zfs.h Sun Nov 22 01:09:09 2009 -0800 +++ b/usr/src/uts/common/sys/fs/zfs.h Sun Nov 22 11:04:51 2009 -0800 @@ -451,6 +451,9 @@ #define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" #define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" #define ZPOOL_CONFIG_IS_HOLE "is_hole" +#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" +#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" +#define ZPOOL_CONFIG_DDT_STATS "ddt_stats" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ @@ -606,6 +609,31 @@ uint64_t vs_scrub_end; /* UTC scrub end time */ } vdev_stat_t; +/* + * DDT statistics. Note: all fields should be 64-bit because this + * is passed between kernel and userland as an nvlist uint64 array. + */ +typedef struct ddt_object { + uint64_t ddo_count; /* number of elements in ddt */ + uint64_t ddo_dspace; /* size of ddt on disk */ + uint64_t ddo_mspace; /* size of ddt in-core */ +} ddt_object_t; + +typedef struct ddt_stat { + uint64_t dds_blocks; /* blocks */ + uint64_t dds_lsize; /* logical size */ + uint64_t dds_psize; /* physical size */ + uint64_t dds_dsize; /* deflated allocated size */ + uint64_t dds_ref_blocks; /* referenced blocks */ + uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ + uint64_t dds_ref_psize; /* referenced psize * refcnt */ + uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ +} ddt_stat_t; + +typedef struct ddt_histogram { + ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ +} ddt_histogram_t; + #define ZVOL_DRIVER "zvol" #define ZFS_DRIVER "zfs" #define ZFS_DEV "/dev/zfs" @@ -686,11 +714,12 @@ * Internal SPA load state. Used by FMA diagnosis engine. 
*/ typedef enum { - SPA_LOAD_NONE, /* no load in progress */ - SPA_LOAD_OPEN, /* normal open */ - SPA_LOAD_IMPORT, /* import in progress */ + SPA_LOAD_NONE, /* no load in progress */ + SPA_LOAD_OPEN, /* normal open */ + SPA_LOAD_IMPORT, /* import in progress */ SPA_LOAD_TRYIMPORT, /* tryimport in progress */ - SPA_LOAD_RECOVER /* recovery requested */ + SPA_LOAD_RECOVER, /* recovery requested */ + SPA_LOAD_ERROR /* load failed */ } spa_load_state_t; /*