Mercurial > illumos > illumos-gate
changeset 10602:21d184667b7e
6399128 want tool to examine backup files (zstreamdump)
author | Lori Alt <Lori.Alt@Sun.COM> |
---|---|
date | Mon, 21 Sep 2009 21:41:02 -0600 |
parents | 531239fefdcd |
children | f9779f6db716 |
files | usr/src/cmd/Makefile usr/src/cmd/zstreamdump/Makefile usr/src/cmd/zstreamdump/zstreamdump.c usr/src/common/zfs/zfs_fletcher.c usr/src/common/zfs/zfs_fletcher.h usr/src/lib/libzfs/Makefile.com usr/src/lib/libzfs/common/libzfs_sendrecv.c usr/src/lib/libzfs/common/llib-lzfs usr/src/lib/libzfs/common/mapfile-vers usr/src/pkgdefs/SUNWzfsu/prototype_com usr/src/uts/common/Makefile.files usr/src/uts/common/fs/zfs/fletcher.c usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h |
diffstat | 13 files changed, 742 insertions(+), 253 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/Makefile Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/cmd/Makefile Mon Sep 21 21:41:02 2009 -0600 @@ -461,6 +461,7 @@ zonename \ zpool \ zlook \ + zstreamdump \ ztest $(CLOSED_BUILD)COMMON_SUBDIRS += \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/zstreamdump/Makefile Mon Sep 21 21:41:02 2009 -0600 @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +PROG= zstreamdump +OBJS= zstreamdump.o +SRCS= $(OBJS:%.o=%.c) +POFILE= zstreamdump.po + +include ../Makefile.cmd + +INCS += -I../../uts/common/fs/zfs +INCS += -I../../common/zfs + +LDLIBS += -lzfs -lnvpair + +C99MODE= -xc99=%all +C99LMODE= -Xc99=%all + +CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS) +$(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG + +# lint complains about unused _umem_* functions +LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2 +LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 + +.KEEP_STATE: + +.PARALLEL: + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +install: all $(ROOTUSRSBINPROG) + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +FRC: + +include ../Makefile.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/zstreamdump/zstreamdump.c Mon Sep 21 21:41:02 2009 -0600 @@ -0,0 +1,363 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <libnvpair.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> + +#include <sys/dmu.h> +#include <sys/zfs_ioctl.h> +#include <zfs_fletcher.h> + +uint64_t drr_record_count[DRR_NUMTYPES]; +uint64_t total_write_size = 0; +uint64_t total_stream_len = 0; +FILE *send_stream = 0; +boolean_t do_byteswap = B_FALSE; +boolean_t do_cksum = B_TRUE; +#define INITIAL_BUFLEN (1<<20) + +static void +usage(void) +{ + (void) fprintf(stderr, "usage: zstreamdump [-v] [-C] < file\n"); + (void) fprintf(stderr, "\t -v -- verbose\n"); + (void) fprintf(stderr, "\t -C -- suppress checksum verification\n"); + exit(1); +} + +/* + * ssread - send stream read. + * + * Read while computing incremental checksum + */ + +static size_t +ssread(void *buf, size_t len, zio_cksum_t *cksum) +{ + size_t outlen; + + if ((outlen = fread(buf, len, 1, send_stream)) == 0) + return (0); + + if (do_cksum && cksum) { + if (do_byteswap) + fletcher_4_incremental_byteswap(buf, len, cksum); + else + fletcher_4_incremental_native(buf, len, cksum); + } + total_stream_len += len; + return (outlen); +} + +int +main(int argc, char *argv[]) +{ + char *buf = malloc(INITIAL_BUFLEN); + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + struct drr_begin *drrb = &thedrr.drr_u.drr_begin; + struct drr_end *drre = &thedrr.drr_u.drr_end; + struct drr_object *drro = &thedrr.drr_u.drr_object; + struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects; + struct drr_write *drrw = &thedrr.drr_u.drr_write; + struct drr_free *drrf = &thedrr.drr_u.drr_free; + char c; + boolean_t verbose = B_FALSE; + boolean_t first = B_TRUE; + int i, err; + zio_cksum_t zc = { 0 }; + zio_cksum_t pcksum = { 0 }; + + while ((c = getopt(argc, argv, ":vC")) != -1) { + switch (c) { + case 'C': + do_cksum = B_FALSE; + break; + case 'v': + verbose = B_TRUE; + break; + case ':': + (void) fprintf(stderr, + "missing argument for '%c' option\n", optopt); + usage(); + break; + case '?': + (void) fprintf(stderr, "invalid option '%c'\n", + optopt); + usage(); + } + } + + if (isatty(STDIN_FILENO)) { + (void) fprintf(stderr, + "Error: Backup stream can not be read " + "from a terminal.\n" + "You must redirect standard input.\n"); + exit(1); + } + + send_stream = stdin; + pcksum = zc; + while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) { + + if (first) { + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { + do_byteswap = B_TRUE; + if (do_cksum) { + ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); + /* + * recalculate header checksum now + * that we know it needs to be + * byteswapped. + */ + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &zc); + } + } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) { + (void) fprintf(stderr, "Invalid stream " + "(bad magic number)\n"); + exit(1); + } + first = B_FALSE; + } + if (do_byteswap) { + drr->drr_type = BSWAP_32(drr->drr_type); + drr->drr_payloadlen = + BSWAP_32(drr->drr_payloadlen); + } + + /* + * At this point, the leading fields of the replay record + * (drr_type and drr_payloadlen) have been byte-swapped if + * necessary, but the rest of the data structure (the + * union of type-specific structures) is still in its + * original state. + */ + if (drr->drr_type >= DRR_NUMTYPES) { + (void) printf("INVALID record found: type 0x%x\n", + drr->drr_type); + (void) printf("Aborting.\n"); + exit(1); + } + + drr_record_count[drr->drr_type]++; + + switch (drr->drr_type) { + case DRR_BEGIN: + if (do_byteswap) { + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_version = BSWAP_64(drrb->drr_version); + drrb->drr_creation_time = + BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_flags = BSWAP_32(drrb->drr_flags); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = + BSWAP_64(drrb->drr_fromguid); + } + + (void) printf("BEGIN record\n"); + (void) printf("\tversion = %llx\n", + (u_longlong_t)drrb->drr_version); + (void) printf("\tmagic = %llx\n", + (u_longlong_t)drrb->drr_magic); + (void) printf("\tcreation_time = %llx\n", + (u_longlong_t)drrb->drr_creation_time); + (void) printf("\ttype = %u\n", drrb->drr_type); + (void) printf("\tflags = 0x%x\n", drrb->drr_flags); + (void) printf("\ttoguid = %llx\n", + (u_longlong_t)drrb->drr_toguid); + (void) printf("\tfromguid = %llx\n", + (u_longlong_t)drrb->drr_fromguid); + (void) printf("\ttoname = %s\n", drrb->drr_toname); + if (verbose) + (void) printf("\n"); + + if (drrb->drr_version == 2 && + drr->drr_payloadlen != 0) { + nvlist_t *nv; + int sz = drr->drr_payloadlen; + + if (sz > 1<<20) { + free(buf); + buf = malloc(sz); + } + (void) ssread(buf, sz, &zc); + if (ferror(send_stream)) + perror("fread"); + err = nvlist_unpack(buf, sz, &nv, 0); + if (err) + perror(strerror(err)); + nvlist_print(stdout, nv); + nvlist_free(nv); + } + break; + + case DRR_END: + if (do_byteswap) { + drre->drr_checksum.zc_word[0] = + BSWAP_64(drre->drr_checksum.zc_word[0]); + drre->drr_checksum.zc_word[1] = + BSWAP_64(drre->drr_checksum.zc_word[1]); + drre->drr_checksum.zc_word[2] = + BSWAP_64(drre->drr_checksum.zc_word[2]); + drre->drr_checksum.zc_word[3] = + BSWAP_64(drre->drr_checksum.zc_word[3]); + } + /* + * We compare against the *previous* checksum + * value, because the stored checksum is of + * everything before the DRR_END record. + */ + if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum, + pcksum)) { + (void) printf("Expected checksum differs from " + "checksum in stream.\n"); + (void) printf("Expected checksum = " + "%llx/%llx/%llx/%llx\n", + pcksum.zc_word[0], + pcksum.zc_word[1], + pcksum.zc_word[2], + pcksum.zc_word[3]); + } + (void) printf("END checksum = %llx/%llx/%llx/%llx\n", + drre->drr_checksum.zc_word[0], + drre->drr_checksum.zc_word[1], + drre->drr_checksum.zc_word[2], + drre->drr_checksum.zc_word[3]); + + ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); + break; + + case DRR_OBJECT: + if (do_byteswap) { + drro->drr_object = BSWAP_64(drro->drr_object); + drro->drr_type = BSWAP_32(drro->drr_type); + drro->drr_bonustype = + BSWAP_32(drro->drr_bonustype); + drro->drr_blksz = BSWAP_32(drro->drr_blksz); + drro->drr_bonuslen = + BSWAP_32(drro->drr_bonuslen); + } + if (verbose) { + (void) printf("OBJECT object = %llu type = %u " + "bonustype = %u blksz = %u bonuslen = %u\n", + (u_longlong_t)drro->drr_object, + drro->drr_type, + drro->drr_bonustype, + drro->drr_blksz, + drro->drr_bonuslen); + } + if (drro->drr_bonuslen > 0) { + (void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen, + 8), &zc); + } + break; + + case DRR_FREEOBJECTS: + if (do_byteswap) { + drrfo->drr_firstobj = + BSWAP_64(drrfo->drr_firstobj); + drrfo->drr_numobjs = + BSWAP_64(drrfo->drr_numobjs); + } + if (verbose) { + (void) printf("FREEOBJECTS firstobj = %llu " + "numobjs = %llu\n", + (u_longlong_t)drrfo->drr_firstobj, + (u_longlong_t)drrfo->drr_numobjs); + } + break; + + case DRR_WRITE: + if (do_byteswap) { + drrw->drr_object = BSWAP_64(drrw->drr_object); + drrw->drr_type = BSWAP_32(drrw->drr_type); + drrw->drr_offset = BSWAP_64(drrw->drr_offset); + drrw->drr_length = BSWAP_64(drrw->drr_length); + } + if (verbose) { + (void) printf("WRITE object = %llu type = %u " + "offset = %llu length = %llu\n", + (u_longlong_t)drrw->drr_object, + drrw->drr_type, + (u_longlong_t)drrw->drr_offset, + (u_longlong_t)drrw->drr_length); + } + (void) ssread(buf, drrw->drr_length, &zc); + total_write_size += drrw->drr_length; + break; + + case DRR_FREE: + if (do_byteswap) { + drrf->drr_object = BSWAP_64(drrf->drr_object); + drrf->drr_offset = BSWAP_64(drrf->drr_offset); + drrf->drr_length = BSWAP_64(drrf->drr_length); + } + if (verbose) { + (void) printf("FREE object = %llu " + "offset = %llu length = %lld\n", + (u_longlong_t)drrf->drr_object, + (u_longlong_t)drrf->drr_offset, + (longlong_t)drrf->drr_length); + } + break; + } + pcksum = zc; + } + free(buf); + + /* Print final summary */ + + (void) printf("SUMMARY:\n"); + (void) printf("\tTotal DRR_BEGIN records = %lld\n", + (u_longlong_t)drr_record_count[DRR_BEGIN]); + (void) printf("\tTotal DRR_END records = %lld\n", + (u_longlong_t)drr_record_count[DRR_END]); + (void) printf("\tTotal DRR_OBJECT records = %lld\n", + (u_longlong_t)drr_record_count[DRR_OBJECT]); + (void) printf("\tTotal DRR_FREEOBJECTS records = %lld\n", + (u_longlong_t)drr_record_count[DRR_FREEOBJECTS]); + (void) printf("\tTotal DRR_WRITE records = %lld\n", + (u_longlong_t)drr_record_count[DRR_WRITE]); + (void) printf("\tTotal DRR_FREE records = %lld\n", + (u_longlong_t)drr_record_count[DRR_FREE]); + (void) printf("\tTotal records = %lld\n", + (u_longlong_t)(drr_record_count[DRR_BEGIN] + + drr_record_count[DRR_OBJECT] + + drr_record_count[DRR_FREEOBJECTS] + + drr_record_count[DRR_WRITE] + + drr_record_count[DRR_FREE] + + drr_record_count[DRR_END])); + (void) printf("\tTotal write size = %lld (0x%llx)\n", + (u_longlong_t)total_write_size, (u_longlong_t)total_write_size); + (void) printf("\tTotal stream length = %lld (0x%llx)\n", + (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); + return (0); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/common/zfs/zfs_fletcher.c Mon Sep 21 21:41:02 2009 -0600 @@ -0,0 +1,245 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Fletcher Checksums + * ------------------ + * + * ZFS's 2nd and 4th order Fletcher checksums are defined by the following + * recurrence relations: + * + * a = a + f + * i i-1 i-1 + * + * b = b + a + * i i-1 i + * + * c = c + b (fletcher-4 only) + * i i-1 i + * + * d = d + c (fletcher-4 only) + * i i-1 i + * + * Where + * a_0 = b_0 = c_0 = d_0 = 0 + * and + * f_0 .. f_(n-1) are the input data. + * + * Using standard techniques, these translate into the following series: + * + * __n_ __n_ + * \ | \ | + * a = > f b = > i * f + * n /___| n - i n /___| n - i + * i = 1 i = 1 + * + * + * __n_ __n_ + * \ | i*(i+1) \ | i*(i+1)*(i+2) + * c = > ------- f d = > ------------- f + * n /___| 2 n - i n /___| 6 n - i + * i = 1 i = 1 + * + * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. + * Since the additions are done mod (2^64), errors in the high bits may not + * be noticed. For this reason, fletcher-2 is deprecated. + * + * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. + * A conservative estimate of how big the buffer can get before we overflow + * can be estimated using f_i = 0xffffffff for all i: + * + * % bc + * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 + * 2264 + * quit + * % + * + * So blocks of up to 2k will not overflow. Our largest block size is + * 128k, which has 32k 4-byte words, so we can compute the largest possible + * accumulators, then divide by 2^64 to figure the max amount of overflow: + * + * % bc + * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } + * a/2^64;b/2^64;c/2^64;d/2^64 + * 0 + * 0 + * 1365 + * 11186858 + * quit + * % + * + * So a and b cannot overflow. To make sure each bit of input has some + * effect on the contents of c and d, we can look at what the factors of + * the coefficients in the equations for c_n and d_n are. The number of 2s + * in the factors determines the lowest set bit in the multiplier. Running + * through the cases for n*(n+1)/2 reveals that the highest power of 2 is + * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow + * the 64-bit accumulators, every bit of every f_i effects every accumulator, + * even for 128k blocks. + * + * If we wanted to make a stronger version of fletcher4 (fletcher4c?), + * we could do our calculations mod (2^32 - 1) by adding in the carries + * periodically, and store the number of carries in the top 32-bits. + * + * -------------------- + * Checksum Performance + * -------------------- + * + * There are two interesting components to checksum performance: cached and + * uncached performance. With cached data, fletcher-2 is about four times + * faster than fletcher-4. With uncached data, the performance difference is + * negligible, since the cost of a cache fill dominates the processing time. + * Even though fletcher-4 is slower than fletcher-2, it is still a pretty + * efficient pass over the data. + * + * In normal operation, the data which is being checksummed is in a buffer + * which has been filled either by: + * + * 1. a compression step, which will be mostly cached, or + * 2. a bcopy() or copyin(), which will be uncached (because the + * copy is cache-bypassing). + * + * For both cached and uncached data, both fletcher checksums are much faster + * than sha-256, and slower than 'off', which doesn't touch the data at all. + */ + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/byteorder.h> +#include <sys/spa.h> + +void +fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += ip[0]; + a1 += ip[1]; + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint64_t *ip = buf; + const uint64_t *ipend = ip + (size / sizeof (uint64_t)); + uint64_t a0, b0, a1, b1; + + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { + a0 += BSWAP_64(ip[0]); + a1 += BSWAP_64(ip[1]); + b0 += a0; + b1 += a1; + } + + ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); +} + +void +fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_native(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_byteswap(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + a = zcp->zc_word[0]; + b = zcp->zc_word[1]; + c = zcp->zc_word[2]; + d = zcp->zc_word[3]; + + for (; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/common/zfs/zfs_fletcher.h Mon Sep 21 21:41:02 2009 -0600 @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _ZFS_FLETCHER_H +#define _ZFS_FLETCHER_H + +#include <sys/types.h> +#include <sys/spa.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * fletcher checksum functions + */ + +void fletcher_2_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_incremental_native(const void *, uint64_t, + zio_cksum_t *); +void fletcher_4_incremental_byteswap(const void *, uint64_t, + zio_cksum_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_FLETCHER_H */
--- a/usr/src/lib/libzfs/Makefile.com Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/lib/libzfs/Makefile.com Mon Sep 21 21:41:02 2009 -0600 @@ -27,7 +27,7 @@ VERS= .1 OBJS_SHARED= zfs_namecheck.o zprop_common.o zfs_prop.o zpool_prop.o \ - zfs_deleg.o zfs_comutil.o + zfs_deleg.o zfs_comutil.o zfs_fletcher.o OBJS_COMMON= libzfs_dataset.o libzfs_util.o libzfs_graph.o libzfs_mount.o \ libzfs_pool.o libzfs_changelist.o libzfs_config.o libzfs_import.o \ libzfs_status.o libzfs_sendrecv.o
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c Mon Sep 21 21:41:02 2009 -0600 @@ -40,10 +40,9 @@ #include "zfs_namecheck.h" #include "zfs_prop.h" +#include "zfs_fletcher.h" #include "libzfs_impl.h" -#include <fletcher.c> /* XXX */ - static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t, int, avl_tree_t *, char **);
--- a/usr/src/lib/libzfs/common/llib-lzfs Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/lib/libzfs/common/llib-lzfs Mon Sep 21 21:41:02 2009 -0600 @@ -19,14 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /*LINTLIBRARY*/ /*PROTOLIB1*/ #include <libzfs.h> #include "../../../common/zfs/zfs_comutil.h" +#include "../../../common/zfs/zfs_fletcher.h"
--- a/usr/src/lib/libzfs/common/mapfile-vers Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/lib/libzfs/common/mapfile-vers Mon Sep 21 21:41:02 2009 -0600 @@ -39,6 +39,12 @@ SUNWprivate_1.1 { global: + fletcher_2_native; + fletcher_2_byteswap; + fletcher_4_native; + fletcher_4_byteswap; + fletcher_4_incremental_native; + fletcher_4_incremental_byteswap; libzfs_errno; libzfs_error_action; libzfs_error_description;
--- a/usr/src/pkgdefs/SUNWzfsu/prototype_com Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/pkgdefs/SUNWzfsu/prototype_com Mon Sep 21 21:41:02 2009 -0600 @@ -87,3 +87,4 @@ l none usr/sbin/zdb=../../usr/lib/isaexec s none usr/sbin/zfs=../../sbin/zfs s none usr/sbin/zpool=../../sbin/zpool +f none usr/sbin/zstreamdump 555 root bin
--- a/usr/src/uts/common/Makefile.files Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/uts/common/Makefile.files Mon Sep 21 21:41:02 2009 -0600 @@ -1288,7 +1288,6 @@ dsl_deleg.o \ dsl_prop.o \ dsl_scrub.o \ - fletcher.o \ gzip.o \ lzjb.o \ metaslab.o \ @@ -1330,6 +1329,7 @@ zfs_deleg.o \ zfs_prop.o \ zfs_comutil.o \ + zfs_fletcher.o \ zpool_prop.o \ zprop_common.o
--- a/usr/src/uts/common/fs/zfs/fletcher.c Mon Sep 21 21:03:13 2009 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,245 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Fletcher Checksums - * ------------------ - * - * ZFS's 2nd and 4th order Fletcher checksums are defined by the following - * recurrence relations: - * - * a = a + f - * i i-1 i-1 - * - * b = b + a - * i i-1 i - * - * c = c + b (fletcher-4 only) - * i i-1 i - * - * d = d + c (fletcher-4 only) - * i i-1 i - * - * Where - * a_0 = b_0 = c_0 = d_0 = 0 - * and - * f_0 .. f_(n-1) are the input data. - * - * Using standard techniques, these translate into the following series: - * - * __n_ __n_ - * \ | \ | - * a = > f b = > i * f - * n /___| n - i n /___| n - i - * i = 1 i = 1 - * - * - * __n_ __n_ - * \ | i*(i+1) \ | i*(i+1)*(i+2) - * c = > ------- f d = > ------------- f - * n /___| 2 n - i n /___| 6 n - i - * i = 1 i = 1 - * - * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators. - * Since the additions are done mod (2^64), errors in the high bits may not - * be noticed. For this reason, fletcher-2 is deprecated. - * - * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators. - * A conservative estimate of how big the buffer can get before we overflow - * can be estimated using f_i = 0xffffffff for all i: - * - * % bc - * f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4 - * 2264 - * quit - * % - * - * So blocks of up to 2k will not overflow. Our largest block size is - * 128k, which has 32k 4-byte words, so we can compute the largest possible - * accumulators, then divide by 2^64 to figure the max amount of overflow: - * - * % bc - * a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c } - * a/2^64;b/2^64;c/2^64;d/2^64 - * 0 - * 0 - * 1365 - * 11186858 - * quit - * % - * - * So a and b cannot overflow. To make sure each bit of input has some - * effect on the contents of c and d, we can look at what the factors of - * the coefficients in the equations for c_n and d_n are. The number of 2s - * in the factors determines the lowest set bit in the multiplier. Running - * through the cases for n*(n+1)/2 reveals that the highest power of 2 is - * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow - * the 64-bit accumulators, every bit of every f_i effects every accumulator, - * even for 128k blocks. - * - * If we wanted to make a stronger version of fletcher4 (fletcher4c?), - * we could do our calculations mod (2^32 - 1) by adding in the carries - * periodically, and store the number of carries in the top 32-bits. - * - * -------------------- - * Checksum Performance - * -------------------- - * - * There are two interesting components to checksum performance: cached and - * uncached performance. With cached data, fletcher-2 is about four times - * faster than fletcher-4. With uncached data, the performance difference is - * negligible, since the cost of a cache fill dominates the processing time. - * Even though fletcher-4 is slower than fletcher-2, it is still a pretty - * efficient pass over the data. - * - * In normal operation, the data which is being checksummed is in a buffer - * which has been filled either by: - * - * 1. a compression step, which will be mostly cached, or - * 2. a bcopy() or copyin(), which will be uncached (because the - * copy is cache-bypassing). - * - * For both cached and uncached data, both fletcher checksums are much faster - * than sha-256, and slower than 'off', which doesn't touch the data at all. - */ - -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/byteorder.h> -#include <sys/spa.h> - -void -fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp) -{ - const uint64_t *ip = buf; - const uint64_t *ipend = ip + (size / sizeof (uint64_t)); - uint64_t a0, b0, a1, b1; - - for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { - a0 += ip[0]; - a1 += ip[1]; - b0 += a0; - b1 += a1; - } - - ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); -} - -void -fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) -{ - const uint64_t *ip = buf; - const uint64_t *ipend = ip + (size / sizeof (uint64_t)); - uint64_t a0, b0, a1, b1; - - for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { - a0 += BSWAP_64(ip[0]); - a1 += BSWAP_64(ip[1]); - b0 += a0; - b1 += a1; - } - - ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); -} - -void -fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp) -{ - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - for (a = b = c = d = 0; ip < ipend; ip++) { - a += ip[0]; - b += a; - c += b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); -} - -void -fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp) -{ - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - for (a = b = c = d = 0; ip < ipend; ip++) { - a += BSWAP_32(ip[0]); - b += a; - c += b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); -} - -void -fletcher_4_incremental_native(const void *buf, uint64_t size, - zio_cksum_t *zcp) -{ - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - a = zcp->zc_word[0]; - b = zcp->zc_word[1]; - c = zcp->zc_word[2]; - d = zcp->zc_word[3]; - - for (; ip < ipend; ip++) { - a += ip[0]; - b += a; - c += b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); -} - -void -fletcher_4_incremental_byteswap(const void *buf, uint64_t size, - zio_cksum_t *zcp) -{ - const uint32_t *ip = buf; - const uint32_t *ipend = ip + (size / sizeof (uint32_t)); - uint64_t a, b, c, d; - - a = zcp->zc_word[0]; - b = zcp->zc_word[1]; - c = zcp->zc_word[2]; - d = zcp->zc_word[3]; - - for (; ip < ipend; ip++) { - a += BSWAP_32(ip[0]); - b += a; - c += b; - d += c; - } - - ZIO_SET_CHECKSUM(zcp, a, b, c, d); -}
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 21 21:03:13 2009 -0600 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Mon Sep 21 21:41:02 2009 -0600 @@ -58,7 +58,7 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, - DRR_WRITE, DRR_FREE, DRR_END, + DRR_WRITE, DRR_FREE, DRR_END, DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union {