changeset 806:849fb015aa25 onnv_27

6337437 .zfs doesn't work over NFS
author ek110237
date Mon, 31 Oct 2005 22:53:57 -0800
parents 3946c495718a
children b30cbada1800
files usr/src/uts/common/fs/nfs/nfs4_dispatch.c usr/src/uts/common/fs/nfs/nfs4_srv_ns.c usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c usr/src/uts/common/fs/nfs/nfs4_xdr.c usr/src/uts/common/fs/nfs/nfs_export.c usr/src/uts/common/fs/nfs/nfs_server.c usr/src/uts/common/nfs/export.h usr/src/uts/common/nfs/nfs.h usr/src/uts/common/nfs/nfs4.h usr/src/uts/common/nfs/nfs4_kprot.h usr/src/uts/intel/nfs/Makefile usr/src/uts/sparc/nfs/Makefile
diffstat 12 files changed, 440 insertions(+), 133 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/nfs/nfs4_dispatch.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_dispatch.c	Mon Oct 31 22:53:57 2005 -0800
@@ -503,7 +503,7 @@
 	/*
 	 * Send out the replayed reply or the 'real' one.
 	 */
-	if (!svc_sendreply(xprt,  xdr_COMPOUND4res, (char *)rbp)) {
+	if (!svc_sendreply(xprt,  xdr_COMPOUND4res_srv, (char *)rbp)) {
 		DTRACE_PROBE2(nfss__e__dispatch_sendfail,
 			struct svc_req *, xprt,
 			char *, rbp);
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c	Mon Oct 31 22:53:57 2005 -0800
@@ -58,9 +58,13 @@
 	 * XXX nfs4_fid() does nothing and returns EREMOTE.
 	 * XXX nfs3_fid()/nfs_fid() returns nfs filehandle as its fid
 	 * which has a bigger length than local fid.
-	 * NFS_FHMAXDATA is the size of fhandle_t.fh_xdata[NFS_FHMAXDATA].
+	 * NFS_FHMAXDATA_EXT is the size of
+	 * fhandle_ext_t.fh_xdata[NFS_FHMAXDATA_EXT].
+	 *
+	 * Note: nfs[2,3,4]_fid() only gets called for diskless clients.
 	 */
-	if (error == EREMOTE || (error == 0 && fidp->fid_len > NFS_FHMAXDATA)) {
+	if (error == EREMOTE ||
+	    (error == 0 && fidp->fid_len > NFS_FHMAXDATA_EXT)) {
 
 		va.va_mask = AT_NODEID;
 		error = VOP_GETATTR(vp, &va, 0, CRED());
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_readdir.c	Mon Oct 31 22:53:57 2005 -0800
@@ -990,6 +990,7 @@
 			    FATTR4_FILEID_MASK)) {
 
 				if (ae & FATTR4_FILEHANDLE_MASK) {
+					bool_t fh_error;
 					struct {
 						uint_t len;
 						char *val;
@@ -998,11 +999,12 @@
 					fh.len = 0;
 					fh.val = fh.fh;
 					(void) makefh4((nfs_fh4 *)&fh, vp,
-						(newexi ? newexi : cs->exi));
+					    (newexi ? newexi : cs->exi));
 
-					if ((ptr +
-					    (fh.len / BYTES_PER_XDR_UNIT) + 1)
-					    > ptr_redzone) {
+					fh_error = xdr_inline_encode_nfs_fh4(
+					    &ptr, ptr_redzone,
+					    (nfs_fh4_fmt_t *)fh.val);
+					if (fh_error) {
 						if (nents ||
 						    IS_MIN_ATTR_MASK(ar)) {
 							no_space = TRUE;
@@ -1013,13 +1015,6 @@
 						ptr = lastentry_ptr;
 						goto reencode_attrs;
 					}
-					IXDR_PUT_U_INT32(ptr, fh.len);
-					/* encode the RNDUP FILL first */
-					rndup = RNDUP(fh.len) /
-						BYTES_PER_XDR_UNIT;
-					ptr[rndup - 1] = 0;
-					bcopy(fh.fh, ptr, fh.len);
-					ptr += rndup;
 				}
 				if (ae & FATTR4_FILEID_MASK) {
 					IXDR_PUT_HYPER(ptr, va.va_nodeid);
--- a/usr/src/uts/common/fs/nfs/nfs4_xdr.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs4_xdr.c	Mon Oct 31 22:53:57 2005 -0800
@@ -149,14 +149,303 @@
 }
 
 /*
- * Called in nfs_acl_xdr.c
+ * XDR_INLINE encode a filehandle.
+ */
+bool_t
+xdr_inline_encode_nfs_fh4(uint32_t **ptrp, uint32_t *ptr_redzone,
+	nfs_fh4_fmt_t *fhp)
+{
+	uint32_t *ptr = *ptrp;
+	uint_t otw_len;
+	char *curp;
+	uint_t fsize, xsize;
+	uint32_t padword;
+
+	/*
+	 * Returns TRUE, advancing *ptrp past the encoded filehandle, or
+	 * FALSE (nothing written) if it would not fit below ptr_redzone.
+	 *
+	 * Each fid takes at least NFS_FHMAXDATA bytes on the wire, so the
+	 * length is built from the same padded sizes that are copied below
+	 * rather than from the raw fh4_len/fh4_xlen values.
+	 */
+	fsize = fhp->fh4_len < NFS_FHMAXDATA ? NFS_FHMAXDATA : fhp->fh4_len;
+	xsize = fhp->fh4_xlen < NFS_FHMAXDATA ? NFS_FHMAXDATA : fhp->fh4_xlen;
+
+	/* fsid plus the variable sized part, rounded out to a full word. */
+	otw_len = sizeof (fhp->fh4_fsid) + sizeof (fhp->fh4_len) + fsize +
+	    sizeof (fhp->fh4_xlen) + xsize;
+	otw_len = RNDUP(otw_len);
+
+	/* Index (past the length word) of the word holding any pad bytes. */
+	padword = (otw_len / BYTES_PER_XDR_UNIT) - 1;
+
+	/* Add in the fixed sized pieces. */
+	otw_len += sizeof (fhp->fh4_flag);
+#ifdef VOLATILE_FH_TEST
+	otw_len += sizeof (fhp->fh4_volatile_id);
+#endif
+
+	/* Make sure we don't exceed our buffer (+ 1 for the length word). */
+	if ((ptr + (otw_len / BYTES_PER_XDR_UNIT) + 1) > ptr_redzone)
+		return (FALSE);
+
+	IXDR_PUT_U_INT32(ptr, otw_len);
+
+	/*
+	 * Zero out the padding before the data is copied in, so that only
+	 * the bytes the copies below do not reach stay zero.
+	 */
+	ptr[padword] = 0;
+
+	/* fh4_fsid */
+	IXDR_PUT_INT32(ptr, fhp->fh4_fsid.val[0]);
+	IXDR_PUT_INT32(ptr, fhp->fh4_fsid.val[1]);
+
+	/*
+	 * Since the next pieces are unaligned, we need to
+	 * do bytewise copies.
+	 */
+	curp = (char *)ptr;
+
+	/* fh4_len + fh4_data */
+	bcopy(&fhp->fh4_len, curp, sizeof (fhp->fh4_len) + fsize);
+	curp += sizeof (fhp->fh4_len) + fsize;
+
+	/* fh4_xlen + fh4_xdata */
+	bcopy(&fhp->fh4_xlen, curp, sizeof (fhp->fh4_xlen) + xsize);
+	curp += sizeof (fhp->fh4_xlen) + xsize;
+
+	/* do necessary rounding/padding */
+	curp = (char *)RNDUP((uintptr_t)curp);
+	ptr = (uint32_t *)curp;
+
+	/*
+	 * With the above padding, we're word aligned again.
+	 */
+	ASSERT(((uintptr_t)ptr % BYTES_PER_XDR_UNIT) == 0);
+
+	/* fh4_flag */
+	IXDR_PUT_INT32(ptr, fhp->fh4_flag);
+
+#ifdef VOLATILE_FH_TEST
+	/* fh4_volatile_id */
+	IXDR_PUT_INT32(ptr, fhp->fh4_volatile_id);
+#endif
+	*ptrp = ptr;
+
+	return (TRUE);
+}
+
+static char xdr_crud[BYTES_PER_XDR_UNIT];
+
+static bool_t
+xdr_decode_nfs_fh4(XDR *xdrs, nfs_fh4 *objp)
+{
+	uint32_t fhsize;		/* filehandle size */
+	uint32_t fsize;			/* fh_len size */
+	uint32_t xsize;			/* fh_xlen size */
+	uint32_t rsize;			/* bytes to round */
+	uint32_t psize;			/* pad size */
+	uint32_t dsize;			/* "data" size */
+	nfs_fh4_fmt_t *fh_fmtp;
+
+	/*
+	 * Decode an over-the-wire filehandle into a newly allocated
+	 * nfs_fh4_fmt_t hung off objp.  Returns FALSE on malformed input;
+	 * once allocated, the buffer stays attached to objp even then.
+	 */
+	ASSERT(xdrs->x_op == XDR_DECODE);
+
+	/* Retrieve the filehandle length. */
+	if (!XDR_GETINT32(xdrs, (int32_t *)&fhsize))
+		return (FALSE);
+
+	/*
+	 * Check to see if what the client sent us is bigger than what
+	 * we can ever possibly send out or smaller than what we could
+	 * possibly send out.
+	 */
+	if (fhsize > sizeof (nfs_fh4_fmt_t) ||
+	    fhsize < sizeof (fsid_t) + sizeof (ushort_t) + sizeof (ushort_t))
+		return (FALSE);
+
+	rsize = fhsize % BYTES_PER_XDR_UNIT;
+	if (rsize != 0)
+		rsize = BYTES_PER_XDR_UNIT - rsize;
+
+	objp->nfs_fh4_val = kmem_zalloc(sizeof (nfs_fh4_fmt_t), KM_SLEEP);
+	objp->nfs_fh4_len = sizeof (nfs_fh4_fmt_t);
+	fh_fmtp = (nfs_fh4_fmt_t *)objp->nfs_fh4_val;
+
+	/* Decode what should be fh4_fsid. */
+	if (!XDR_GETINT32(xdrs, (int32_t *)&fh_fmtp->fh4_fsid.val[0]))
+		return (FALSE);
+	if (!XDR_GETINT32(xdrs, (int32_t *)&fh_fmtp->fh4_fsid.val[1]))
+		return (FALSE);
+	fhsize -= sizeof (fsid_t);
+
+	/*
+	 * Decode what should be fh4_len.  fh4_len is two bytes, so we're
+	 * unaligned now and must use XDR_GETBYTES until the pad is skipped.
+	 */
+	if (!XDR_GETBYTES(xdrs, (caddr_t)&fh_fmtp->fh4_len,
+	    sizeof (ushort_t)))
+		return (FALSE);
+	fhsize -= sizeof (ushort_t);
+
+	fsize = fh_fmtp->fh4_len < NFS_FHMAXDATA ? NFS_FHMAXDATA :
+			fh_fmtp->fh4_len;
+	/*
+	 * Make sure the client isn't sending us a bogus length for fh4_data:
+	 * it must fit both the fh4_data array and what is left of the fh.
+	 */
+	if (fsize > sizeof (fh_fmtp->fh4_data) || fhsize < fsize)
+		return (FALSE);
+
+	if (!XDR_GETBYTES(xdrs, (caddr_t)&fh_fmtp->fh4_data, fsize))
+		return (FALSE);
+	fhsize -= fsize;
+
+	/* make sure we have enough left to decode fh_xlen */
+	if (fhsize < sizeof (ushort_t))
+		return (FALSE);
+
+	if (!XDR_GETBYTES(xdrs, (caddr_t)&fh_fmtp->fh4_xlen,
+	    sizeof (ushort_t)))
+		return (FALSE);
+	fhsize -= sizeof (ushort_t);
+
+	xsize = fh_fmtp->fh4_xlen < NFS_FHMAXDATA ? NFS_FHMAXDATA :
+			fh_fmtp->fh4_xlen;
+	/*
+	 * Likewise for fh4_xdata; without the array bound a large fh4_xlen
+	 * would let XDR_GETBYTES write past the end of the allocation.
+	 */
+	if (xsize > sizeof (fh_fmtp->fh4_xdata) || fhsize < xsize)
+		return (FALSE);
+
+	if (!XDR_GETBYTES(xdrs, (caddr_t)&fh_fmtp->fh4_xdata, xsize))
+		return (FALSE);
+	fhsize -= xsize;
+
+	/* we purposely align things, so skip padding */
+	dsize = fsize + xsize + sizeof (ushort_t) + sizeof (ushort_t);
+	psize = RNDUP(dsize);
+	if (psize != dsize)
+		if (!XDR_GETBYTES(xdrs, (caddr_t)&xdr_crud, psize - dsize))
+			return (FALSE);
+
+	/* make sure we have enough left to decode fh4_flag */
+	if (fhsize < sizeof (uint32_t))
+		return (FALSE);
+
+	/* fh4_flag is encoded as an XDR int32, so decode it as one. */
+	if (!XDR_GETINT32(xdrs, (int32_t *)&fh_fmtp->fh4_flag))
+		return (FALSE);
+	fhsize -= sizeof (uint32_t);
+
+#ifdef VOLATILE_FH_TEST
+	/* make sure we have enough left to decode fh4_volatile_id */
+	if (fhsize < sizeof (uint32_t))
+		return (FALSE);
+
+	if (!XDR_GETINT32(xdrs, (int32_t *)&fh_fmtp->fh4_volatile_id))
+		return (FALSE);
+	fhsize -= sizeof (uint32_t);
+#endif
+	/* Make sure client didn't send request with too much padding. */
+	if (fhsize > sizeof (uint32_t))
+		return (FALSE);
+
+	if (rsize)
+		if (!XDR_GETBYTES(xdrs, (caddr_t)&xdr_crud, rsize))
+			return (FALSE);
+
+	return (TRUE);
+}
+
+static char zero_word[BYTES_PER_XDR_UNIT] = { 0, 0, 0, 0 };
+
+static bool_t
+xdr_encode_nfs_fh4(XDR *xdrs, nfs_fh4 *objp)
+{
+	nfs_fh4_fmt_t *fmt = (nfs_fh4_fmt_t *)objp->nfs_fh4_val;
+	uint_t fid_bytes, xfid_bytes;	/* wire sizes of fid and export fid */
+	uint_t body_len, pad_len;	/* unaligned middle part and its pad */
+	uint_t wire_len;		/* total filehandle length sent */
+
+	/*
+	 * Encode the server's filehandle in its over-the-wire form:
+	 *	length word
+	 *	fh4_fsid (two XDR ints)
+	 *	fh4_len + fh4_data, fh4_xlen + fh4_xdata (raw bytes)
+	 *	zero pad out to a word boundary
+	 *	fh4_flag (and fh4_volatile_id under VOLATILE_FH_TEST)
+	 * Returns FALSE as soon as any XDR primitive fails.
+	 */
+	ASSERT(xdrs->x_op == XDR_ENCODE);
+
+	/* Each fid is sent as at least NFS_FHMAXDATA bytes. */
+	fid_bytes = fmt->fh4_len;
+	if (fid_bytes < NFS_FHMAXDATA)
+		fid_bytes = NFS_FHMAXDATA;
+	xfid_bytes = fmt->fh4_xlen;
+	if (xfid_bytes < NFS_FHMAXDATA)
+		xfid_bytes = NFS_FHMAXDATA;
+
+	/* Two length shorts plus both fids; this part may end unaligned. */
+	body_len = sizeof (ushort_t) + fid_bytes +
+	    sizeof (ushort_t) + xfid_bytes;
+	pad_len = RNDUP(body_len) - body_len;
+
+	/* fsid + body rounded to a full word, then the fixed-size words. */
+	wire_len = sizeof (fsid_t) + body_len;
+	wire_len = RNDUP(wire_len);
+	wire_len += sizeof (uint32_t);
+#ifdef VOLATILE_FH_TEST
+	wire_len += sizeof (uint32_t);
+#endif
+
+	/* Length word, then fh4_fsid as two XDR ints. */
+	if (!XDR_PUTINT32(xdrs, (int32_t *)&wire_len) ||
+	    !XDR_PUTINT32(xdrs, (int32_t *)&fmt->fh4_fsid.val[0]) ||
+	    !XDR_PUTINT32(xdrs, (int32_t *)&fmt->fh4_fsid.val[1]))
+		return (FALSE);
+
+	/* fh4_len + fh4_data, then fh4_xlen + fh4_xdata, as raw bytes. */
+	if (!XDR_PUTBYTES(xdrs, (caddr_t)&fmt->fh4_len,
+	    fid_bytes + sizeof (ushort_t)) ||
+	    !XDR_PUTBYTES(xdrs, (caddr_t)&fmt->fh4_xlen,
+	    xfid_bytes + sizeof (ushort_t)))
+		return (FALSE);
+
+	/* Pad in the extra space to force alignment. */
+	if (pad_len != 0 &&
+	    !XDR_PUTBYTES(xdrs, (caddr_t)&zero_word, pad_len))
+		return (FALSE);
+
+	if (!XDR_PUTINT32(xdrs, (int32_t *)&fmt->fh4_flag))
+		return (FALSE);
+
+#ifdef VOLATILE_FH_TEST
+	if (!XDR_PUTINT32(xdrs, (int32_t *)&fmt->fh4_volatile_id))
+		return (FALSE);
+#endif
+
+	return (TRUE);
+}
+
+/*
+ * XDR a NFSv4 filehandle.
  */
 bool_t
 xdr_nfs_fh4(XDR *xdrs, nfs_fh4 *objp)
 {
-	if (xdrs->x_op != XDR_FREE)
-		return (xdr_bytes(xdrs, (char **)&objp->nfs_fh4_val,
-			(uint_t *)&objp->nfs_fh4_len, NFS4_FHSIZE));
+	if (xdrs->x_op == XDR_DECODE)
+		return (xdr_decode_nfs_fh4(xdrs, objp));
+	else if (xdrs->x_op == XDR_ENCODE)
+		return (xdr_encode_nfs_fh4(xdrs, objp));
 
 	if (objp->nfs_fh4_val != NULL) {
 		kmem_free(objp->nfs_fh4_val, objp->nfs_fh4_len);
@@ -3279,7 +3568,7 @@
 }
 
 static bool_t
-xdr_nfs_argop4_free(XDR *xdrs, nfs_argop4 **arrayp, int len)
+xdr_snfs_argop4_free(XDR *xdrs, nfs_argop4 **arrayp, int len)
 {
 	int i;
 	nfs_argop4 *array = *arrayp;
@@ -3457,9 +3746,6 @@
 static bool_t
 xdr_nfs_argop4(XDR *xdrs, nfs_argop4 *objp)
 {
-	if (!xdr_int(xdrs, (int *)&objp->argop))
-		return (FALSE);
-
 	/*
 	 * These should be ordered by frequency of use
 	 */
@@ -3614,6 +3900,30 @@
 	return (FALSE);
 }
 
+static bool_t
+xdr_cnfs_argop4_wrap(XDR *xdrs, nfs_argop4 *objp)
+{
+	/* XDR the op number here, then hand off to the common code. */
+	if (xdr_int(xdrs, (int *)&objp->argop))
+		return (xdr_nfs_argop4(xdrs, objp));
+	return (FALSE);
+}
+
+static bool_t
+xdr_snfs_argop4(XDR *xdrs, nfs_argop4 *objp)
+{
+	/* The operation number always comes first. */
+	if (!xdr_int(xdrs, (int *)&objp->argop))
+		return (FALSE);
+
+	/* PUTFH carries a filehandle, which the server decodes itself. */
+	if (objp->argop == OP_PUTFH)
+		return (xdr_decode_nfs_fh4(xdrs,
+			&objp->nfs_argop4_u.opputfh.object));
+
+	return (xdr_nfs_argop4(xdrs, objp));
+}
+
 /*
  * Client side encode only arg op processing
  */
@@ -3632,7 +3942,7 @@
 	 * Special case the private pseudo ops
 	 */
 	if (!(objp->argop & SUNW_PRIVATE_OP))
-		return (xdr_nfs_argop4(xdrs, objp));
+		return (xdr_cnfs_argop4_wrap(xdrs, objp));
 
 	/*
 	 * These should be ordered by frequency of use
@@ -3897,10 +4207,14 @@
 }
 
 static bool_t
+xdr_snfs_resop4_free(XDR *xdrs, nfs_resop4 **arrayp, int len, int decode_len)
+{
+	return (xdr_nfs_resop4_free(xdrs, arrayp, len, decode_len));
+}
+
+static bool_t
 xdr_nfs_resop4(XDR *xdrs, nfs_resop4 *objp)
 {
-	if (!xdr_int(xdrs, (int *)&objp->resop))
-		return (FALSE);
 	/*
 	 * These should be ordered by frequency of use
 	 */
@@ -4052,6 +4366,35 @@
 }
 
 static bool_t
+xdr_cnfs_resop4_wrap(XDR *xdrs, nfs_resop4 *objp)
+{
+	/* XDR the op number here, then hand off to the common code. */
+	if (xdr_int(xdrs, (int *)&objp->resop))
+		return (xdr_nfs_resop4(xdrs, objp));
+	return (FALSE);
+}
+
+static bool_t
+xdr_snfs_resop4(XDR *xdrs, nfs_resop4 *objp)
+{
+	if (!xdr_int(xdrs, (int *)&objp->resop))
+		return (FALSE);
+
+	/* Everything but GETFH goes through the common result code. */
+	if (objp->resop != OP_GETFH)
+		return (xdr_nfs_resop4(xdrs, objp));
+
+	/* GETFH: status first, then the filehandle only on success. */
+	if (!XDR_PUTINT32(xdrs,
+	    (int32_t *)&objp->nfs_resop4_u.opgetfh.status))
+		return (FALSE);
+	if (objp->nfs_resop4_u.opgetfh.status != NFS4_OK)
+		return (TRUE);
+	return (xdr_encode_nfs_fh4(xdrs,
+		&objp->nfs_resop4_u.opgetfh.object));
+}
+
+static bool_t
 xdr_nfs_resop4_clnt(XDR *xdrs, nfs_resop4 *objp, nfs_argop4 *aobjp)
 {
 	if (!xdr_int(xdrs, (int *)&objp->resop))
@@ -4260,7 +4603,7 @@
 }
 
 bool_t
-xdr_COMPOUND4args(XDR *xdrs, COMPOUND4args *objp)
+xdr_COMPOUND4args_srv(XDR *xdrs, COMPOUND4args *objp)
 {
 	if (!xdr_bytes(xdrs, (char **)&objp->tag.utf8string_val,
 			(uint_t *)&objp->tag.utf8string_len,
@@ -4271,9 +4614,9 @@
 	if (xdrs->x_op != XDR_FREE)
 		return (xdr_array(xdrs, (char **)&objp->array,
 			(uint_t *)&objp->array_len, NFS4_COMPOUND_LIMIT,
-			sizeof (nfs_argop4), (xdrproc_t)xdr_nfs_argop4));
-
-	return (xdr_nfs_argop4_free(xdrs, &objp->array, objp->array_len));
+			sizeof (nfs_argop4), (xdrproc_t)xdr_snfs_argop4));
+
+	return (xdr_snfs_argop4_free(xdrs, &objp->array, objp->array_len));
 }
 
 bool_t
@@ -4345,7 +4688,7 @@
 }
 
 bool_t
-xdr_COMPOUND4res(XDR *xdrs, COMPOUND4res *objp)
+xdr_COMPOUND4res_srv(XDR *xdrs, COMPOUND4res *objp)
 {
 	if (!xdr_int(xdrs, (int32_t *)&objp->status))
 		return (FALSE);
@@ -4357,9 +4700,9 @@
 	if (xdrs->x_op != XDR_FREE)
 		return (xdr_array(xdrs, (char **)&objp->array,
 			(uint_t *)&objp->array_len, NFS4_COMPOUND_LIMIT,
-			sizeof (nfs_resop4), (xdrproc_t)xdr_nfs_resop4));
-
-	return (xdr_nfs_resop4_free(xdrs, &objp->array,
+			sizeof (nfs_resop4), (xdrproc_t)xdr_snfs_resop4));
+
+	return (xdr_snfs_resop4_free(xdrs, &objp->array,
 				    objp->array_len, objp->array_len));
 }
 
--- a/usr/src/uts/common/fs/nfs/nfs_export.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs_export.c	Mon Oct 31 22:53:57 2005 -0800
@@ -159,63 +159,6 @@
 }
 
 /*
- * Counted byte string compare routine, optimized for file ids.
- */
-int
-nfs_fhbcmp(char *d1, char *d2, int l)
-{
-	int k;
-
-	if (l > NFS_FHMAXDATA)
-		return (1);
-
-	/*
-	 * We are always passed pointers to the data portions of
-	 * two fids, where pointers are always 2 bytes from 32 bit
-	 * alignment. If the length is also 2 bytes off word alignment,
-	 * we can do word compares, because the two bytes before the fid
-	 * data are always the length packed into a 16 bit short, so we
-	 * can safely start our comparisons at d1-2 and d2-2.
-	 * If the length is 2 bytes off word alignment, that probably
-	 * means that first two bytes are zeroes. This means that
-	 * first word in each fid, including the length are going to be
-	 * equal (we wouldn't call fhbcmp if the lengths weren't the
-	 * same). Thus it makes the most sense to start comparing the
-	 * last words of each data portion.
-	 */
-
-	if ((l & 0x3) == 2) {
-		/*
-		 * We are going move the data pointers to the
-		 * last word. Adding just the length, puts us to the
-		 * word past end of the data. So reduce length by one
-		 * word length.
-		 */
-		k = l - 4;
-		/*
-		 * Both adjusted length and the data pointer are offset two
-		 * bytes from word alignment. Adding them together gives
-		 * us word alignment.
-		 */
-		d1 += k;
-		d2 += k;
-		l += 2;
-		while (l -= 4) {
-			if (*(int *)d1 != *(int *)d2)
-				return (1);
-			d1 -= 4;
-			d2 -= 4;
-		}
-	} else {
-		while (l--) {
-			if (*d1++ != *d2++)
-				return (1);
-		}
-	}
-	return (0);
-}
-
-/*
  * Free the memory allocated within a secinfo entry.
  */
 void
@@ -1964,7 +1907,14 @@
 
 	fh->nfs_fh4_len = NFS_FH4_LEN;
 
-	fh_fmtp->fh4_i = exi->exi_fh;	/* copy the fhandle template */
+	fh_fmtp->fh4_i.fhx_fsid = exi->exi_fh.fh_fsid;
+	fh_fmtp->fh4_i.fhx_xlen = exi->exi_fh.fh_xlen;
+
+	bzero(fh_fmtp->fh4_i.fhx_data, sizeof (fh_fmtp->fh4_i.fhx_data));
+	bzero(fh_fmtp->fh4_i.fhx_xdata, sizeof (fh_fmtp->fh4_i.fhx_xdata));
+	bcopy(exi->exi_fh.fh_xdata, fh_fmtp->fh4_i.fhx_xdata,
+		exi->exi_fh.fh_xlen);
+
 	fh_fmtp->fh4_len = fid.fid_len;
 	ASSERT(fid.fid_len <= sizeof (fh_fmtp->fh4_data));
 	bcopy(fid.fid_data, fh_fmtp->fh4_data, fid.fid_len);
@@ -2407,7 +2357,7 @@
 find_volrnm_fh(struct exportinfo *exi, nfs_fh4 *fh4p)
 {
 	struct ex_vol_rename *p = NULL;
-	fhandle_t *fhp;
+	fhandle_ext_t *fhp;
 
 	/* XXX shouldn't we assert &exported_lock held? */
 	ASSERT(MUTEX_HELD(&exi->exi_vol_rename_lock));
@@ -2415,9 +2365,10 @@
 	if (fh4p->nfs_fh4_len != NFS_FH4_LEN) {
 		return (NULL);
 	}
-	fhp = &((struct nfs_fh4_fmt *)fh4p->nfs_fh4_val)->fh4_i;
+	fhp = &((nfs_fh4_fmt_t *)fh4p->nfs_fh4_val)->fh4_i;
 	for (p = exi->exi_vol_rename; p != NULL; p = p->vrn_next) {
-		if (bcmp(fhp, &p->vrn_fh_fmt.fh4_i, sizeof (fhandle_t)) == 0)
+		if (bcmp(fhp, &p->vrn_fh_fmt.fh4_i,
+		    sizeof (fhandle_ext_t)) == 0)
 			break;
 	}
 	return (p);
--- a/usr/src/uts/common/fs/nfs/nfs_server.c	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c	Mon Oct 31 22:53:57 2005 -0800
@@ -984,8 +984,8 @@
 
 	/* RFS4_compound = 1 */
 	{rfs4_compound,
-	    xdr_COMPOUND4args, NULL_xdrproc_t, sizeof (COMPOUND4args),
-	    xdr_COMPOUND4res, NULL_xdrproc_t, sizeof (COMPOUND4res),
+	    xdr_COMPOUND4args_srv, NULL_xdrproc_t, sizeof (COMPOUND4args),
+	    xdr_COMPOUND4res_srv, NULL_xdrproc_t, sizeof (COMPOUND4res),
 	    rfs4_compound_free, 0, 0},
 };
 
--- a/usr/src/uts/common/nfs/export.h	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/nfs/export.h	Mon Oct 31 22:53:57 2005 -0800
@@ -347,7 +347,7 @@
 
 #define	EQFID(fidp1, fidp2)	\
 	((fidp1)->fid_len == (fidp2)->fid_len && \
-	    nfs_fhbcmp((char *)(fidp1)->fid_data, (char *)(fidp2)->fid_data, \
+	    bcmp((char *)(fidp1)->fid_data, (char *)(fidp2)->fid_data, \
 	    (uint_t)(fidp1)->fid_len) == 0)
 
 #define	exportmatch(exi, fsid, fid)	\
--- a/usr/src/uts/common/nfs/nfs.h	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/nfs/nfs.h	Mon Oct 31 22:53:57 2005 -0800
@@ -390,24 +390,12 @@
 };
 
 /*
- * File access handle
- * This structure is the Sun server representation of a file.
- * It is handed out by a server for the client to use in further
- * file transactions.
+ * "Legacy" filehandles use NFS_FHMAXDATA (10) byte fids. Filesystems that
+ * return a larger fid than NFS_FHMAXDATA, such as ZFS's .zfs snapshot
+ * directory, can use up to NFS_FHMAXDATA_EXT bytes for their fid.
  */
-
-/*
- * This struct is only used to find the size of the data field in the
- * fhandle structure below.
- */
-struct fhsize {
-	fsid_t	f1;
-	ushort_t f2;
-	char	f3[4];
-	ushort_t f4;
-	char	f5[4];
-};
-#define	NFS_FHMAXDATA	((NFS_FHSIZE - sizeof (struct fhsize) + 8) / 2)
+#define	NFS_FHMAXDATA		10
+#define	NFS_FHMAXDATA_EXT	26
 
 /*
  * The current nfs file handle size for version 3 is currently 32 which is
@@ -420,9 +408,11 @@
 #define	NFS3_CURFHSIZE	32
 
 /*
- * This is the actual definition of a filehandle.  There is some dependence
- * on this layout in NFS-related code, particularly in the user-level lock
- * manager, so be careful about changing it.
+ * This is the actual definition of a legacy filehandle.  There is some
+ * dependence on this layout in NFS-related code, particularly in the
+ * user-level lock manager, so be careful about changing it.
+ *
+ * Currently NFSv2 and NFSv3 only use this structure.
  */
 
 struct svcfh {
@@ -436,6 +426,18 @@
 typedef struct svcfh fhandle_t;
 
 /*
+ * This is the actual definition of an extended filehandle, whose fids may be
+ * up to NFS_FHMAXDATA_EXT bytes long.  It is currently only used for NFSv4.
+ */
+typedef struct fhandle_ext {
+	fsid_t	fhx_fsid;			/* filesystem id */
+	ushort_t fhx_len;			/* file number length */
+	char	fhx_data[NFS_FHMAXDATA_EXT];	/* file number data */
+	ushort_t fhx_xlen;			/* export file number length */
+	char	fhx_xdata[NFS_FHMAXDATA_EXT];	/* export file number data */
+} fhandle_ext_t;
+
+/*
  * Arguments to remote write and writecache
  */
 /*
--- a/usr/src/uts/common/nfs/nfs4.h	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/nfs/nfs4.h	Mon Oct 31 22:53:57 2005 -0800
@@ -944,16 +944,16 @@
 #ifdef VOLATILE_FH_TEST
 
 struct nfs_fh4_fmt {
-	fhandle_t fh4_i;
-	uint32_t  fh4_flag;
-	uint32_t  fh4_volatile_id;
+	fhandle_ext_t	fh4_i;
+	uint32_t	fh4_flag;
+	uint32_t	fh4_volatile_id;
 };
 
 #else /* VOLATILE_FH_TEST */
 
 struct nfs_fh4_fmt {
-	fhandle_t fh4_i;
-	uint32_t  fh4_flag;
+	fhandle_ext_t	fh4_i;
+	uint32_t	fh4_flag;
 };
 
 #endif /* VOLATILE_FH_TEST */
@@ -961,11 +961,11 @@
 #define	FH4_NAMEDATTR	1
 #define	FH4_ATTRDIR	2
 
-#define	fh4_fsid	fh4_i.fh_fsid
-#define	fh4_len		fh4_i.fh_len 	/* fid length */
-#define	fh4_data	fh4_i.fh_data 	/* fid bytes */
-#define	fh4_xlen	fh4_i.fh_xlen
-#define	fh4_xdata	fh4_i.fh_xdata
+#define	fh4_fsid	fh4_i.fhx_fsid
+#define	fh4_len		fh4_i.fhx_len 	/* fid length */
+#define	fh4_data	fh4_i.fhx_data 	/* fid bytes */
+#define	fh4_xlen	fh4_i.fhx_xlen
+#define	fh4_xdata	fh4_i.fhx_xdata
 typedef struct nfs_fh4_fmt nfs_fh4_fmt_t;
 
 #define	fh4_to_fmt4(fh4p) ((nfs_fh4_fmt_t *)(fh4p)->nfs_fh4_val)
@@ -1286,6 +1286,9 @@
 extern uint_t nfs4_tsize(struct knetconfig *);
 extern uint_t rfs4_tsize(struct svc_req *);
 
+extern bool_t	xdr_inline_encode_nfs_fh4(uint32_t **, uint32_t *,
+			nfs_fh4_fmt_t *);
+
 #ifdef DEBUG
 extern int		rfs4_do_pre_op_attr;
 extern int		rfs4_do_post_op_attr;
--- a/usr/src/uts/common/nfs/nfs4_kprot.h	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/common/nfs/nfs4_kprot.h	Mon Oct 31 22:53:57 2005 -0800
@@ -20,7 +20,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1618,9 +1618,9 @@
 extern  bool_t xdr_nfstime4(XDR *, nfstime4 *);
 extern  bool_t xdr_settime4(XDR *, settime4 *);
 extern  bool_t xdr_COMPOUND4args_clnt(XDR *, COMPOUND4args_clnt *);
-extern  bool_t xdr_COMPOUND4args(XDR *, COMPOUND4args *);
+extern  bool_t xdr_COMPOUND4args_srv(XDR *, COMPOUND4args *);
 extern  bool_t xdr_COMPOUND4res_clnt(XDR *, COMPOUND4res_clnt *);
-extern  bool_t xdr_COMPOUND4res(XDR *, COMPOUND4res *);
+extern  bool_t xdr_COMPOUND4res_srv(XDR *, COMPOUND4res *);
 extern  bool_t xdr_CB_COMPOUND4args(XDR *, CB_COMPOUND4args *);
 extern  bool_t xdr_CB_COMPOUND4res(XDR *, CB_COMPOUND4res *);
 
--- a/usr/src/uts/intel/nfs/Makefile	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/intel/nfs/Makefile	Mon Oct 31 22:53:57 2005 -0800
@@ -22,7 +22,7 @@
 #
 # uts/intel/nfs/Makefile
 #
-# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
@@ -62,9 +62,15 @@
 #
 #	Overrides.
 #
+VOLFH_debug32	= -DVOLATILE_FH_TEST
+VOLFH_debug64	= -DVOLATILE_FH_TEST
+VOLFH_obj32	=
+VOLFH_obj64	=
+
 MODSTUBS_DIR	 = $(OBJS_DIR)
 $(MODSTUBS_O)	:= AS_CPPFLAGS += -DNFS_MODULE
 CLEANFILES	+= $(MODSTUBS_O)
+CFLAGS		+= $(VOLFH_$(OBJS_DIR))
 
 #
 #	Default build targets.
--- a/usr/src/uts/sparc/nfs/Makefile	Mon Oct 31 22:44:43 2005 -0800
+++ b/usr/src/uts/sparc/nfs/Makefile	Mon Oct 31 22:53:57 2005 -0800
@@ -21,7 +21,7 @@
 #
 #
 # uts/sparc/nfs/Makefile
-# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
@@ -61,10 +61,13 @@
 #
 #	Overrides.
 #
+VOLFH_debug64	= -DVOLATILE_FH_TEST
+VOLFH_obj64	=
+
 MODSTUBS_DIR	= $(OBJS_DIR)
 $(MODSTUBS_O)	:= AS_CPPFLAGS += -DNFS_MODULE
 CLEANFILES	+= $(MODSTUBS_O)
-CFLAGS		+= $(CCVERBOSE)
+CFLAGS		+= $(CCVERBOSE) $(VOLFH_$(OBJS_DIR))
 
 #
 #	Default build targets.