changeset 3055:e5701846929e

PSARC 2005/516 IPsec Tunnel Reform 4882852 tunnels vs. inverse acquire. 4970365 Support of ESP tunnel mode within Solaris 5027528 in.iked should be more intelligent about tunnel addresses 6180161 need to support multiple tunnels to a single nat 6208976 ipsecconf error messages make me think there are monsters under the bed 6313012 Clean up from removal of ipsec_inbound_debug_tag() 6351840 assertion failed: (ipha->ipha_protocol != 6) && (ipha->ipha_protocol != 17), ip.c, line: 15351 6359831 multicast tunnels don't get their IPsec policy checked. 6369094 ipseckey shouldn't accept/save-out encryption algorithm even if it's none/any 6374560 ipseckey debug functions should be moved to libipsecutil 6374596 dump utilities need to be able to understand inner tunnel addresses and netmasks 6402781 Five dead declarations in IPsec code 6405338 spdsock leaks policy head references 6437366 NAT-OA payloads not processed early enough. 6465594 ipsec_policy_delete() uses wrong ipsec_selkey_t structure. 6467596 spdsock_ext_to_actvec() needs to reset "act" upon every SPD_ATTR_NEXT. 6470725 PF_POLICY shouldn't accept '0' for an algorithm value. 6475903 Outbound DROP rules are not enforced 6480815 INVERSE_ACQUIRE failures leak in in.iked 6482403 Race in in.iked, early door call vs. rest of initialization code 6482653 Don't accept UDP-encapsulated ESP on non-NAT SAs. 6487857 Post-ACQUIRE, AH+ESP packets misinitialized ipha/ip6
author danmcd
date Fri, 03 Nov 2006 07:10:24 -0800
parents cd724d2a9a13
children e86c65871ade
files usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c usr/src/cmd/cmd-inet/usr.sbin/ikeadm.c usr/src/cmd/cmd-inet/usr.sbin/ipsecconf.c usr/src/cmd/cmd-inet/usr.sbin/ipseckey.c usr/src/lib/libipsecutil/Makefile.com usr/src/lib/libipsecutil/common/ipsec_util.c usr/src/lib/libipsecutil/common/ipsec_util.h usr/src/lib/libipsecutil/common/mapfile-vers usr/src/uts/common/inet/ip.h usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip6.c usr/src/uts/common/inet/ip/ip_sadb.c usr/src/uts/common/inet/ip/ipdrop.c usr/src/uts/common/inet/ip/ipsecah.c usr/src/uts/common/inet/ip/ipsecesp.c usr/src/uts/common/inet/ip/keysock.c usr/src/uts/common/inet/ip/nattymod.c usr/src/uts/common/inet/ip/sadb.c usr/src/uts/common/inet/ip/spd.c usr/src/uts/common/inet/ip/spdsock.c usr/src/uts/common/inet/ip/tun.c usr/src/uts/common/inet/ipclassifier.h usr/src/uts/common/inet/ipdrop.h usr/src/uts/common/inet/ipsec_impl.h usr/src/uts/common/inet/ipsec_info.h usr/src/uts/common/inet/sadb.h usr/src/uts/common/inet/spdsock.h usr/src/uts/common/inet/tcp/tcp.c usr/src/uts/common/inet/tun.h usr/src/uts/common/net/if.h usr/src/uts/common/net/pfkeyv2.h usr/src/uts/common/net/pfpolicy.h
diffstat 32 files changed, 7937 insertions(+), 3482 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ifconfig/ifconfig.c	Fri Nov 03 07:10:24 2006 -0800
@@ -3026,16 +3026,21 @@
 	 */
 	assert(tparams->ifta_vers == IFTUN_VERSION);
 
-	ipsr = (ipsec_req_t *)(&tparams->ifta_secinfo);
-	if (ipsr->ipsr_ah_req & IPSEC_PREF_REQUIRED) {
-		(void) printf("ah (%s)  ",
-		    rparsealg(ipsr->ipsr_auth_alg, IPSEC_PROTO_AH));
-	}
-	if (ipsr->ipsr_esp_req & IPSEC_PREF_REQUIRED) {
-		(void) printf("esp (%s",
-		    rparsealg(ipsr->ipsr_esp_alg, IPSEC_PROTO_ESP));
-		(void) printf("/%s)",
-		    rparsealg(ipsr->ipsr_esp_auth_alg, IPSEC_PROTO_AH));
+	if (tparams->ifta_flags & IFTUN_COMPLEX_SECURITY) {
+		(void) printf("-->  use 'ipsecconf -ln -i %s'",
+		    tparams->ifta_lifr_name);
+	} else {
+		ipsr = (ipsec_req_t *)(&tparams->ifta_secinfo);
+		if (ipsr->ipsr_ah_req & IPSEC_PREF_REQUIRED) {
+			(void) printf("ah (%s)  ",
+			    rparsealg(ipsr->ipsr_auth_alg, IPSEC_PROTO_AH));
+		}
+		if (ipsr->ipsr_esp_req & IPSEC_PREF_REQUIRED) {
+			(void) printf("esp (%s",
+			    rparsealg(ipsr->ipsr_esp_alg, IPSEC_PROTO_ESP));
+			(void) printf("/%s)",
+			    rparsealg(ipsr->ipsr_esp_auth_alg, IPSEC_PROTO_AH));
+		}
 	}
 	(void) printf("\n");
 }
--- a/usr/src/cmd/cmd-inet/usr.sbin/ikeadm.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ikeadm.c	Fri Nov 03 07:10:24 2006 -0800
@@ -1563,10 +1563,10 @@
 	}
 
 	(void) printf("%s ", prefix);
-	(void) dump_sockaddr((struct sockaddr *)beg, B_TRUE, stdout);
+	(void) dump_sockaddr((struct sockaddr *)beg, 0, B_TRUE, stdout);
 	if (range) {
 		(void) printf(" - ");
-		(void) dump_sockaddr((struct sockaddr *)end, B_TRUE, stdout);
+		(void) dump_sockaddr((struct sockaddr *)end, 0, B_TRUE, stdout);
 	}
 	(void) printf("\n");
 
@@ -1592,7 +1592,7 @@
 		(void) printf(":\n");
 
 	(void) printf("%s ", prefix);
-	(void) dump_sockaddr((struct sockaddr *)sa, B_FALSE, stdout);
+	(void) dump_sockaddr((struct sockaddr *)sa, 0, B_FALSE, stdout);
 }
 
 static void
--- a/usr/src/cmd/cmd-inet/usr.sbin/ipsecconf.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipsecconf.c	Fri Nov 03 07:10:24 2006 -0800
@@ -64,6 +64,9 @@
  */
 #define	MAXLEN			1024
 
+/* Max length of tunnel interface string identifier */
+#define	TUNNAMEMAXLEN		LIFNAMSIZ
+
 /*
  * Used by parse_one and parse/parse_action to communicate
  * the errors. -1 is failure, which is not defined here.
@@ -110,10 +113,23 @@
 /* Types of Error messages */
 typedef enum error_type {BAD_ERROR, DUP_ERROR, REQ_ERROR} error_type_t;
 
+/* Error message human readable conversions */
+static char *sys_error_message(int);
+static void error_message(error_type_t, int, int);
+
 static int cmd;
 static char *filename;
 static char lo_buf[MAXLEN];			/* Leftover buffer */
 
+/*
+ * The new SPD_EXT_TUN_NAME extension has a tunnel name in it.  Use the empty
+ * string ("", stored in the char value "all_polheads") for all policy heads
+ * (global and all tunnels).  Set interface_name to NULL for global-only, or
+ * specify a name of an IP-in-IP tunnel.
+ */
+static char *interface_name;
+static char all_polheads;	/* So we can easily get "". */
+
 /* Error reporting stuff */
 #define	CBUF_LEN		4096		/* Maximum size of the cmd */
 /*
@@ -167,6 +183,7 @@
  */
 static struct hostent *shp, *dhp;
 static unsigned int splen, dplen;
+static char tunif[TUNNAMEMAXLEN];
 static boolean_t has_saprefix, has_daprefix;
 static uint32_t seq_cnt = 0;
 
@@ -239,6 +256,8 @@
 	uint8_t has_dmask;
 	uint8_t has_type;
 	uint8_t has_code;
+	uint8_t has_negotiate;
+	uint8_t has_tunnel;
 	uint16_t swap;
 
 	struct in6_addr	ips_src_addr_v6;
@@ -263,6 +282,11 @@
 	 * SPD_RULE_FLAG_OUTBOUND		0x0002
 	 */
 	uint8_t			ips_dir;
+	/*
+	 * Keep track of tunnel separately due to explosion of ways to set
+	 * inbound/outbound.
+	 */
+	boolean_t		ips_tunnel;
 	uint64_t		ips_policy_index;
 	uint32_t		ips_act_cnt;
 	ips_act_props_t	*ips_acts;
@@ -286,6 +310,8 @@
 } str_tval_t;
 
 static int	parse_int(const char *);
+static int	parse_index(const char *, char *);
+static int	attach_tunname(spd_if_t *);
 static void	usage(void);
 static int	ipsec_conf_del(int, boolean_t);
 static int	ipsec_conf_add(void);
@@ -298,8 +324,7 @@
 static int	parse_one(FILE *, act_prop_t *);
 static void	reconfigure();
 static void	in_prefixlentomask(unsigned int, uchar_t *);
-static unsigned int in_getprefixlen(char *);
-static int	in_masktoprefix(uint8_t *, boolean_t);
+static int	in_getprefixlen(char *);
 static int	parse_address(int, char *);
 #ifdef DEBUG_HEAVY
 static void	pfpol_msg_dump(spd_msg_t *msg, char *);
@@ -348,9 +373,12 @@
 #define	TOK_dir 	12
 #define	TOK_type	13
 #define	TOK_code	14
+#define	TOK_negotiate	15
+#define	TOK_tunnel	16
 
 #define	IPS_SA SPD_ATTR_END
 #define	IPS_DIR SPD_ATTR_EMPTY
+#define	IPS_NEG SPD_ATTR_NOP
 
 
 static str_tval_t pattern_table[] = {
@@ -375,6 +403,8 @@
 	{"dir",			TOK_dir,		IPS_DIR},
 	{"type",		TOK_type,		SPD_EXT_ICMP_TYPECODE},
 	{"code",		TOK_code,		SPD_EXT_ICMP_TYPECODE},
+	{"negotiate",		TOK_negotiate,		IPS_NEG},
+	{"tunnel",		TOK_tunnel,		SPD_EXT_TUN_NAME},
 	{NULL, 			0,				0},
 };
 
@@ -580,7 +610,7 @@
 		if (cnt < 0) {
 			err(-1, gettext("alglist failed: write"));
 		} else {
-			errx(-1, gettext("admin failed: short write"));
+			errx(-1, gettext("alglist failed: short write"));
 		}
 	}
 
@@ -934,6 +964,7 @@
 		struct spd_ext_actions *spd_ext_actions;
 		struct spd_attribute *ap;
 		struct spd_typecode *spd_typecode;
+		spd_if_t *spd_if;
 		ips_act_props_t *act_ptr;
 		uint32_t rule_priority = 0;
 
@@ -977,6 +1008,8 @@
 		spd_rule->spd_rule_len = SPD_8TO64(sizeof (struct spd_rule));
 		spd_rule->spd_rule_type = SPD_EXT_RULE;
 		spd_rule->spd_rule_flags = conf->ips_dir;
+		if (conf->ips_tunnel)
+			spd_rule->spd_rule_flags |= SPD_RULE_FLAG_TUNNEL;
 
 		next = (uint64_t *)&(spd_rule[1]);
 
@@ -990,6 +1023,18 @@
 			next = (uint64_t *)&(spd_proto[1]);
 		}
 
+		/* tunnel */
+		if (conf->has_tunnel != 0) {
+			spd_if = (spd_if_t *)next;
+			spd_if->spd_if_len =
+			    SPD_8TO64(P2ROUNDUP(strlen(tunif) + 1, 8) +
+			    sizeof (spd_if_t));
+			spd_if->spd_if_exttype = SPD_EXT_TUN_NAME;
+			(void) strlcpy((char *)spd_if->spd_if_name, tunif,
+				TUNNAMEMAXLEN);
+			next = (uint64_t *)(spd_if) + spd_if->spd_if_len;
+		}
+
 		/* icmp type/code */
 		if (conf->ips_ulp_prot == IPPROTO_ICMP ||
 		    conf->ips_ulp_prot == IPPROTO_ICMPV6) {
@@ -1159,7 +1204,7 @@
 
 
 static int
-send_pf_pol_message(int ipsec_cmd, ips_conf_t *conf)
+send_pf_pol_message(int ipsec_cmd, ips_conf_t *conf, int *diag)
 {
 	int retval;
 	int cnt;
@@ -1169,6 +1214,8 @@
 	spd_ext_t *exts[SPD_EXT_MAX+1];
 	int fd = get_pf_pol_socket();
 
+	*diag = 0;
+
 	if (fd < 0)
 		return (EBADF);
 
@@ -1201,16 +1248,17 @@
 #endif
 
 			if (cnt > 8 && return_buf->spd_msg_errno) {
-				int diag = return_buf->spd_msg_diagnostic;
+				*diag = return_buf->spd_msg_diagnostic;
 				if (!ipsecconf_qflag) {
 					warnx("%s: %s",
-					    gettext("spd_msg return"),
-					    strerror(
+					    gettext("Kernel returned"),
+					    sys_error_message(
 					    return_buf->spd_msg_errno));
 				}
-				if (diag != 0)
-					(void) printf("%s\n",
-					    spdsock_diag(diag));
+				if (*diag != 0)
+					(void) printf(gettext(
+					    "\t(spdsock diagnostic: %s)\n"),
+					    spdsock_diag(*diag));
 #ifdef DEBUG_HEAVY
 				pfpol_msg_dump((spd_msg_t *)polmsg.iov_base,
 				    "message in");
@@ -1277,8 +1325,16 @@
 		cmd = IPSEC_CONF_VIEW;
 		goto done;
 	}
-	while ((c = getopt(argc, argv, "nlfa:qd:r:")) != EOF) {
+	while ((c = getopt(argc, argv, "nlfLFa:qd:r:i:")) != EOF) {
 		switch (c) {
+		case 'F':
+			if (interface_name != NULL) {
+				usage();
+				exit(1);
+			}
+			/* Apply to all policy heads - global and tunnels. */
+			interface_name = &all_polheads;
+			/* FALLTHRU */
 		case 'f':
 			/* Only one command at a time */
 			if (cmd != 0) {
@@ -1287,6 +1343,14 @@
 			}
 			cmd = IPSEC_CONF_FLUSH;
 			break;
+		case 'L':
+			if (interface_name != NULL) {
+				usage();
+				exit(1);
+			}
+			/* Apply to all policy heads - global and tunnels. */
+			interface_name = &all_polheads;
+			/* FALLTHRU */
 		case 'l':
 			/* Only one command at a time */
 			if (cmd != 0) {
@@ -1296,8 +1360,8 @@
 			cmd = IPSEC_CONF_LIST;
 			break;
 		case 'a':
-			/* Only one command at a time */
-			if (cmd != 0) {
+			/* Only one command at a time, and no interface name */
+			if (cmd != 0 || interface_name != NULL) {
 				usage();
 				exit(1);
 			}
@@ -1305,13 +1369,16 @@
 			filename = optarg;
 			break;
 		case 'd':
-			/* Only one command at a time */
+			/*
+			 * Only one command at a time.  Interface name is
+			 * optional.
+			 */
 			if (cmd != 0) {
 				usage();
 				exit(1);
 			}
 			cmd = IPSEC_CONF_DEL;
-			index = parse_int(optarg);
+			index = parse_index(optarg, NULL);
 			break;
 		case 'n' :
 			ipsecconf_nflag++;
@@ -1320,14 +1387,27 @@
 			ipsecconf_qflag++;
 			break;
 		case 'r' :
-			/* only one command at a time */
-			if (cmd != 0) {
+			/* Only one command at a time, and no interface name */
+			if (cmd != 0 || interface_name != NULL) {
 				usage();
 				exit(1);
 			}
 			cmd = IPSEC_CONF_SUB;
 			filename = optarg;
 			break;
+		case 'i':
+			if (interface_name != NULL) {
+				warnx(
+				    gettext("Interface name already selected"));
+				exit(1);
+			}
+			interface_name = optarg;
+			/* Reject ""; it is reserved for all policy heads. */
+			if (strlen(optarg) == 0) {
+				usage();
+				exit(1);
+			}
+			break;
 		default :
 			usage();
 			exit(1);
@@ -1366,6 +1446,10 @@
 		(void) restore_all_signals();
 		break;
 	case IPSEC_CONF_VIEW:
+		if (interface_name != NULL) {
+			warnx(gettext("Cannot view for one interface only.\n"));
+			exit(1);
+		}
 		ret = ipsec_conf_view();
 		break;
 	case IPSEC_CONF_DEL:
@@ -1629,135 +1713,11 @@
 		(void) printf("%d ", port);
 }
 
-#if 0
 /*
- * Print the mask (source or destination depending on the specified type)
- * defined in the policy pointed to by cptr.
- * We follow ifconfig's lead, i.e. we use the decimal dot notation for IPv4
- * masks and the /N prefix length form for IPv6.
- */
-static void
-print_mask(ips_conf_t *cptr, int type)
-{
-	struct in_addr addr;
-	struct in6_addr addr6;
-	struct in_addr mask;
-	char buf[INET6_ADDRSTRLEN];
-	boolean_t isv4;
-	struct in6_addr *in_addr_ptr;
-	struct in6_addr *in_mask_ptr;
-
-	if (type == IPS_SRC_MASK) {
-		in_addr_ptr = &cptr->ips_src_addr_v6;
-		in_mask_ptr = &cptr->ips_src_mask_v6;
-	} else {
-		in_addr_ptr = &cptr->ips_dst_addr_v6;
-		in_mask_ptr = &cptr->ips_dst_mask_v6;
-	}
-
-	isv4 = cptr->ips_isv4;
-
-	/*
-	 * If the address is INADDR_ANY, don't print the mask.
-	 */
-	if (isv4) {
-		IN6_V4MAPPED_TO_INADDR(in_addr_ptr, &addr);
-		if (addr.s_addr == INADDR_ANY)
-			return;
-	} else {
-		addr6 = *in_addr_ptr;
-		if (IN6_IS_ADDR_UNSPECIFIED(&addr6))
-			return;
-	}
-
-	if (isv4) {
-		(void) printf(" ");
-		print_pattern_string(type);
-		IN6_V4MAPPED_TO_INADDR(in_mask_ptr, &mask);
-		(void) printf("%s ",
-		    inet_ntop(AF_INET, (uchar_t *)&mask.s_addr, buf,
-		    INET6_ADDRSTRLEN));
-	} else {
-		(void) printf("/%d ",
-		    in_masktoprefix((uint8_t *)&in_mask_ptr->s6_addr, B_FALSE));
-	}
-}
-
-/*
- * Print the address and mask.
+ * Print the address, given as "raw" input via the void pointer.
  */
 static void
-print_address(ips_conf_t *cptr, int type)
-{
-	char  *cp;
-	struct hostent *hp;
-	char	domain[MAXHOSTNAMELEN + 1];
-	struct in_addr addr;
-	struct in6_addr addr6;
-	char abuf[INET6_ADDRSTRLEN];
-	int error_num;
-	struct in6_addr *in_addr_ptr;
-	uchar_t *addr_ptr;
-	sa_family_t af;
-	int addr_len;
-
-	if (type == SPD_EXT_LCLADDR)
-		in_addr_ptr = &cptr->ips_src_addr_v6;
-	else
-		in_addr_ptr = &cptr->ips_dst_addr_v6;
-
-	if (cptr->ips_isv4) {
-		af = AF_INET;
-		/* we don't print unspecified addresses */
-		IN6_V4MAPPED_TO_INADDR(in_addr_ptr, &addr);
-		if (addr.s_addr == INADDR_ANY)
-			return;
-		addr_ptr = (uchar_t *)&addr.s_addr;
-		addr_len = IPV4_ADDR_LEN;
-	} else {
-		af = AF_INET6;
-		addr6 = *in_addr_ptr;
-		/* we don't print unspecified addresses */
-		if (IN6_IS_ADDR_UNSPECIFIED(&addr6))
-			return;
-		addr_ptr = (uchar_t *)&addr6.s6_addr;
-		addr_len = sizeof (struct in6_addr);
-	}
-
-	print_pattern_string(type);
-
-	if (!ipsecconf_nflag) {
-		if (sysinfo(SI_HOSTNAME, domain, MAXHOSTNAMELEN) != -1 &&
-			(cp = strchr(domain, '.')) != NULL) {
-			(void) strcpy(domain, cp + 1);
-		} else {
-			domain[0] = 0;
-		}
-		hp = getipnodebyaddr(addr_ptr, addr_len, af, &error_num);
-		if (hp == NULL)
-			cp = NULL;
-		else {
-			if ((cp = strchr(hp->h_name, '.')) != 0 &&
-					strcasecmp(cp + 1, domain) == 0)
-				*cp = 0;
-			cp = hp->h_name;
-		}
-	}
-
-	if (cp) {
-		(void) printf("%s", cp);
-	} else {
-		(void) printf("%s", inet_ntop(af, addr_ptr, abuf,
-		    INET6_ADDRSTRLEN));
-	}
-}
-#endif
-
-/*
- * Print the address and mask.
- */
-static void
-print_address2(void *input, int isv4)
+print_raw_address(void *input, boolean_t isv4)
 {
 	char  *cp;
 	struct hostent *hp;
@@ -1869,10 +1829,16 @@
 {
 	int ret;
 	int pfd;
-	struct spd_msg msg;
+	struct spd_msg *msg;
 	int cnt;
 	spd_msg_t *rmsg;
 	spd_ext_t *exts[SPD_EXT_MAX+1];
+	/*
+	 * Add an extra 8 bytes of space (+1 uint64_t) to avoid truncation
+	 * issues.
+	 */
+	uint64_t buffer[
+	    SPD_8TO64(sizeof (*msg) + sizeof (spd_if_t) + LIFNAMSIZ) + 1];
 
 	pfd = get_pf_pol_socket();
 
@@ -1881,12 +1847,15 @@
 		return (-1);
 	}
 
-	(void) memset(&msg, 0, sizeof (msg));
-	msg.spd_msg_version = PF_POLICY_V1;
-	msg.spd_msg_type = SPD_DUMP;
-	msg.spd_msg_len = SPD_8TO64(sizeof (msg));
-
-	cnt = write(pfd, &msg, sizeof (msg));
+	(void) memset(buffer, 0, sizeof (buffer));
+	msg = (struct spd_msg *)buffer;
+	msg->spd_msg_version = PF_POLICY_V1;
+	msg->spd_msg_type = SPD_DUMP;
+	msg->spd_msg_len = SPD_8TO64(sizeof (*msg));
+
+	msg->spd_msg_len += attach_tunname((spd_if_t *)(msg + 1));
+
+	cnt = write(pfd, msg, SPD_64TO8(msg->spd_msg_len));
 
 	if (cnt < 0) {
 		warn(gettext("dump: invalid write() return"));
@@ -1899,7 +1868,8 @@
 	if (rmsg == NULL || rmsg->spd_msg_errno != 0) {
 		warnx("%s: %s", gettext("ruleset dump failed"),
 		    (rmsg == NULL ?
-			gettext("read error") : strerror(rmsg->spd_msg_errno)));
+			gettext("read error") :
+			sys_error_message(rmsg->spd_msg_errno)));
 		(void) close(pfd);
 		return (-1);
 	}
@@ -1916,7 +1886,7 @@
 
 		if (rmsg->spd_msg_errno != 0) {
 			warnx("%s: %s", gettext("dump read: bad message"),
-			    strerror(rmsg->spd_msg_errno));
+			    sys_error_message(rmsg->spd_msg_errno));
 			(void) close(pfd);
 			return (-1);
 		}
@@ -1927,7 +1897,7 @@
 			if (strlen(spdsock_diag_buf) != 0)
 				warnx(spdsock_diag_buf);
 			warnx("%s: %s", gettext("dump read: bad message"),
-			    strerror(rmsg->spd_msg_errno));
+			    sys_error_message(rmsg->spd_msg_errno));
 			(void) close(pfd);
 			return (ret);
 		}
@@ -1997,6 +1967,7 @@
 	struct spd_ext_actions *spd_ext_actions;
 	struct spd_typecode *spd_typecode;
 	struct spd_attribute *app;
+	spd_if_t *spd_if;
 	uint32_t rv;
 	uint16_t act_count;
 
@@ -2004,8 +1975,16 @@
 	    SPDSOCK_DIAG_BUF_LEN);
 
 	if (rv == KGE_OK && exts[SPD_EXT_RULE] != NULL) {
+		spd_if = (spd_if_t *)exts[SPD_EXT_TUN_NAME];
 		spd_rule = (struct spd_rule *)exts[SPD_EXT_RULE];
-		(void) printf("%s %lld\n", INDEX_TAG, spd_rule->spd_rule_index);
+		if (spd_if == NULL) {
+			(void) printf("%s %lld\n", INDEX_TAG,
+			    spd_rule->spd_rule_index);
+		} else {
+			(void) printf("%s %s,%lld\n", INDEX_TAG,
+			    (char *)spd_if->spd_if_name,
+			    spd_rule->spd_rule_index);
+		}
 	} else {
 		if (strlen(spdsock_diag_buf) != 0)
 			warnx(spdsock_diag_buf);
@@ -2015,6 +1994,13 @@
 
 	(void) printf("%c ", CURL_BEGIN);
 
+	if (spd_if != NULL) {
+		(void) printf("tunnel %s negotiate %s ",
+		    (char *)spd_if->spd_if_name,
+		    (spd_rule->spd_rule_flags & SPD_RULE_FLAG_TUNNEL) ?
+		    "tunnel" : "transport");
+	}
+
 	if (exts[SPD_EXT_PROTO] != NULL) {
 		spd_proto = (struct spd_proto *)exts[SPD_EXT_PROTO];
 		print_ulp(spd_proto->spd_proto_number);
@@ -2024,11 +2010,8 @@
 		spd_address = (spd_address_t *)exts[SPD_EXT_LCLADDR];
 
 		(void) printf("laddr ");
-		if (spd_address->spd_address_len == 2)
-			print_address2((spd_address+1), 1);
-		else
-			print_address2((spd_address+1), 0);
-
+		print_raw_address((spd_address + 1),
+		    (spd_address->spd_address_len == 2));
 		(void) printf("/%d ", spd_address->spd_address_prefixlen);
 	}
 
@@ -2045,11 +2028,8 @@
 		spd_address = (spd_address_t *)exts[SPD_EXT_REMADDR];
 
 		(void) printf("raddr ");
-		if (spd_address->spd_address_len == 2)
-			print_address2((spd_address+1), 1);
-		else
-			print_address2((spd_address+1), 0);
-
+		print_raw_address((spd_address + 1),
+		    (spd_address->spd_address_len == 2));
 		(void) printf("/%d ", spd_address->spd_address_prefixlen);
 	}
 
@@ -2199,6 +2179,7 @@
 	struct spd_typecode *spd_typecode;
 	struct spd_ext_actions *spd_ext_actions;
 	struct spd_attribute *app;
+	spd_if_t *spd_if;
 	char abuf[INET6_ADDRSTRLEN];
 	uint32_t rv;
 	uint16_t act_count;
@@ -2226,6 +2207,11 @@
 		}
 
 		switch (i) {
+		case SPD_EXT_TUN_NAME:
+			spd_if = (spd_if_t *)exts[i];
+			(void) printf("spd_if = %s\n", spd_if->spd_if_name);
+			break;
+
 		case SPD_EXT_ICMP_TYPECODE:
 			spd_typecode = (struct spd_typecode *)exts[i];
 			(void) printf("icmp type %d-%d code %d-%d\n",
@@ -2447,6 +2433,7 @@
 	int ret = 0;
 	int offset, prev_offset;
 	int nlines;
+	char lifname[LIFNAMSIZ];
 
 	if (act_props == NULL) {
 		warn(gettext("memory"));
@@ -2476,13 +2463,15 @@
 		 */
 		buf = ibuf + index_len;
 		buf++;			/* Skip the space */
-		index = parse_int(buf);
+		index = parse_index(buf, lifname);
 		if (index == -1) {
 			warnx(gettext("Invalid index in the file"));
 			free(act_props);
 			return (-1);
 		}
-		if (index == policy_index) {
+		if (index == policy_index &&
+		    (interface_name == NULL ||
+			strncmp(interface_name, lifname, LIFNAMSIZ) == 0)) {
 			if (!ignore_spd) {
 				ret = parse_one(fp, act_props);
 				if (ret == -1) {
@@ -2554,15 +2543,20 @@
 	struct spd_msg *msg;
 	struct spd_rule *rule;
 	int sfd = socket(PF_POLICY, SOCK_RAW, PF_POLICY_V1);
-	int cnt;
+	int cnt, len, alloclen;
 
 	if (sfd < 0) {
 		warn(gettext("unable to open policy socket"));
 		return (-1);
 	}
 
-	msg = (spd_msg_t *)malloc(sizeof (spd_msg_t)
-	    + sizeof (struct spd_rule));
+	/*
+	 * Add an extra 8 bytes of space (+1 uint64_t) to avoid truncation
+	 * issues.
+	 */
+	alloclen = sizeof (spd_msg_t) + sizeof (struct spd_rule) +
+	    sizeof (spd_if_t) + LIFNAMSIZ + 8;
+	msg = (spd_msg_t *)malloc(alloclen);
 
 	if (msg == NULL) {
 		warn("malloc");
@@ -2571,7 +2565,7 @@
 
 	rule = (struct spd_rule *)(msg + 1);
 
-	(void) memset(msg, 0, sizeof (spd_msg_t) + sizeof (struct spd_rule));
+	(void) memset(msg, 0, alloclen);
 	msg->spd_msg_version = PF_POLICY_V1;
 	msg->spd_msg_type = SPD_DELETERULE;
 	msg->spd_msg_len = SPD_8TO64(sizeof (spd_msg_t)
@@ -2581,10 +2575,12 @@
 	rule->spd_rule_len = SPD_8TO64(sizeof (struct spd_rule));
 	rule->spd_rule_index = index;
 
-	cnt = write(sfd, msg,
-	    sizeof (spd_msg_t) + sizeof (struct spd_rule));
-
-	if (cnt != sizeof (spd_msg_t) + sizeof (struct spd_rule)) {
+	msg->spd_msg_len += attach_tunname((spd_if_t *)(rule + 1));
+
+	len = SPD_64TO8(msg->spd_msg_len);
+	cnt = write(sfd, msg, len);
+
+	if (cnt != len) {
 		if (cnt < 0) {
 			(void) close(sfd);
 			free(msg);
@@ -2598,9 +2594,8 @@
 		}
 	}
 
-	cnt = read(sfd, msg,
-	    sizeof (spd_msg_t) + sizeof (struct spd_rule));
-	if (cnt != sizeof (spd_msg_t) + sizeof (struct spd_rule)) {
+	cnt = read(sfd, msg, len);
+	if (cnt != len) {
 		if (cnt < 0) {
 			(void) close(sfd);
 			free(msg);
@@ -2628,23 +2623,33 @@
 static int
 ipsec_conf_flush(int db)
 {
-	int pfd, cnt;
+	int pfd, cnt, len;
 	int sfd = socket(PF_POLICY, SOCK_RAW, PF_POLICY_V1);
-	struct spd_msg msg;
+	struct spd_msg *msg;
+	/*
+	 * Add an extra 8 bytes of space (+1 uint64_t) to avoid truncation
+	 * issues.
+	 */
+	uint64_t buffer[
+	    SPD_8TO64(sizeof (*msg) + sizeof (spd_if_t) + LIFNAMSIZ) + 1];
 
 	if (sfd < 0) {
 		warn(gettext("unable to open policy socket"));
 		return (-1);
 	}
 
-	(void) memset(&msg, 0, sizeof (msg));
-	msg.spd_msg_version = PF_POLICY_V1;
-	msg.spd_msg_type = SPD_FLUSH;
-	msg.spd_msg_len = SPD_8TO64(sizeof (msg));
-	msg.spd_msg_spdid = db;
-
-	cnt = write(sfd, &msg, sizeof (msg));
-	if (cnt != sizeof (msg)) {
+	(void) memset(buffer, 0, sizeof (buffer));
+	msg = (struct spd_msg *)buffer;
+	msg->spd_msg_version = PF_POLICY_V1;
+	msg->spd_msg_type = SPD_FLUSH;
+	msg->spd_msg_len = SPD_8TO64(sizeof (*msg));
+	msg->spd_msg_spdid = db;
+
+	msg->spd_msg_len += attach_tunname((spd_if_t *)(msg + 1));
+
+	len = SPD_64TO8(msg->spd_msg_len);
+	cnt = write(sfd, msg, len);
+	if (cnt != len) {
 		if (cnt < 0) {
 			warn(gettext("Flush failed: write"));
 			return (-1);
@@ -2654,8 +2659,8 @@
 		}
 	}
 
-	cnt = read(sfd, &msg, sizeof (msg));
-	if (cnt != sizeof (msg)) {
+	cnt = read(sfd, msg, len);
+	if (cnt != len) {
 		if (cnt < 0) {
 			warn(gettext("Flush failed: read"));
 			return (-1);
@@ -2665,9 +2670,9 @@
 		}
 	}
 	(void) close(sfd);
-	if (msg.spd_msg_errno != 0) {
+	if (msg->spd_msg_errno != 0) {
 		warnx("%s: %s", gettext("Flush failed: SPD_FLUSH"),
-		    strerror(msg.spd_msg_errno));
+		    sys_error_message(msg->spd_msg_errno));
 		return (-1);
 	}
 
@@ -2693,25 +2698,38 @@
 	return (0);
 }
 
-/* function to send SPD_FLIP and SPD_CLONE messages */
+/*
+ * function to send SPD_FLIP and SPD_CLONE messages
+ * Do it for ALL polheads for simplicity's sake.
+ */
 static void
 ipsec_conf_admin(uint8_t type)
 {
 	int cnt;
 	int sfd = socket(PF_POLICY, SOCK_RAW, PF_POLICY_V1);
-	struct spd_msg msg;
+	struct spd_msg *msg;
+	uint64_t buffer[
+	    SPD_8TO64(sizeof (struct spd_msg) + sizeof (spd_if_t))];
+	char *save_ifname;
 
 	if (sfd < 0) {
 		err(-1, gettext("unable to open policy socket"));
 	}
 
-	(void) memset(&msg, 0, sizeof (msg));
-	msg.spd_msg_version = PF_POLICY_V1;
-	msg.spd_msg_type = type;
-	msg.spd_msg_len = SPD_8TO64(sizeof (msg));
-
-	cnt = write(sfd, &msg, sizeof (msg));
-	if (cnt != sizeof (msg)) {
+	(void) memset(buffer, 0, sizeof (buffer));
+	msg = (struct spd_msg *)buffer;
+	msg->spd_msg_version = PF_POLICY_V1;
+	msg->spd_msg_type = type;
+	msg->spd_msg_len = SPD_8TO64(sizeof (buffer));
+
+	save_ifname = interface_name;
+	/* Apply to all policy heads - global and tunnels. */
+	interface_name = &all_polheads;
+	(void) attach_tunname((spd_if_t *)(msg + 1));
+	interface_name = save_ifname;
+
+	cnt = write(sfd, msg, sizeof (buffer));
+	if (cnt != sizeof (buffer)) {
 		if (cnt < 0) {
 			err(-1, gettext("admin failed: write"));
 		} else {
@@ -2719,8 +2737,8 @@
 		}
 	}
 
-	cnt = read(sfd, &msg, sizeof (msg));
-	if (cnt != sizeof (msg)) {
+	cnt = read(sfd, msg, sizeof (buffer));
+	if (cnt != sizeof (buffer)) {
 		if (cnt < 0) {
 			err(-1, gettext("admin failed: read"));
 		} else {
@@ -2728,8 +2746,8 @@
 		}
 	}
 	(void) close(sfd);
-	if (msg.spd_msg_errno != 0) {
-		errno = msg.spd_msg_errno;
+	if (msg->spd_msg_errno != 0) {
+		errno = msg->spd_msg_errno;
 		err(-1, gettext("admin failed"));
 	}
 }
@@ -2746,12 +2764,15 @@
 usage(void)
 {
 	(void) fprintf(stderr, gettext(
-		"Usage:	ipsecconf\n"
-		"\tipsecconf -a ([-]|<filename>) [-q]\n"
-		"\tipsecconf -r ([-]|<filename>) [-q]\n"
-		"\tipsecconf -d <index>\n"
-		"\tipsecconf -l [-n]\n"
-		"\tipsecconf -f\n"));
+	"Usage:	ipsecconf\n"
+	"\tipsecconf -a ([-]|<filename>) [-q]\n"
+	"\tipsecconf -r ([-]|<filename>) [-q]\n"
+	"\tipsecconf -d [-i tunnel-interface] <index>\n"
+	"\tipsecconf -d <tunnel-interface,index>\n"
+	"\tipsecconf -l [-n] [-i tunnel-interface]\n"
+	"\tipsecconf -f [-i tunnel-interface]\n"
+	"\tipsecconf -L [-n]\n"
+	"\tipsecconf -F\n"));
 }
 
 /*
@@ -2831,58 +2852,72 @@
 }
 
 /*
- * Convert a mask to a prefix length.
- * Returns prefix length on success, 0 otherwise.
+ * Parses "<interface>,<index>" or a bare "<index>" and returns <index>.
+ * <interface> goes to iname, or to the global interface_name if iname is NULL
+ * (exit()ing if that is already set).  A bare <index> sets iname to "".
  */
-static unsigned int
-in_getprefixlen(char *mask)
+static int
+parse_index(const char *str, char *iname)
 {
-	long prefixlen;
-	char *end;
-
-	prefixlen = strtol(mask, &end, 10);
-	if (prefixlen < 0) {
-		return (0);
+	char *intf, *num, *copy;
+	int rc;
+
+	copy = strdup(str);
+	if (copy == NULL) {
+		warnx(gettext("Out of memory"));
+		exit(1);
+	}
+
+	intf = strtok(copy, ",");
+	/* Just want the rest of the string unmolested, so use "" for arg2. */
+	num = strtok(NULL, "");
+	if (num == NULL) {
+		/* No comma: bare index.  Clear iname so no stale name is seen. */
+		if (iname != NULL)
+			iname[0] = '\0';
+		free(copy);
+		return (parse_int(str));
 	}
-	if (mask == end) {
-		return (0);
+
+	if (iname != NULL) {
+		(void) strlcpy(iname, intf, LIFNAMSIZ);
+	} else {
+		if (interface_name != NULL) {
+			warnx(gettext("Interface name already selected"));
+			exit(1);
+		}
+		interface_name = strdup(intf);
+		if (interface_name == NULL) {
+			warnx(gettext("Out of memory"));
+			exit(1);
+		}
 	}
-	if (*end != '\0') {
-		return (0);
-	}
-	return ((unsigned int)prefixlen);
+	rc = parse_int(num);
+	free(copy);
+	return (rc);
 }
 
 /*
- * Convert an IPv6 mask to a prefix len.  I assume all IPv6 masks are
- * contiguous, so I stop at the first bit!
+ * Convert a decimal prefix-length string to an int.  Returns the prefix
+ * length on success, or -1 if it is malformed or outside 0..IPV6_ABITS.
  */
 static int
-in_masktoprefix(uint8_t *mask, boolean_t is_v4mapped)
+in_getprefixlen(char *mask)
 {
-	int rc = 0;
-	uint8_t last;
-	int limit = IPV6_ABITS;
-
-	if (is_v4mapped) {
-		mask += ((IPV6_ABITS - IP_ABITS)/8);
-		limit = IP_ABITS;
+	long prefixlen;
+	char *end;
+
+	prefixlen = strtol(mask, &end, 10);
+	if (prefixlen < 0 || prefixlen > IPV6_ABITS) {
+		return (-1);
 	}
-
-	while (*mask == 0xff) {
-		rc += 8;
-		if (rc == limit)
-			return (limit);
-		mask++;
+	if (mask == end) {
+		return (-1);
 	}
-
-	last = *mask;
-	while (last != 0) {
-		rc++;
-		last = (last << 1) & 0xff;
+	if (*end != '\0') {
+		return (-1);
 	}
-
-	return (rc);
+	return ((int)prefixlen);
 }
 
 /*
@@ -2908,7 +2943,7 @@
 parse_address(int type, char *addr_str)
 {
 	char *ptr;
-	unsigned int prefix_len = 0;
+	int prefix_len = 0;
 	struct netent *ne = NULL;
 	struct hostent *hp = NULL;
 	int h_errno;
@@ -2923,7 +2958,7 @@
 		*ptr++ = NULL;
 
 		prefix_len = in_getprefixlen(ptr);
-		if (prefix_len == 0)
+		if (prefix_len < 0)
 			return (-1);
 	}
 
@@ -3002,7 +3037,7 @@
 static int
 do_port_adds(ips_conf_t *cptr)
 {
-	int ret;
+	int ret, diag;
 
 	assert(IN6_IS_ADDR_UNSPECIFIED(&cptr->ips_src_addr_v6));
 	assert(IN6_IS_ADDR_UNSPECIFIED(&cptr->ips_dst_addr_v6));
@@ -3011,13 +3046,13 @@
 	(void) dump_conf(cptr);
 #endif
 
-	ret = send_pf_pol_message(SPD_ADDRULE, cptr);
+	ret = send_pf_pol_message(SPD_ADDRULE, cptr, &diag);
 	if (ret != 0 && !ipsecconf_qflag) {
 		warnx(
-		    gettext("Could not add IPv4 policy for sport %d, dport %d"),
+		    gettext("Could not add IPv4 policy for sport %d, dport %d "
+			"- diagnostic %d - %s"),
 		    ntohs(cptr->ips_src_port_min),
-		    ntohs(cptr->ips_dst_port_min));
-
+		    ntohs(cptr->ips_dst_port_min), diag, spdsock_diag(diag));
 	}
 
 	return (ret);
@@ -3058,7 +3093,7 @@
 	struct in_addr mask_v4;
 
 	if (hp->h_addr_list[1] != NULL) {
-		return (EINVAL);
+		return (EOPNOTSUPP);
 	}
 
 	if (!IN6_IS_ADDR_UNSPECIFIED(mask_v6)) {
@@ -3102,7 +3137,7 @@
  * cases.
  */
 static int
-do_address_adds(ips_conf_t *cptr)
+do_address_adds(ips_conf_t *cptr, int *diag)
 {
 	int i, j;
 	int ret = 0;	/* For ioctl() call. */
@@ -3184,7 +3219,7 @@
 				    isv4);
 			}
 
-			ret = send_pf_pol_message(SPD_ADDRULE, cptr);
+			ret = send_pf_pol_message(SPD_ADDRULE, cptr, diag);
 
 			if (ret == 0) {
 				add_count++;
@@ -3383,8 +3418,7 @@
 	 * Make sure that we get a null terminated string.
 	 * For a bad input, we truncate at VALID_ALG_LEN.
 	 */
-	(void) strncpy(tstr, str, VALID_ALG_LEN - 1);
-	tstr[VALID_ALG_LEN - 1] = '\0';
+	(void) strlcpy(tstr, str, VALID_ALG_LEN);
 	lens = strtok(tstr, "()");
 	lens = strtok(NULL, "()");
 
@@ -3456,6 +3490,27 @@
 	return (0);
 }
 
+/*
+ * Map an errno value to a human-readable message: gettext() text for the
+ * cases listed below, strerror() for everything else.
+ */
+static char *
+sys_error_message(int syserr)
+{
+	switch (syserr) {
+	case EEXIST:
+		return (gettext("Entry already exists"));
+	case ENOENT:
+		return (gettext("Tunnel not found"));
+	case EINVAL:
+		return (gettext("Invalid entry"));
+	default:
+		break;
+	}
+
+	return (strerror(syserr));
+}
+
 static void
 error_message(error_type_t error, int type, int line)
 {
@@ -3504,6 +3559,12 @@
 	case IPSEC_CONF_ICMP_CODE:
 		mesg = gettext("ICMP code");
 		break;
+	case IPSEC_CONF_NEGOTIATE:
+		mesg = gettext("Negotiate");
+		break;
+	case IPSEC_CONF_TUNNEL:
+		mesg = gettext("Tunnel");
+		break;
 	default :
 		return;
 	}
@@ -3536,7 +3597,7 @@
 		return (0);
 	}
 	if (!is_alg) {
-		warnx(gettext("No IPSEC algorithms given"));
+		warnx(gettext("No IPsec algorithms given"));
 		return (-1);
 	}
 	if (cptr->iap_attr == 0) {
@@ -3734,8 +3795,9 @@
 				 * If we never read a newline character,
 				 * we don't want to print 0.
 				 */
-				warnx(gettext("Bad start on line %d :"),
-				(linecount == 0) ? 1 : linecount);
+				warnx(gettext("line %d : line must start "
+				    "with \"{\" character"),
+				    (linecount == 0) ? 1 : linecount);
 				return (-1);
 			}
 			buf++;
@@ -3933,7 +3995,7 @@
 				 * character, we don't want
 				 * to print 0.
 				 */
-				warnx(gettext("(parse one)"
+				warnx(gettext("(parsing one command)"
 				    "Invalid action on line %d: %s"),
 				    (linecount == 0) ? 1 : linecount,
 				    act_props->ap[ap_num].act);
@@ -4519,7 +4581,64 @@
 			cptr->ips_icmp_code = (uint8_t)code;
 			cptr->ips_icmp_code_end = (uint8_t)code_end;
 			break;
+		case TOK_tunnel:
+			if (cptr->has_tunnel == 1) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_TUNNEL, line_no);
+				return (-1);
+			}
+			i++, line_no++;
+			if (act_props->pattern[i] == NULL) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_TUNNEL, line_no);
+				return (-1);
+			}
+
+			if (strlcpy(tunif, act_props->pattern[i],
+			    TUNNAMEMAXLEN) >= TUNNAMEMAXLEN) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_TUNNEL, line_no);
+				return (-1);
+			}
+			cptr->has_tunnel = 1;
+			break;
+		case TOK_negotiate:
+			if (cptr->has_negotiate == 1) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_NEGOTIATE, line_no);
+				return (-1);
+			}
+			i++, line_no++;
+			if (act_props->pattern[i] == NULL) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_NEGOTIATE, line_no);
+				return (-1);
+			}
+
+			if (strncmp(act_props->pattern[i], "tunnel", 6) == 0) {
+				cptr->ips_tunnel = B_TRUE;
+			} else if (strncmp(
+			    act_props->pattern[i], "transport", 9) != 0) {
+				error_message(BAD_ERROR,
+				    IPSEC_CONF_NEGOTIATE, line_no);
+				return (-1);
+			}
+			cptr->has_negotiate = 1;
+			break;
 		}
+
+	}
+
+	/* Sanity check that certain tokens occur together */
+	if (cptr->has_tunnel + cptr->has_negotiate == 1) {
+		if (cptr->has_negotiate == 0) {
+			error_message(REQ_ERROR, IPSEC_CONF_NEGOTIATE, line_no);
+		} else {
+			error_message(REQ_ERROR, IPSEC_CONF_TUNNEL, line_no);
+		}
+		errx(1, gettext(
+		    "tunnel and negotiate tokens must occur together"));
+		return (-1);
 	}
 
 	/*
@@ -4943,7 +5062,7 @@
 	act_prop_t *act_props = malloc(sizeof (act_prop_t));
 	ips_conf_t conf;
 	FILE *fp, *policy_fp;
-	int ret, i, j;
+	int ret, i, j, diag;
 	char *warning = gettext(
 		"\tWARNING : New policy entries that are being added may\n "
 		"\taffect the existing connections. Existing connections\n"
@@ -5026,7 +5145,7 @@
 				goto bail;
 			}
 		} else {
-			ret = do_address_adds(&conf);
+			ret = do_address_adds(&conf, &diag);
 			switch (ret) {
 			case 0:
 				/* no error. */
@@ -5039,7 +5158,21 @@
 					"Can't set mask and /NN prefix."));
 				ret = -1;
 				break;
+			case ENOENT:
+				warnx(gettext("Cannot find tunnel "
+				    "interface %s."), interface_name);
+				ret = -1;
+				break;
 			case EINVAL:
+				/*
+				 * PF_POLICY didn't like what we sent.  We
+				 * can't check all input up here, but we
+				 * do in-kernel.
+				 */
+				warnx(gettext("PF_POLICY invalid input:\n\t%s"),
+				    spdsock_diag(diag));
+				break;
+			case EOPNOTSUPP:
 				warnx(gettext("Can't set /NN"
 					" prefix on multi-host name."));
 				ret = -1;
@@ -5248,7 +5381,7 @@
 			if (strncasecmp(pbuf, INDEX_TAG, index_len) == 0) {
 				buf = pbuf + index_len;
 				buf++;
-				if ((pindex = parse_int(buf)) == -1) {
+				if ((pindex = parse_index(buf, NULL)) == -1) {
 					/* bad index, we can't continue */
 					warnx(gettext(
 						"Invalid index in the file"));
@@ -5333,6 +5466,9 @@
 		/* reset the globals */
 		linecount = 0;
 		pindex = 0;
+		/* free(NULL) also works. */
+		free(interface_name);
+		interface_name = NULL;
 
 		/* reopen for next pass, automagically starting over. */
 		policy_fp = fopen(POLICY_CONF_FILE, "r");
@@ -5360,3 +5496,24 @@
 
 	return (0);
 }
+
+/*
+ * Fill in *tunname as an SPD_EXT_TUN_NAME extension carrying interface_name.
+ * Returns its length in 64-bit words, or 0 if either pointer is NULL.
+ */
+static int
+attach_tunname(spd_if_t *tunname)
+{
+	if (tunname == NULL || interface_name == NULL)
+		return (0);
+
+	tunname->spd_if_exttype = SPD_EXT_TUN_NAME;
+	/*
+	 * "- 3": the name needs strlen + 1 bytes ('\0' included), and 4 of
+	 * those already live inside *tunname.  Round up to 8-byte words.
+	 */
+	tunname->spd_if_len = SPD_8TO64(
+	    P2ROUNDUP(sizeof (*tunname) + strlen(interface_name) - 3, 8));
+	(void) strlcpy((char *)tunname->spd_if_name, interface_name, LIFNAMSIZ);
+	return (tunname->spd_if_len);
+}
--- a/usr/src/cmd/cmd-inet/usr.sbin/ipseckey.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ipseckey.c	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -62,7 +61,6 @@
 
 #include <ipsec_util.h>
 
-static char numprint[NBUF_SIZE];
 static int keysock;
 static uint32_t seq;
 static pid_t mypid;
@@ -72,10 +70,6 @@
 /* Defined as a uint64_t array for alignment purposes. */
 static uint64_t get_buffer[MAX_GET_SIZE];
 
-/* local prototypes */
-static const char *do_inet_ntop(const void *, char *, size_t);
-static void printsatime(int64_t, const char *, const char *, const char *);
-
 /*
  * When something syntactically bad happens while reading commands,
  * print it.  For command line, exit.  For reading from a file, exit, and
@@ -229,22 +223,6 @@
 	{NULL,	0}	/* Token value is irrelevant for this entry. */
 };
 
-static char *
-rparsesatype(int type)
-{
-	struct typetable *tt = type_table;
-
-	while (tt->type != NULL && type != tt->token)
-		tt++;
-
-	if (tt->type == NULL) {
-		(void) snprintf(numprint, NBUF_SIZE, "%d", type);
-	} else {
-		return (tt->type);
-	}
-
-	return (numprint);
-}
 
 static int
 parsesatype(char *type)
@@ -326,6 +304,11 @@
 #define	TOK_NATREM		40
 #define	TOK_NATLPORT		41
 #define	TOK_NATRPORT		42
+#define	TOK_IPROTO		43
+#define	TOK_IDSTADDR		44
+#define	TOK_IDSTADDR6		45
+#define	TOK_ISRCPORT		46
+#define	TOK_IDSTPORT		47
 
 static struct toktable {
 	char *string;
@@ -362,11 +345,21 @@
 	{"dst",			TOK_DSTADDR,		NEXTADDR},
 	{"proxyaddr",		TOK_PROXYADDR,		NEXTADDR},
 	{"proxy",		TOK_PROXYADDR,		NEXTADDR},
+	{"innersrc",		TOK_PROXYADDR,		NEXTADDR},
+	{"isrc",		TOK_PROXYADDR,		NEXTADDR},
+	{"innerdst",		TOK_IDSTADDR,		NEXTADDR},
+	{"idst",		TOK_IDSTADDR,		NEXTADDR},
 
 	{"sport",		TOK_SRCPORT,		NEXTNUM},
 	{"dport",		TOK_DSTPORT,		NEXTNUM},
+	{"innersport",		TOK_ISRCPORT,		NEXTNUM},
+	{"isport",		TOK_ISRCPORT,		NEXTNUM},
+	{"innerdport",		TOK_IDSTPORT,		NEXTNUM},
+	{"idport",		TOK_IDSTPORT,		NEXTNUM},
 	{"proto",		TOK_PROTO,		NEXTNUM},
 	{"ulp",			TOK_PROTO,		NEXTNUM},
+	{"iproto",		TOK_IPROTO,		NEXTNUM},
+	{"iulp",		TOK_IPROTO,		NEXTNUM},
 
 	{"saddr6",		TOK_SRCADDR6,		NEXTADDR},
 	{"srcaddr6",		TOK_SRCADDR6,		NEXTADDR},
@@ -376,6 +369,10 @@
 	{"dst6",		TOK_DSTADDR6,		NEXTADDR},
 	{"proxyaddr6",		TOK_PROXYADDR6,		NEXTADDR},
 	{"proxy6",		TOK_PROXYADDR6,		NEXTADDR},
+	{"innersrc6",		TOK_PROXYADDR6,		NEXTADDR},
+	{"isrc6",		TOK_PROXYADDR6,		NEXTADDR},
+	{"innerdst6",		TOK_IDSTADDR6,		NEXTADDR},
+	{"idst6",		TOK_IDSTADDR6,		NEXTADDR},
 
 	{"authkey",		TOK_AUTHKEY,		NEXTHEX},
 	{"encrkey",		TOK_ENCRKEY,		NEXTHEX},
@@ -457,27 +454,6 @@
 }
 
 /*
- * Return a string containing the name of the specified numerical algorithm
- * identifier.
- */
-static char *
-rparsealg(uint8_t alg, int proto_num)
-{
-	static struct ipsecalgent *holder = NULL; /* we're single-threaded */
-
-	if (holder != NULL)
-		freeipsecalgent(holder);
-
-	holder = getipsecalgbynum(alg, proto_num, NULL);
-	if (holder == NULL) {
-		(void) snprintf(numprint, NBUF_SIZE, "%d", alg);
-		return (numprint);
-	}
-
-	return (*(holder->a_names));
-}
-
-/*
  * Return the numerical algorithm identifier corresponding to the specified
  * algorithm name.
  */
@@ -539,20 +515,6 @@
 	{NULL,		0}
 };
 
-static char *
-rparseidtype(uint16_t type)
-{
-	struct idtypes *idp;
-
-	for (idp = idtypes; idp->idtype != NULL; idp++) {
-		if (type == idp->retval)
-			return (idp->idtype);
-	}
-
-	(void) snprintf(numprint, NBUF_SIZE, "%d", type);
-	return (numprint);
-}
-
 static uint16_t
 parseidtype(char *type)
 {
@@ -638,7 +600,7 @@
 			dummy.he.h_length = sizeof (struct in6_addr);
 		} else if (inet_pton(AF_INET, addr, &addr1) == 1) {
 			/*
-			 * Remape to AF_INET6 anyway.
+			 * Remap to AF_INET6 anyway.
 			 */
 			dummy.he.h_addr_list = dummy.addtl;
 			dummy.addtl[0] = (char *)&addr1;
@@ -662,8 +624,8 @@
 	}
 
 	*hpp = hp;
-	/* Always return sockaddr_storage for now. */
-	return (sizeof (struct sockaddr_storage));
+	/* Always return sockaddr_in6 for now. */
+	return (sizeof (struct sockaddr_in6));
 }
 
 /*
@@ -774,840 +736,6 @@
 }
 
 /*
- * Expand the diagnostic code into a message.
- */
-static void
-print_diagnostic(FILE *file, uint16_t diagnostic)
-{
-	/* Use two spaces so above strings can fit on the line. */
-	(void) fprintf(file, gettext("  Diagnostic code %u:  %s.\n"),
-	    diagnostic, keysock_diag(diagnostic));
-}
-
-/*
- * Prints the base PF_KEY message.
- */
-static void
-print_sadb_msg(struct sadb_msg *samsg, time_t wallclock)
-{
-	if (wallclock != 0)
-		printsatime(wallclock, gettext("%sTimestamp: %s\n"), "", NULL);
-
-	(void) printf(gettext("Base message (version %u) type "),
-	    samsg->sadb_msg_version);
-	switch (samsg->sadb_msg_type) {
-	case SADB_RESERVED:
-		(void) printf(gettext("RESERVED (warning: set to 0)"));
-		break;
-	case SADB_GETSPI:
-		(void) printf("GETSPI");
-		break;
-	case SADB_UPDATE:
-		(void) printf("UPDATE");
-		break;
-	case SADB_ADD:
-		(void) printf("ADD");
-		break;
-	case SADB_DELETE:
-		(void) printf("DELETE");
-		break;
-	case SADB_GET:
-		(void) printf("GET");
-		break;
-	case SADB_ACQUIRE:
-		(void) printf("ACQUIRE");
-		break;
-	case SADB_REGISTER:
-		(void) printf("REGISTER");
-		break;
-	case SADB_EXPIRE:
-		(void) printf("EXPIRE");
-		break;
-	case SADB_FLUSH:
-		(void) printf("FLUSH");
-		break;
-	case SADB_DUMP:
-		(void) printf("DUMP");
-		break;
-	case SADB_X_PROMISC:
-		(void) printf("X_PROMISC");
-		break;
-	case SADB_X_INVERSE_ACQUIRE:
-		(void) printf("X_INVERSE_ACQUIRE");
-		break;
-	default:
-		(void) printf(gettext("Unknown (%u)"), samsg->sadb_msg_type);
-		break;
-	}
-	(void) printf(gettext(", SA type "));
-
-	switch (samsg->sadb_msg_satype) {
-	case SADB_SATYPE_UNSPEC:
-		(void) printf(gettext("<unspecified/all>"));
-		break;
-	case SADB_SATYPE_AH:
-		(void) printf("AH");
-		break;
-	case SADB_SATYPE_ESP:
-		(void) printf("ESP");
-		break;
-	case SADB_SATYPE_RSVP:
-		(void) printf("RSVP");
-		break;
-	case SADB_SATYPE_OSPFV2:
-		(void) printf("OSPFv2");
-		break;
-	case SADB_SATYPE_RIPV2:
-		(void) printf("RIPv2");
-		break;
-	case SADB_SATYPE_MIP:
-		(void) printf(gettext("Mobile IP"));
-		break;
-	default:
-		(void) printf(gettext("<unknown %u>"), samsg->sadb_msg_satype);
-		break;
-	}
-
-	(void) printf(".\n");
-
-	if (samsg->sadb_msg_errno != 0) {
-		(void) printf(gettext("Error %s from PF_KEY.\n"),
-		    strerror(samsg->sadb_msg_errno));
-		print_diagnostic(stdout, samsg->sadb_x_msg_diagnostic);
-	}
-
-	(void) printf(gettext("Message length %u bytes, seq=%u, pid=%u.\n"),
-	    SADB_64TO8(samsg->sadb_msg_len), samsg->sadb_msg_seq,
-	    samsg->sadb_msg_pid);
-}
-
-/*
- * Print the SA extension for PF_KEY.
- */
-static void
-print_sa(char *prefix, struct sadb_sa *assoc)
-{
-	if (assoc->sadb_sa_len != SADB_8TO64(sizeof (*assoc))) {
-		warnx(gettext("WARNING: SA info extension length (%u) is bad."),
-		    SADB_64TO8(assoc->sadb_sa_len));
-	}
-
-	(void) printf(gettext("%sSADB_ASSOC spi=0x%x, replay=%u, state="),
-	    prefix, ntohl(assoc->sadb_sa_spi), assoc->sadb_sa_replay);
-	switch (assoc->sadb_sa_state) {
-	case SADB_SASTATE_LARVAL:
-		(void) printf(gettext("LARVAL"));
-		break;
-	case SADB_SASTATE_MATURE:
-		(void) printf(gettext("MATURE"));
-		break;
-	case SADB_SASTATE_DYING:
-		(void) printf(gettext("DYING"));
-		break;
-	case SADB_SASTATE_DEAD:
-		(void) printf(gettext("DEAD"));
-		break;
-	default:
-		(void) printf(gettext("<unknown %u>"), assoc->sadb_sa_state);
-	}
-
-	if (assoc->sadb_sa_auth != SADB_AALG_NONE) {
-		(void) printf(gettext("\n%sAuthentication algorithm = "),
-		    prefix);
-		(void) dump_aalg(assoc->sadb_sa_auth, stdout);
-	}
-
-	if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
-		(void) printf(gettext("\n%sEncryption algorithm = "), prefix);
-		(void) dump_ealg(assoc->sadb_sa_encrypt, stdout);
-	}
-
-	(void) printf(gettext("\n%sflags=0x%x < "), prefix,
-	    assoc->sadb_sa_flags);
-	if (assoc->sadb_sa_flags & SADB_SAFLAGS_PFS)
-		(void) printf("PFS ");
-	if (assoc->sadb_sa_flags & SADB_SAFLAGS_NOREPLAY)
-		(void) printf("NOREPLAY ");
-
-	/* BEGIN Solaris-specific flags. */
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_USED)
-		(void) printf("X_USED ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_UNIQUE)
-		(void) printf("X_UNIQUE ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_AALG1)
-		(void) printf("X_AALG1 ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_AALG2)
-		(void) printf("X_AALG2 ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_EALG1)
-		(void) printf("X_EALG1 ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_EALG2)
-		(void) printf("X_EALG2 ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC)
-		(void) printf("X_NATT_LOC ");
-	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM)
-		(void) printf("X_NATT_REM ");
-	/* END Solaris-specific flags. */
-
-	(void) printf(">\n");
-}
-
-static void
-printsatime(int64_t lt, const char *msg, const char *pfx, const char *pfx2)
-{
-	char tbuf[TBUF_SIZE]; /* For strftime() call. */
-	const char *tp = tbuf;
-	time_t t = lt;
-	if (t != lt) {
-		if (lt > 0)
-			t = LONG_MAX;
-		else
-			t = LONG_MIN;
-	}
-
-	if (strftime(tbuf, TBUF_SIZE, NULL, localtime(&t)) == 0)
-		tp = gettext("<time conversion failed>");
-	(void) printf(msg, pfx, tp);
-	if (vflag && (pfx2 != NULL))
-		(void) printf(gettext("%s\t(raw time value %llu)\n"), pfx2, lt);
-}
-
-/*
- * Print the SA lifetime information.  (An SADB_EXT_LIFETIME_* extension.)
- */
-static void
-print_lifetimes(time_t wallclock, struct sadb_lifetime *current,
-    struct sadb_lifetime *hard, struct sadb_lifetime *soft)
-{
-	int64_t scratch;
-	char *soft_prefix = gettext("SLT: ");
-	char *hard_prefix = gettext("HLT: ");
-	char *current_prefix = gettext("CLT: ");
-
-	if (current != NULL &&
-	    current->sadb_lifetime_len != SADB_8TO64(sizeof (*current))) {
-		warnx(gettext("WARNING: CURRENT lifetime extension length "
-			"(%u) is bad."),
-		    SADB_64TO8(current->sadb_lifetime_len));
-	}
-
-	if (hard != NULL &&
-	    hard->sadb_lifetime_len != SADB_8TO64(sizeof (*hard))) {
-		warnx(gettext("WARNING: HARD lifetime "
-			"extension length (%u) is bad."),
-		    SADB_64TO8(hard->sadb_lifetime_len));
-	}
-
-	if (soft != NULL &&
-	    soft->sadb_lifetime_len != SADB_8TO64(sizeof (*soft))) {
-		warnx(gettext("WARNING: SOFT lifetime "
-		    "extension length (%u) is bad."),
-		    SADB_64TO8(soft->sadb_lifetime_len));
-	}
-
-	(void) printf(" LT: Lifetime information\n");
-
-	if (current != NULL) {
-		/* Express values as current values. */
-		(void) printf(gettext(
-		    "%s%llu bytes protected, %u allocations used.\n"),
-		    current_prefix, current->sadb_lifetime_bytes,
-		    current->sadb_lifetime_allocations);
-		printsatime(current->sadb_lifetime_addtime,
-		    gettext("%sSA added at time %s\n"),
-		    current_prefix, current_prefix);
-		if (current->sadb_lifetime_usetime != 0) {
-			printsatime(current->sadb_lifetime_usetime,
-			    gettext("%sSA first used at time %s\n"),
-			    current_prefix, current_prefix);
-		}
-		printsatime(wallclock, gettext("%sTime now is %s\n"),
-		    current_prefix, current_prefix);
-	}
-
-	if (soft != NULL) {
-		(void) printf(gettext("%sSoft lifetime information:  "),
-		    soft_prefix);
-		(void) printf(gettext("%llu bytes of lifetime, %u "
-		    "allocations.\n"), soft->sadb_lifetime_bytes,
-		    soft->sadb_lifetime_allocations);
-		(void) printf(gettext("%s%llu seconds of post-add lifetime.\n"),
-		    soft_prefix, soft->sadb_lifetime_addtime);
-		(void) printf(gettext("%s%llu seconds of post-use lifetime.\n"),
-		    soft_prefix, soft->sadb_lifetime_usetime);
-		/* If possible, express values as time remaining. */
-		if (current != NULL) {
-			if (soft->sadb_lifetime_bytes != 0)
-				(void) printf(gettext(
-				    "%s%llu more bytes can be protected.\n"),
-				    soft_prefix,
-				    (soft->sadb_lifetime_bytes >
-					current->sadb_lifetime_bytes) ?
-				    (soft->sadb_lifetime_bytes -
-					current->sadb_lifetime_bytes) : (0));
-			if (soft->sadb_lifetime_addtime != 0 ||
-			    (soft->sadb_lifetime_usetime != 0 &&
-				current->sadb_lifetime_usetime != 0)) {
-				int64_t adddelta, usedelta;
-
-				if (soft->sadb_lifetime_addtime != 0) {
-					adddelta =
-					    current->sadb_lifetime_addtime +
-					    soft->sadb_lifetime_addtime -
-					    wallclock;
-				} else {
-					adddelta = TIME_MAX;
-				}
-
-				if (soft->sadb_lifetime_usetime != 0 &&
-				    current->sadb_lifetime_usetime != 0) {
-					usedelta =
-					    current->sadb_lifetime_usetime +
-					    soft->sadb_lifetime_usetime -
-					    wallclock;
-				} else {
-					usedelta = TIME_MAX;
-				}
-				(void) printf("%s", soft_prefix);
-				scratch = MIN(adddelta, usedelta);
-				if (scratch >= 0) {
-					(void) printf(gettext("Soft expiration "
-					    "occurs in %lld seconds, "),
-					    scratch);
-				} else {
-					(void) printf(gettext(
-					    "Soft expiration occurred "));
-				}
-				scratch += wallclock;
-				printsatime(scratch, gettext("%sat %s.\n"), "",
-				    soft_prefix);
-			}
-		}
-	}
-
-	if (hard != NULL) {
-		(void) printf(gettext("%sHard lifetime information:  "),
-		    hard_prefix);
-		(void) printf(gettext("%llu bytes of lifetime, "
-		    "%u allocations.\n"), hard->sadb_lifetime_bytes,
-		    hard->sadb_lifetime_allocations);
-		(void) printf(gettext("%s%llu seconds of post-add lifetime.\n"),
-		    hard_prefix, hard->sadb_lifetime_addtime);
-		(void) printf(gettext("%s%llu seconds of post-use lifetime.\n"),
-		    hard_prefix, hard->sadb_lifetime_usetime);
-		/* If possible, express values as time remaining. */
-		if (current != NULL) {
-			if (hard->sadb_lifetime_bytes != 0)
-				(void) printf(gettext(
-				    "%s%llu more bytes can be protected.\n"),
-				    hard_prefix,
-				    (hard->sadb_lifetime_bytes >
-					current->sadb_lifetime_bytes) ?
-				    (hard->sadb_lifetime_bytes -
-					current->sadb_lifetime_bytes) : (0));
-			if (hard->sadb_lifetime_addtime != 0 ||
-			    (hard->sadb_lifetime_usetime != 0 &&
-				current->sadb_lifetime_usetime != 0)) {
-				int64_t adddelta, usedelta;
-
-				if (hard->sadb_lifetime_addtime != 0) {
-					adddelta =
-					    current->sadb_lifetime_addtime +
-					    hard->sadb_lifetime_addtime -
-					    wallclock;
-				} else {
-					adddelta = TIME_MAX;
-				}
-
-				if (hard->sadb_lifetime_usetime != 0 &&
-				    current->sadb_lifetime_usetime != 0) {
-					usedelta =
-					    current->sadb_lifetime_usetime +
-					    hard->sadb_lifetime_usetime -
-					    wallclock;
-				} else {
-					usedelta = TIME_MAX;
-				}
-				(void) printf("%s", hard_prefix);
-				scratch = MIN(adddelta, usedelta);
-				if (scratch >= 0) {
-					(void) printf(gettext("Hard expiration "
-					    "occurs in %lld seconds, "),
-					    scratch);
-				} else {
-					(void) printf(gettext(
-					    "Hard expiration occured "));
-				}
-				scratch += wallclock;
-				printsatime(scratch, gettext("%sat %s.\n"), "",
-				    hard_prefix);
-			}
-		}
-	}
-}
-
-/*
- * Print an SADB_EXT_ADDRESS_* extension.
- */
-static void
-print_address(char *prefix, struct sadb_address *addr)
-{
-	struct protoent *pe;
-
-	(void) printf("%s", prefix);
-	switch (addr->sadb_address_exttype) {
-	case SADB_EXT_ADDRESS_SRC:
-		(void) printf(gettext("Source address "));
-		break;
-	case SADB_EXT_ADDRESS_DST:
-		(void) printf(gettext("Destination address "));
-		break;
-	case SADB_EXT_ADDRESS_PROXY:
-		(void) printf(gettext("Proxy address "));
-		break;
-	case SADB_X_EXT_ADDRESS_NATT_LOC:
-		(void) printf(gettext("NATT local address "));
-		break;
-	case SADB_X_EXT_ADDRESS_NATT_REM:
-		(void) printf(gettext("NATT remote address "));
-		break;
-	}
-
-	(void) printf(gettext("(proto=%d"), addr->sadb_address_proto);
-	if (!nflag) {
-		if (addr->sadb_address_proto == 0) {
-			(void) printf(gettext("/<unspecified>"));
-		} else if ((pe = getprotobynumber(addr->sadb_address_proto))
-		    != NULL) {
-			(void) printf("/%s", pe->p_name);
-		} else {
-			(void) printf(gettext("/<unknown>"));
-		}
-	}
-	(void) printf(gettext(")\n%s"), prefix);
-	(void) dump_sockaddr((struct sockaddr *)(addr + 1), B_FALSE, stdout);
-}
-
-/*
- * Print an SADB_EXT_KEY extension.
- */
-static void
-print_key(char *prefix, struct sadb_key *key)
-{
-	(void) printf("%s", prefix);
-
-	switch (key->sadb_key_exttype) {
-	case SADB_EXT_KEY_AUTH:
-		(void) printf(gettext("Authentication"));
-		break;
-	case SADB_EXT_KEY_ENCRYPT:
-		(void) printf(gettext("Encryption"));
-		break;
-	}
-
-	(void) printf(gettext(" key.\n%s"), prefix);
-	(void) dump_key((uint8_t *)(key + 1), key->sadb_key_bits, stdout);
-	(void) putchar('\n');
-}
-
-/*
- * Print an SADB_EXT_IDENTITY_* extension.
- */
-static void
-print_ident(char *prefix, struct sadb_ident *id)
-{
-	boolean_t canprint = B_TRUE;
-
-	(void) printf("%s", prefix);
-	switch (id->sadb_ident_exttype) {
-	case SADB_EXT_IDENTITY_SRC:
-		(void) printf(gettext("Source"));
-		break;
-	case SADB_EXT_IDENTITY_DST:
-		(void) printf(gettext("Destination"));
-		break;
-	}
-
-	(void) printf(gettext(" identity, uid=%d, type "), id->sadb_ident_id);
-	canprint = dump_sadb_idtype(id->sadb_ident_type, stdout, NULL);
-	(void) printf("\n%s", prefix);
-	if (canprint)
-		(void) printf("%s\n", (char *)(id + 1));
-	else
-		(void) printf(gettext("<cannot print>\n"));
-}
-
-/*
- * Print an SADB_SENSITIVITY extension.
- */
-static void
-print_sens(char *prefix, struct sadb_sens *sens)
-{
-	uint64_t *bitmap = (uint64_t *)(sens + 1);
-	int i;
-
-	(void) printf(
-	    gettext("%sSensitivity DPD %d, sens level=%d, integ level=%d\n"),
-	    prefix, sens->sadb_sens_dpd, sens->sadb_sens_sens_level,
-	    sens->sadb_sens_integ_level);
-	for (i = 0; sens->sadb_sens_sens_len-- > 0; i++, bitmap++)
-		(void) printf(
-		    gettext("%s Sensitivity BM extended word %d 0x%llx\n"),
-		    i, *bitmap);
-	for (i = 0; sens->sadb_sens_integ_len-- > 0; i++, bitmap++)
-		(void) printf(
-		    gettext("%s Integrity BM extended word %d 0x%llx\n"),
-		    i, *bitmap);
-}
-
-/*
- * Print an SADB_EXT_PROPOSAL extension.
- */
-static void
-print_prop(char *prefix, struct sadb_prop *prop)
-{
-	struct sadb_comb *combs;
-	int i, numcombs;
-
-	(void) printf(gettext("%sProposal, replay counter = %u.\n"), prefix,
-	    prop->sadb_prop_replay);
-
-	numcombs = prop->sadb_prop_len - SADB_8TO64(sizeof (*prop));
-	numcombs /= SADB_8TO64(sizeof (*combs));
-
-	combs = (struct sadb_comb *)(prop + 1);
-
-	for (i = 0; i < numcombs; i++) {
-		(void) printf(gettext("%s Combination #%u "), prefix, i + 1);
-		if (combs[i].sadb_comb_auth != SADB_AALG_NONE) {
-			(void) printf(gettext("Authentication = "));
-			(void) dump_aalg(combs[i].sadb_comb_auth, stdout);
-			(void) printf(gettext("  minbits=%u, maxbits=%u.\n%s "),
-			    combs[i].sadb_comb_auth_minbits,
-			    combs[i].sadb_comb_auth_maxbits, prefix);
-		}
-
-		if (combs[i].sadb_comb_encrypt != SADB_EALG_NONE) {
-			(void) printf(gettext("Encryption = "));
-			(void) dump_ealg(combs[i].sadb_comb_encrypt, stdout);
-			(void) printf(gettext("  minbits=%u, maxbits=%u.\n%s "),
-			    combs[i].sadb_comb_encrypt_minbits,
-			    combs[i].sadb_comb_encrypt_maxbits, prefix);
-		}
-
-		(void) printf(gettext("HARD: "));
-		if (combs[i].sadb_comb_hard_allocations)
-			(void) printf(gettext("alloc=%u "),
-			    combs[i].sadb_comb_hard_allocations);
-		if (combs[i].sadb_comb_hard_bytes)
-			(void) printf(gettext("bytes=%llu "),
-			    combs[i].sadb_comb_hard_bytes);
-		if (combs[i].sadb_comb_hard_addtime)
-			(void) printf(gettext("post-add secs=%llu "),
-			    combs[i].sadb_comb_hard_addtime);
-		if (combs[i].sadb_comb_hard_usetime)
-			(void) printf(gettext("post-use secs=%llu"),
-			    combs[i].sadb_comb_hard_usetime);
-
-		(void) printf(gettext("\n%s SOFT: "), prefix);
-		if (combs[i].sadb_comb_soft_allocations)
-			(void) printf(gettext("alloc=%u "),
-			    combs[i].sadb_comb_soft_allocations);
-		if (combs[i].sadb_comb_soft_bytes)
-			(void) printf(gettext("bytes=%llu "),
-			    combs[i].sadb_comb_soft_bytes);
-		if (combs[i].sadb_comb_soft_addtime)
-			(void) printf(gettext("post-add secs=%llu "),
-			    combs[i].sadb_comb_soft_addtime);
-		if (combs[i].sadb_comb_soft_usetime)
-			(void) printf(gettext("post-use secs=%llu"),
-			    combs[i].sadb_comb_soft_usetime);
-		(void) putchar('\n');
-	}
-}
-
-/*
- * Print an extended proposal (SADB_X_EXT_EPROP).
- */
-static void
-print_eprop(char *prefix, struct sadb_prop *eprop)
-{
-	uint64_t *sofar;
-	struct sadb_x_ecomb *ecomb;
-	struct sadb_x_algdesc *algdesc;
-	int i, j;
-
-	(void) printf(gettext("%sExtended Proposal, replay counter = %u, "),
-	    prefix, eprop->sadb_prop_replay);
-	(void) printf(gettext("number of combinations = %u.\n"),
-	    eprop->sadb_x_prop_numecombs);
-
-	sofar = (uint64_t *)(eprop + 1);
-	ecomb = (struct sadb_x_ecomb *)sofar;
-
-	for (i = 0; i < eprop->sadb_x_prop_numecombs; ) {
-		(void) printf(gettext("%s Extended combination #%u:\n"),
-		    prefix, ++i);
-
-		(void) printf(gettext("%s HARD: "), prefix);
-		(void) printf(gettext("alloc=%u, "),
-		    ecomb->sadb_x_ecomb_hard_allocations);
-		(void) printf(gettext("bytes=%llu, "),
-		    ecomb->sadb_x_ecomb_hard_bytes);
-		(void) printf(gettext("post-add secs=%llu, "),
-		    ecomb->sadb_x_ecomb_hard_addtime);
-		(void) printf(gettext("post-use secs=%llu\n"),
-		    ecomb->sadb_x_ecomb_hard_usetime);
-
-		(void) printf(gettext("%s SOFT: "), prefix);
-		(void) printf(gettext("alloc=%u, "),
-		    ecomb->sadb_x_ecomb_soft_allocations);
-		(void) printf(gettext("bytes=%llu, "),
-		    ecomb->sadb_x_ecomb_soft_bytes);
-		(void) printf(gettext("post-add secs=%llu, "),
-		    ecomb->sadb_x_ecomb_soft_addtime);
-		(void) printf(gettext("post-use secs=%llu\n"),
-		    ecomb->sadb_x_ecomb_soft_usetime);
-
-		sofar = (uint64_t *)(ecomb + 1);
-		algdesc = (struct sadb_x_algdesc *)sofar;
-
-		for (j = 0; j < ecomb->sadb_x_ecomb_numalgs; ) {
-			(void) printf(gettext("%s Alg #%u "), prefix, ++j);
-			switch (algdesc->sadb_x_algdesc_satype) {
-			case SADB_SATYPE_ESP:
-				(void) printf(gettext("for ESP "));
-				break;
-			case SADB_SATYPE_AH:
-				(void) printf(gettext("for AH "));
-				break;
-			default:
-				(void) printf(gettext("for satype=%d "),
-				    algdesc->sadb_x_algdesc_satype);
-			}
-			switch (algdesc->sadb_x_algdesc_algtype) {
-			case SADB_X_ALGTYPE_CRYPT:
-				(void) printf(gettext("Encryption = "));
-				(void) dump_ealg(algdesc->sadb_x_algdesc_alg,
-				    stdout);
-				break;
-			case SADB_X_ALGTYPE_AUTH:
-				(void) printf(gettext("Authentication = "));
-				(void) dump_aalg(algdesc->sadb_x_algdesc_alg,
-				    stdout);
-				break;
-			default:
-				(void) printf(gettext("algtype(%d) = alg(%d)"),
-				    algdesc->sadb_x_algdesc_algtype,
-				    algdesc->sadb_x_algdesc_alg);
-				break;
-			}
-
-			(void) printf(gettext("  minbits=%u, maxbits=%u.\n"),
-			    algdesc->sadb_x_algdesc_minbits,
-			    algdesc->sadb_x_algdesc_maxbits);
-
-			sofar = (uint64_t *)(++algdesc);
-		}
-		ecomb = (struct sadb_x_ecomb *)sofar;
-	}
-}
-
-/*
- * Print an SADB_EXT_SUPPORTED extension.
- */
-static void
-print_supp(char *prefix, struct sadb_supported *supp)
-{
-	struct sadb_alg *algs;
-	int i, numalgs;
-
-	(void) printf(gettext("%sSupported "), prefix);
-	switch (supp->sadb_supported_exttype) {
-	case SADB_EXT_SUPPORTED_AUTH:
-		(void) printf(gettext("authentication"));
-		break;
-	case SADB_EXT_SUPPORTED_ENCRYPT:
-		(void) printf(gettext("encryption"));
-		break;
-	}
-	(void) printf(gettext(" algorithms.\n"));
-
-	algs = (struct sadb_alg *)(supp + 1);
-	numalgs = supp->sadb_supported_len - SADB_8TO64(sizeof (*supp));
-	numalgs /= SADB_8TO64(sizeof (*algs));
-	for (i = 0; i < numalgs; i++) {
-		(void) printf("%s", prefix);
-		switch (supp->sadb_supported_exttype) {
-		case SADB_EXT_SUPPORTED_AUTH:
-			(void) dump_aalg(algs[i].sadb_alg_id, stdout);
-			break;
-		case SADB_EXT_SUPPORTED_ENCRYPT:
-			(void) dump_ealg(algs[i].sadb_alg_id, stdout);
-			break;
-		}
-		(void) printf(gettext(" minbits=%u, maxbits=%u, ivlen=%u.\n"),
-		    algs[i].sadb_alg_minbits, algs[i].sadb_alg_maxbits,
-		    algs[i].sadb_alg_ivlen);
-	}
-}
-
-/*
- * Print an SADB_EXT_SPIRANGE extension.
- */
-static void
-print_spirange(char *prefix, struct sadb_spirange *range)
-{
-	(void) printf(gettext("%sSPI Range, min=0x%x, max=0x%x\n"), prefix,
-	    htonl(range->sadb_spirange_min),
-	    htonl(range->sadb_spirange_max));
-}
-
-/*
- * Print an SADB_X_EXT_KM_COOKIE extension.
- */
-
-static void
-print_kmc(char *prefix, struct sadb_x_kmc *kmc)
-{
-	char *cookie_label;
-
-	if ((cookie_label = kmc_lookup_by_cookie(kmc->sadb_x_kmc_cookie)) ==
-	    NULL)
-		cookie_label = gettext("<Label not found.>");
-
-	(void) printf(gettext("%sProtocol %u, cookie=\"%s\" (%u)\n"), prefix,
-	    kmc->sadb_x_kmc_proto, cookie_label, kmc->sadb_x_kmc_cookie);
-}
-
-/*
- * Take a PF_KEY message pointed to buffer and print it.  Useful for DUMP
- * and GET.
- */
-static void
-print_samsg(uint64_t *buffer, boolean_t want_timestamp)
-{
-	uint64_t *current;
-	struct sadb_msg *samsg = (struct sadb_msg *)buffer;
-	struct sadb_ext *ext;
-	struct sadb_lifetime *currentlt = NULL, *hardlt = NULL, *softlt = NULL;
-	int i;
-	time_t wallclock;
-
-	(void) time(&wallclock);
-
-	print_sadb_msg(samsg, want_timestamp ? wallclock : 0);
-	current = (uint64_t *)(samsg + 1);
-	while (current - buffer < samsg->sadb_msg_len) {
-		int lenbytes;
-
-		ext = (struct sadb_ext *)current;
-		lenbytes = SADB_64TO8(ext->sadb_ext_len);
-		switch (ext->sadb_ext_type) {
-		case SADB_EXT_SA:
-			print_sa(gettext("SA: "), (struct sadb_sa *)current);
-			break;
-		/*
-		 * Pluck out lifetimes and print them at the end.  This is
-		 * to show relative lifetimes.
-		 */
-		case SADB_EXT_LIFETIME_CURRENT:
-			currentlt = (struct sadb_lifetime *)current;
-			break;
-		case SADB_EXT_LIFETIME_HARD:
-			hardlt = (struct sadb_lifetime *)current;
-			break;
-		case SADB_EXT_LIFETIME_SOFT:
-			softlt = (struct sadb_lifetime *)current;
-			break;
-
-		case SADB_EXT_ADDRESS_SRC:
-			print_address(gettext("SRC: "),
-			    (struct sadb_address *)current);
-			break;
-		case SADB_EXT_ADDRESS_DST:
-			print_address(gettext("DST: "),
-			    (struct sadb_address *)current);
-			break;
-		case SADB_EXT_ADDRESS_PROXY:
-			print_address(gettext("PXY: "),
-			    (struct sadb_address *)current);
-			break;
-		case SADB_EXT_KEY_AUTH:
-			print_key(gettext("AKY: "), (struct sadb_key *)current);
-			break;
-		case SADB_EXT_KEY_ENCRYPT:
-			print_key(gettext("EKY: "), (struct sadb_key *)current);
-			break;
-		case SADB_EXT_IDENTITY_SRC:
-			print_ident(gettext("SID: "),
-			    (struct sadb_ident *)current);
-			break;
-		case SADB_EXT_IDENTITY_DST:
-			print_ident(gettext("DID: "),
-			    (struct sadb_ident *)current);
-			break;
-		case SADB_EXT_SENSITIVITY:
-			print_sens(gettext("SNS: "),
-			    (struct sadb_sens *)current);
-			break;
-		case SADB_EXT_PROPOSAL:
-			print_prop(gettext("PRP: "),
-			    (struct sadb_prop *)current);
-			break;
-		case SADB_EXT_SUPPORTED_AUTH:
-			print_supp(gettext("SUA: "),
-			    (struct sadb_supported *)current);
-			break;
-		case SADB_EXT_SUPPORTED_ENCRYPT:
-			print_supp(gettext("SUE: "),
-			    (struct sadb_supported *)current);
-			break;
-		case SADB_EXT_SPIRANGE:
-			print_spirange(gettext("SPR: "),
-			    (struct sadb_spirange *)current);
-			break;
-		case SADB_X_EXT_EPROP:
-			print_eprop(gettext("EPR: "),
-			    (struct sadb_prop *)current);
-			break;
-		case SADB_X_EXT_KM_COOKIE:
-			print_kmc(gettext("KMC: "),
-			    (struct sadb_x_kmc *)current);
-			break;
-		case SADB_X_EXT_ADDRESS_NATT_REM:
-			print_address(gettext("NRM: "),
-			    (struct sadb_address *)current);
-			break;
-		case SADB_X_EXT_ADDRESS_NATT_LOC:
-			print_address(gettext("NLC: "),
-			    (struct sadb_address *)current);
-			break;
-		default:
-			(void) printf(gettext(
-			    "UNK: Unknown ext. %d, len %d.\n"),
-			    ext->sadb_ext_type, lenbytes);
-			for (i = 0; i < ext->sadb_ext_len; i++)
-				(void) printf(gettext("UNK: 0x%llx\n"),
-				    ((uint64_t *)ext)[i]);
-			break;
-		}
-		current += ext->sadb_ext_len;
-	}
-	/*
-	 * Print lifetimes NOW.
-	 */
-	if (currentlt != NULL || hardlt != NULL || softlt != NULL)
-		print_lifetimes(wallclock, currentlt, hardlt, softlt);
-
-	if (current - buffer != samsg->sadb_msg_len) {
-		warnx(gettext("WARNING: insufficient buffer "
-			"space or corrupt message."));
-	}
-
-	(void) fflush(stdout);	/* Make sure our message is out there. */
-}
-
-/*
  * Write a message to the PF_KEY socket.  If verbose, print the message
  * heading into the kernel.
  */
@@ -1618,7 +746,7 @@
 		(void) printf(
 		    gettext("VERBOSE ON:  Message to kernel looks like:\n"));
 		(void) printf("==========================================\n");
-		print_samsg(msg, B_FALSE);
+		print_samsg(msg, B_FALSE, vflag);
 		(void) printf("==========================================\n");
 	}
 
@@ -1716,292 +844,6 @@
  */
 
 /*
- * Print save information for a lifetime extension.
- *
- * NOTE : It saves the lifetime in absolute terms.  For example, if you
- * had a hard_usetime of 60 seconds, you'll save it as 60 seconds, even though
- * there may have been 59 seconds burned off the clock.
- */
-static boolean_t
-save_lifetime(struct sadb_lifetime *lifetime, FILE *ofile)
-{
-	char *prefix;
-
-	prefix = (lifetime->sadb_lifetime_exttype == SADB_EXT_LIFETIME_SOFT) ?
-	    "soft" : "hard";
-
-	if (putc('\t', ofile) == EOF)
-		return (B_FALSE);
-
-	if (lifetime->sadb_lifetime_allocations != 0 && fprintf(ofile,
-	    "%s_alloc %u ", prefix, lifetime->sadb_lifetime_allocations) < 0)
-		return (B_FALSE);
-
-	if (lifetime->sadb_lifetime_bytes != 0 && fprintf(ofile,
-	    "%s_bytes %llu ", prefix, lifetime->sadb_lifetime_bytes) < 0)
-		return (B_FALSE);
-
-	if (lifetime->sadb_lifetime_addtime != 0 && fprintf(ofile,
-	    "%s_addtime %llu ", prefix, lifetime->sadb_lifetime_addtime) < 0)
-		return (B_FALSE);
-
-	if (lifetime->sadb_lifetime_usetime != 0 && fprintf(ofile,
-	    "%s_usetime %llu ", prefix, lifetime->sadb_lifetime_usetime) < 0)
-		return (B_FALSE);
-
-	return (B_TRUE);
-}
-
-/*
- * Print save information for an address extension.
- */
-static boolean_t
-save_address(struct sadb_address *addr, FILE *ofile)
-{
-	char *printable_addr, buf[INET6_ADDRSTRLEN];
-	const char *prefix, *pprefix;
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(addr + 1);
-	struct sockaddr_in *sin = (struct sockaddr_in *)sin6;
-	int af = sin->sin_family;
-
-	/*
-	 * Address-family reality check.
-	 */
-	if (af != AF_INET6 && af != AF_INET)
-		return (B_FALSE);
-
-	switch (addr->sadb_address_exttype) {
-	case SADB_EXT_ADDRESS_SRC:
-		prefix = "src";
-		pprefix = "sport";
-		break;
-	case SADB_EXT_ADDRESS_DST:
-		prefix = "dst";
-		pprefix = "dport";
-		break;
-	case SADB_EXT_ADDRESS_PROXY:
-		prefix = "proxy";
-		pprefix = NULL;
-		break;
-	case SADB_X_EXT_ADDRESS_NATT_LOC:
-		prefix = "nat_loc ";
-		pprefix = "nat_lport";
-		break;
-	case SADB_X_EXT_ADDRESS_NATT_REM:
-		prefix = "nat_rem ";
-		pprefix = "nat_rport";
-		break;
-	}
-
-	if (fprintf(ofile, "    %s ", prefix) < 0)
-		return (B_FALSE);
-
-	/*
-	 * Do not do address-to-name translation, given that we live in
-	 * an age of names that explode into many addresses.
-	 */
-	printable_addr = (char *)inet_ntop(af,
-	    (af == AF_INET) ? (char *)&sin->sin_addr : (char *)&sin6->sin6_addr,
-	    buf, sizeof (buf));
-	if (printable_addr == NULL)
-		printable_addr = "<inet_ntop() failed>";
-	if (fprintf(ofile, "%s", printable_addr) < 0)
-		return (B_FALSE);
-
-	/*
-	 * The port is in the same position for struct sockaddr_in and
-	 * struct sockaddr_in6.  We exploit that property here.
-	 */
-	if ((pprefix != NULL) && (sin->sin_port != 0))
-		(void) fprintf(ofile, " %s %d", pprefix, ntohs(sin->sin_port));
-
-	return (B_TRUE);
-}
-
-/*
- * Print save information for a key extension. Returns whether writing
- * to the specified output file was successful or not.
- */
-static boolean_t
-save_key(struct sadb_key *key, FILE *ofile)
-{
-	char *prefix;
-
-	if (putc('\t', ofile) == EOF)
-		return (B_FALSE);
-
-	prefix = (key->sadb_key_exttype == SADB_EXT_KEY_AUTH) ? "auth" : "encr";
-
-	if (fprintf(ofile, "%skey ", prefix) < 0)
-		return (B_FALSE);
-
-	if (dump_key((uint8_t *)(key + 1), key->sadb_key_bits, ofile) == -1)
-		return (B_FALSE);
-
-	return (B_TRUE);
-}
-
-/*
- * Print save information for an identity extension.
- */
-static boolean_t
-save_ident(struct sadb_ident *ident, FILE *ofile)
-{
-	char *prefix;
-
-	if (putc('\t', ofile) == EOF)
-		return (B_FALSE);
-
-	prefix = (ident->sadb_ident_exttype == SADB_EXT_IDENTITY_SRC) ? "src" :
-	    "dst";
-
-	if (fprintf(ofile, "%sidtype %s ", prefix,
-	    rparseidtype(ident->sadb_ident_type)) < 0)
-		return (B_FALSE);
-
-	if (ident->sadb_ident_type == SADB_X_IDENTTYPE_DN ||
-	    ident->sadb_ident_type == SADB_X_IDENTTYPE_GN) {
-		if (fprintf(ofile, gettext("<can-not-print>")) < 0)
-			return (B_FALSE);
-	} else {
-		if (fprintf(ofile, "%s", (char *)(ident + 1)) < 0)
-			return (B_FALSE);
-	}
-
-	return (B_TRUE);
-}
-
-/*
- * "Save" a security association to an output file.
- *
- * NOTE the lack of calls to gettext() because I'm outputting parseable stuff.
- * ALSO NOTE that if you change keywords (see parsecmd()), you'll have to
- * change them here as well.
- */
-static void
-save_assoc(uint64_t *buffer, FILE *ofile)
-{
-	int seen_proto = 0;
-	uint64_t *current;
-	struct sadb_address *addr;
-	struct sadb_msg *samsg = (struct sadb_msg *)buffer;
-	struct sadb_ext *ext;
-#define	bail2(s)	do { \
-				int t = errno; \
-				(void) fclose(ofile); \
-				errno = t; \
-				interactive = B_FALSE;	/* Guarantees exit. */ \
-				Bail(s); \
-			} while (B_FALSE)	/* How do I lint-clean this? */
-
-#define	savenl() if (fputs(" \\\n", ofile) == EOF) { bail2("savenl"); }
-
-	if (fputs("# begin assoc\n", ofile) == EOF)
-		Bail("save_assoc: Opening comment of SA");
-	if (fprintf(ofile, "add %s ", rparsesatype(samsg->sadb_msg_satype)) < 0)
-		Bail("save_assoc: First line of SA");
-	/* LINTED E_CONST_COND */
-	savenl();
-
-	current = (uint64_t *)(samsg + 1);
-	while (current - buffer < samsg->sadb_msg_len) {
-		struct sadb_sa *assoc;
-
-		ext = (struct sadb_ext *)current;
-		switch (ext->sadb_ext_type) {
-		case SADB_EXT_SA:
-			assoc = (struct sadb_sa *)ext;
-			if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
-				if (fprintf(ofile, "# WARNING: SA was dying "
-				    "or dead.\n") < 0) {
-					/* LINTED E_CONST_COND */
-					bail2("save_assoc: fprintf not mature");
-				}
-			}
-			if (fprintf(ofile, "    spi 0x%x ",
-			    ntohl(assoc->sadb_sa_spi)) < 0)
-				/* LINTED E_CONST_COND */
-				bail2("save_assoc: fprintf spi");
-			if (fprintf(ofile, "encr_alg %s ",
-			    rparsealg(assoc->sadb_sa_encrypt,
-				IPSEC_PROTO_ESP)) < 0)
-				/* LINTED E_CONST_COND */
-				bail2("save_assoc: fprintf encrypt");
-			if (fprintf(ofile, "auth_alg %s ",
-			    rparsealg(assoc->sadb_sa_auth,
-				IPSEC_PROTO_AH)) < 0)
-				/* LINTED E_CONST_COND */
-				bail2("save_assoc: fprintf auth");
-			if (fprintf(ofile, "replay %d ",
-			    assoc->sadb_sa_replay) < 0)
-				/* LINTED E_CONST_COND */
-				bail2("save_assoc: fprintf replay");
-			if (assoc->sadb_sa_flags & (SADB_X_SAFLAGS_NATT_LOC |
-			    SADB_X_SAFLAGS_NATT_REM)) {
-				if (fprintf(ofile, "encap udp") < 0)
-					/* LINTED E_CONST_COND */
-					bail2("save_assoc: fprintf encap");
-			}
-			/* LINTED E_CONST_COND */
-			savenl();
-			break;
-		case SADB_EXT_LIFETIME_HARD:
-		case SADB_EXT_LIFETIME_SOFT:
-			if (!save_lifetime((struct sadb_lifetime *)ext, ofile))
-				/* LINTED E_CONST_COND */
-				bail2("save_lifetime");
-			/* LINTED E_CONST_COND */
-			savenl();
-			break;
-		case SADB_EXT_ADDRESS_SRC:
-		case SADB_EXT_ADDRESS_DST:
-		case SADB_EXT_ADDRESS_PROXY:
-		case SADB_X_EXT_ADDRESS_NATT_REM:
-		case SADB_X_EXT_ADDRESS_NATT_LOC:
-			addr = (struct sadb_address *)ext;
-			if (!seen_proto && addr->sadb_address_proto) {
-				(void) fprintf(ofile, "    proto %d",
-				    addr->sadb_address_proto);
-				/* LINTED E_CONST_COND */
-				savenl();
-				seen_proto = 1;
-			}
-			if (!save_address(addr, ofile))
-				/* LINTED E_CONST_COND */
-				bail2("save_address");
-			/* LINTED E_CONST_COND */
-			savenl();
-			break;
-		case SADB_EXT_KEY_AUTH:
-		case SADB_EXT_KEY_ENCRYPT:
-			if (!save_key((struct sadb_key *)ext, ofile))
-				/* LINTED E_CONST_COND */
-				bail2("save_address");
-			/* LINTED E_CONST_COND */
-			savenl();
-			break;
-		case SADB_EXT_IDENTITY_SRC:
-		case SADB_EXT_IDENTITY_DST:
-			if (!save_ident((struct sadb_ident *)ext, ofile))
-				/* LINTED E_CONST_COND */
-				bail2("save_address");
-			/* LINTED E_CONST_COND */
-			savenl();
-			break;
-		case SADB_EXT_SENSITIVITY:
-		default:
-			/* Skip over irrelevant extensions. */
-			break;
-		}
-		current += ext->sadb_ext_len;
-	}
-
-	if (fputs(gettext("\n# end assoc\n\n"), ofile) == EOF)
-		/* LINTED E_CONST_COND */
-		bail2("save_assoc: last fputs");
-}
-
-/*
  * Because "save" and "dump" both use the SADB_DUMP message, fold both
  * into the same function.
  */
@@ -2036,7 +878,7 @@
 		    msg->sadb_msg_seq != 0 &&
 		    msg->sadb_msg_errno == 0) {
 			if (ofile == NULL) {
-				print_samsg(get_buffer, B_FALSE);
+				print_samsg(get_buffer, B_FALSE, vflag);
 				(void) putchar('\n');
 			} else {
 				save_assoc(get_buffer, ofile);
@@ -2109,8 +951,9 @@
  * buffer_size: size of buffer
  * spi: spi for this message (set by caller)
  * srcport: source port if specified
- * dstport: destination port is specified
+ * dstport: destination port if specified
  * proto: IP protocol number if specified
+ * iproto: Inner (tunnel mode) IP protocol number if specified
  * NATT note: we are going to assume a semi-sane world where NAT
  * boxen don't explode to multiple addresses.
  */
@@ -2118,11 +961,7 @@
 doaddresses(uint8_t sadb_msg_type, uint8_t sadb_msg_satype, int cmd,
     struct hostent *srchp, struct hostent *dsthp,
     struct sadb_address *src, struct sadb_address *dst,
-    boolean_t unspec_src, uint64_t *buffer, int buffer_size, uint32_t spi,
-    uint16_t srcport, uint16_t dstport, uint16_t proto,
-    struct hostent *natt_lhp, struct hostent *natt_rhp,
-    struct sadb_address *natt_loc, struct sadb_address *natt_rem,
-    uint16_t natt_lport, uint16_t natt_rport)
+    boolean_t unspec_src, uint64_t *buffer, int buffer_size, uint32_t spi)
 {
 	boolean_t single_dst;
 	struct sockaddr_in6 *sin6;
@@ -2130,41 +969,24 @@
 	int i, rc;
 	char **walker;	/* For the SRC and PROXY walking functions. */
 	char *first_match;
-	uint64_t savebuf[SADB_8TO64(MAX_GET_SIZE)];
+	uint64_t savebuf[MAX_GET_SIZE];
+	uint16_t srcport = 0, dstport = 0;
 
 	/*
 	 * Okay, now we have "src", "dst", and maybe "proxy" reassigned
 	 * to point into the buffer to be written to PF_KEY, we can do
 	 * potentially several writes based on destination address.
 	 *
-	 * First, fill in port numbers and protocol in extensions.
+	 * First, obtain port numbers from passed-in extensions.
 	 */
 
 	if (src != NULL) {
-		src->sadb_address_proto = proto;
 		sin6 = (struct sockaddr_in6 *)(src + 1);
-		sin6->sin6_port = htons(srcport);
+		srcport = ntohs(sin6->sin6_port);
 	}
 	if (dst != NULL) {
-		dst->sadb_address_proto = proto;
 		sin6 = (struct sockaddr_in6 *)(dst + 1);
-		sin6->sin6_port = htons(dstport);
-	}
-	if (natt_loc != NULL) {
-		sin6 = (struct sockaddr_in6 *)(natt_loc + 1);
-		bzero(sin6, sizeof (*sin6));
-		bcopy(natt_lhp->h_addr_list[0], &sin6->sin6_addr,
-		    sizeof (struct in6_addr));
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = htons(natt_lport);
-	}
-	if (natt_rem != NULL) {
-		sin6 = (struct sockaddr_in6 *)(natt_rem + 1);
-		bzero(sin6, sizeof (*sin6));
-		bcopy(natt_rhp->h_addr_list[0], &sin6->sin6_addr,
-		    sizeof (struct in6_addr));
-		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = htons(natt_rport);
+		dstport = ntohs(sin6->sin6_port);
 	}
 
 	/*
@@ -2513,12 +1335,12 @@
 			}
 		}
 		if (cmd == CMD_GET) {
-			if (SADB_64TO8(msgp->sadb_msg_len) > MAX_GET_SIZE) {
+			if (msgp->sadb_msg_len > MAX_GET_SIZE) {
 				warnx(gettext("WARNING:  "
 				    "SA information bigger than %d bytes."),
-				    MAX_GET_SIZE);
+				    SADB_64TO8(MAX_GET_SIZE));
 			}
-			print_samsg(buffer, B_FALSE);
+			print_samsg(buffer, B_FALSE, vflag);
 		}
 
 		/* ...and then restore the saved buffer. */
@@ -2541,21 +1363,25 @@
 	uint64_t *buffer, *nexthdr;
 	struct sadb_msg msg;
 	struct sadb_sa *assoc = NULL;
-	struct sadb_address *src = NULL, *dst = NULL, *proxy = NULL;
+	struct sadb_address *src = NULL, *dst = NULL;
+	struct sadb_address *isrc = NULL, *idst = NULL;
 	struct sadb_address *natt_local = NULL, *natt_remote = NULL;
 	struct sadb_key *encrypt = NULL, *auth = NULL;
 	struct sadb_ident *srcid = NULL, *dstid = NULL;
 	struct sadb_lifetime *hard = NULL, *soft = NULL;  /* Current? */
 	struct sockaddr_in6 *sin6;
 	/* MLS TODO:  Need sensitivity eventually. */
-	int next, token, sa_len, alloclen, totallen = sizeof (msg);
+	int next, token, sa_len, alloclen, totallen = sizeof (msg), prefix;
 	uint32_t spi;
-	char *thiscmd;
-	boolean_t readstate = B_FALSE, unspec_src = B_FALSE, use_natt = B_FALSE;
-	struct hostent *srchp = NULL, *dsthp = NULL, *proxyhp = NULL;
+	char *thiscmd, *pstr;
+	boolean_t readstate = B_FALSE, unspec_src = B_FALSE;
+	boolean_t alloc_inner = B_FALSE, use_natt = B_FALSE;
+	struct hostent *srchp = NULL, *dsthp = NULL, *isrchp = NULL,
+	    *idsthp = NULL;
 	struct hostent *natt_lhp = NULL, *natt_rhp = NULL;
-	uint16_t srcport = 0, dstport = 0, natt_lport = 0, natt_rport = 0;
-	uint8_t proto = 0;
+	uint16_t srcport = 0, dstport = 0, natt_lport = 0, natt_rport = 0,
+	    isrcport = 0, idstport = 0;
+	uint8_t proto = 0, iproto = 0;
 
 	thiscmd = (cmd == CMD_ADD) ? "add" : "update";
 
@@ -2657,6 +1483,11 @@
 				    IPSEC_PROTO_AH);
 				break;
 			case TOK_ENCRALG:
+				if (satype == SADB_SATYPE_AH) {
+					warnx(gettext("Cannot specify"
+					    " encryption with SA type ah."));
+					usage();
+				}
 				if (assoc->sadb_sa_encrypt != 0) {
 					warnx(gettext("Can only specify single"
 						" encryption algorithm."));
@@ -2700,6 +1531,26 @@
 			dstport = parsenum(*argv, B_TRUE);
 			argv++;
 			break;
+		case TOK_ISRCPORT:
+			alloc_inner = B_TRUE;
+			if (isrcport != 0) {
+				warnx(gettext("Can only specify "
+					"single inner-source port."));
+				usage();
+			}
+			isrcport = parsenum(*argv, B_TRUE);
+			argv++;
+			break;
+		case TOK_IDSTPORT:
+			alloc_inner = B_TRUE;
+			if (idstport != 0) {
+				warnx(gettext("Can only specify "
+				    "single inner-destination port."));
+				usage();
+			}
+			idstport = parsenum(*argv, B_TRUE);
+			argv++;
+			break;
 		case TOK_NATLPORT:
 			if (natt_lport != 0) {
 				warnx(gettext("Can only specify "
@@ -2740,6 +1591,16 @@
 			proto = parsenum(*argv, B_TRUE);
 			argv++;
 			break;
+		case TOK_IPROTO:
+			alloc_inner = B_TRUE;
+			if (iproto != 0) {
+				warnx(gettext("Can only specify "
+				    "single inner protocol."));
+				usage();
+			}
+			iproto = parsenum(*argv, B_TRUE);
+			argv++;
+			break;
 		case TOK_SRCADDR:
 		case TOK_SRCADDR6:
 			if (src != NULL) {
@@ -2808,45 +1669,162 @@
 			break;
 		case TOK_PROXYADDR:
 		case TOK_PROXYADDR6:
-			if (proxy != NULL) {
+			if (isrc != NULL) {
 				warnx(gettext("Can only specify single "
-					"proxy address."));
+					"proxy/inner-source address."));
 				usage();
 			}
-			sa_len = parseaddr(*argv, &proxyhp,
+			if ((pstr = strchr(*argv, '/')) != NULL) {
+				/* Parse and range-check the prefix. */
+				errno = 0;
+				prefix = strtol(pstr + 1, NULL, 10);
+				if (errno != 0 || prefix < 0 || prefix > 128) {
+					warnx(gettext("Invalid prefix %s."),
+					    pstr);
+					usage();
+				}
+				/* Recycle pstr: copy of the address only. */
+				alloclen = (int)(pstr - *argv);
+				pstr = malloc(alloclen + 1);
+				if (pstr == NULL) {
+					Bail("malloc(pstr)");
+				}
+				(void) strlcpy(pstr, *argv, alloclen + 1);
+			} else {
+				pstr = *argv;
+				/*
+				 * Assume mapping to AF_INET6, and we're a host.
+				 * XXX some miscreants may still make classful
+				 * assumptions.  If this is a problem, fix it
+				 * here.
+				 */
+				prefix = 128;
+			}
+			sa_len = parseaddr(pstr, &isrchp,
 			    (token == TOK_PROXYADDR6));
+			if (pstr != *argv)
+				free(pstr);
 			argv++;
-			alloclen = sizeof (*proxy) + roundup(sa_len, 8);
-			proxy = malloc(alloclen);
-			if (proxy == NULL)
-				Bail("malloc(proxy)");
+			alloclen = sizeof (*isrc) + roundup(sa_len, 8);
+			isrc = malloc(alloclen);
+			if (isrc == NULL)
+				Bail("malloc(isrc)");
 			totallen += alloclen;
-			proxy->sadb_address_len = SADB_8TO64(alloclen);
-			proxy->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
-			proxy->sadb_address_reserved = 0;
-			proxy->sadb_address_prefixlen = 0;
-			proxy->sadb_address_proto = 0;
-			if (proxyhp == &dummy.he ||
-			    proxyhp->h_addr_list[1] == NULL) {
+			isrc->sadb_address_len = SADB_8TO64(alloclen);
+			isrc->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
+			isrc->sadb_address_reserved = 0;
+			isrc->sadb_address_prefixlen = prefix;
+			isrc->sadb_address_proto = 0;
+			if (isrchp == &dummy.he ||
+			    isrchp->h_addr_list[1] == NULL) {
 				/*
 				 * Single address with -n flag or single name.
 				 */
-				sin6 = (struct sockaddr_in6 *)(proxy + 1);
+				sin6 = (struct sockaddr_in6 *)(isrc + 1);
 				bzero(sin6, sizeof (*sin6));
 				sin6->sin6_family = AF_INET6;
-				bcopy(proxyhp->h_addr_list[0], &sin6->sin6_addr,
+				bcopy(isrchp->h_addr_list[0], &sin6->sin6_addr,
 				    sizeof (struct in6_addr));
+				/*
+				 * normalize prefixlen for IPv4-mapped
+				 * addresses.
+				 */
+				if (prefix <= 32 &&
+				    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+					isrc->sadb_address_prefixlen += 96;
+				alloc_inner = B_TRUE;
 			} else {
 				/*
-				 * If the proxy address is vague, don't bother.
+				 * If the proxy/isrc address is vague, don't
+				 * bother.
 				 */
 				totallen -= alloclen;
-				free(proxy);
-				proxy = NULL;
-				warnx(gettext("Proxy address %s is vague, not"
-					" using."), proxyhp->h_name);
-				freehostent(proxyhp);
-				proxyhp = NULL;
+				free(isrc);
+				isrc = NULL;
+				warnx(gettext("Proxy/inner-source address %s "
+				    "is vague, not using."), isrchp->h_name);
+				freehostent(isrchp);
+				isrchp = NULL;
+			}
+			break;
+		case TOK_IDSTADDR:
+		case TOK_IDSTADDR6:
+			if (idst != NULL) {
+				warnx(gettext("Can only specify single "
+					"inner-destination address."));
+				usage();
+			}
+			if ((pstr = strchr(*argv, '/')) != NULL) {
+				/* Parse and range-check the prefix. */
+				errno = 0;
+				prefix = strtol(pstr + 1, NULL, 10);
+				if (errno != 0 || prefix < 0 || prefix > 128) {
+					warnx(gettext("Invalid prefix %s."),
+					    pstr);
+					usage();
+				}
+				/* Recycle pstr: copy of the address only. */
+				alloclen = (int)(pstr - *argv);
+				pstr = malloc(alloclen + 1);
+				if (pstr == NULL) {
+					Bail("malloc(pstr)");
+				}
+				(void) strlcpy(pstr, *argv, alloclen + 1);
+			} else {
+				pstr = *argv;
+				/*
+				 * Assume mapping to AF_INET6, and we're a host.
+				 * XXX some miscreants may still make classful
+				 * assumptions.  If this is a problem, fix it
+				 * here.
+				 */
+				prefix = 128;
+			}
+			sa_len = parseaddr(pstr, &idsthp,
+			    (token == TOK_IDSTADDR6));
+			if (pstr != *argv)
+				free(pstr);
+			argv++;
+			alloclen = sizeof (*idst) + roundup(sa_len, 8);
+			idst = malloc(alloclen);
+			if (idst == NULL)
+				Bail("malloc(idst)");
+			totallen += alloclen;
+			idst->sadb_address_len = SADB_8TO64(alloclen);
+			idst->sadb_address_exttype =
+			    SADB_X_EXT_ADDRESS_INNER_DST;
+			idst->sadb_address_reserved = 0;
+			idst->sadb_address_prefixlen = prefix;
+			idst->sadb_address_proto = 0;
+			if (idsthp == &dummy.he ||
+			    idsthp->h_addr_list[1] == NULL) {
+				/*
+				 * Single address with -n flag or single name.
+				 */
+				sin6 = (struct sockaddr_in6 *)(idst + 1);
+				bzero(sin6, sizeof (*sin6));
+				sin6->sin6_family = AF_INET6;
+				bcopy(idsthp->h_addr_list[0], &sin6->sin6_addr,
+				    sizeof (struct in6_addr));
+				/*
+				 * normalize prefixlen for IPv4-mapped
+				 * addresses.
+				 */
+				if (prefix <= 32 &&
+				    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+					idst->sadb_address_prefixlen += 96;
+				alloc_inner = B_TRUE;
+			} else {
+				/*
+				 * If the idst address is vague, don't bother.
+				 */
+				totallen -= alloclen;
+				free(idst);
+				idst = NULL;
+				warnx(gettext("Inner destination address %s "
+				    "is vague, not using."), idsthp->h_name);
+				freehostent(idsthp);
+				idsthp = NULL;
 			}
 			break;
 		case TOK_NATLOC:
@@ -2872,15 +1850,28 @@
 			natt_local->sadb_address_reserved = 0;
 			natt_local->sadb_address_prefixlen = 0;
 			natt_local->sadb_address_proto = 0;
-			if (natt_lhp == &dummy.he) {
+			if (natt_lhp == &dummy.he ||
+			    natt_lhp->h_addr_list[1] == NULL) {
 				/*
-				 * Single address with -n flag.
+				 * Single address with -n flag or single name.
 				 */
 				sin6 = (struct sockaddr_in6 *)(natt_local + 1);
 				bzero(sin6, sizeof (*sin6));
 				sin6->sin6_family = AF_INET6;
 				bcopy(natt_lhp->h_addr_list[0],
 				    &sin6->sin6_addr, sizeof (struct in6_addr));
+			} else {
+				/*
+				 * The name maps to several addresses, so the
+				 * NAT-T local address is vague; discard it.
+				 */
+				totallen -= alloclen;
+				free(natt_local);
+				natt_local = NULL;
+				warnx(gettext("NAT-T local address %s "
+				    "is vague, not using."), natt_lhp->h_name);
+				freehostent(natt_lhp);
+				natt_lhp = NULL;
 			}
 			break;
 		case TOK_NATREM:
@@ -2906,15 +1897,28 @@
 			natt_remote->sadb_address_reserved = 0;
 			natt_remote->sadb_address_prefixlen = 0;
 			natt_remote->sadb_address_proto = 0;
-			if (natt_rhp == &dummy.he) {
+			if (natt_rhp == &dummy.he ||
+			    natt_rhp->h_addr_list[1] == NULL) {
 				/*
-				 * Single address with -n flag.
+				 * Single address with -n flag or single name.
 				 */
 				sin6 = (struct sockaddr_in6 *)(natt_remote + 1);
 				bzero(sin6, sizeof (*sin6));
 				sin6->sin6_family = AF_INET6;
 				bcopy(natt_rhp->h_addr_list[0],
 				    &sin6->sin6_addr, sizeof (struct in6_addr));
+			} else {
+				/*
+				 * The name maps to several addresses, so the
+				 * NAT-T remote address is vague; discard it.
+				 */
+				totallen -= alloclen;
+				free(natt_remote);
+				natt_remote = NULL;
+				warnx(gettext("NAT-T remote address %s "
+				    "is vague, not using."), natt_rhp->h_name);
+				freehostent(natt_rhp);
+				natt_rhp = NULL;
 			}
 			break;
 		case TOK_ENCRKEY:
@@ -3112,6 +2116,39 @@
 	} while (token != TOK_EOF);
 
 	/*
+	 * If we specify inner ports w/o addresses, we still need to
+	 * allocate.  Also, if we have one inner address, we need the
+	 * other, even if we don't specify anything.
+	 */
+	if (alloc_inner && idst == NULL) {
+		/* Allocate zeroed-out. */
+		alloclen = sizeof (*idst) + sizeof (struct sockaddr_in6);
+		idst = calloc(1, alloclen);
+		if (idst == NULL) {
+			Bail("malloc(implicit idst)");
+		}
+		totallen += alloclen;
+		idst->sadb_address_len = SADB_8TO64(alloclen);
+		idst->sadb_address_exttype = SADB_X_EXT_ADDRESS_INNER_DST;
+		sin6 = (struct sockaddr_in6 *)(idst + 1);
+		sin6->sin6_family = AF_INET6;
+	}
+
+	if (alloc_inner && isrc == NULL) {
+		/* Allocate zeroed-out. */
+		alloclen = sizeof (*isrc) + sizeof (struct sockaddr_in6);
+		isrc = calloc(1, alloclen);
+		if (isrc == NULL) {
+			Bail("malloc(implicit isrc)");
+		}
+		totallen += alloclen;
+		isrc->sadb_address_len = SADB_8TO64(alloclen);
+		isrc->sadb_address_exttype = SADB_X_EXT_ADDRESS_INNER_SRC;
+		sin6 = (struct sockaddr_in6 *)(isrc + 1);
+		sin6->sin6_family = AF_INET6;
+	}
+
+	/*
 	 * Okay, so now I have all of the potential extensions!
 	 * Allocate a single contiguous buffer.  Keep in mind that it'll
 	 * be enough because the key itself will be yanked.
@@ -3135,6 +2172,7 @@
 		bzero(sin6, sizeof (*sin6));
 		sin6->sin6_family = AF_INET6;
 	}
+
 	msg.sadb_msg_len = SADB_8TO64(totallen);
 
 	buffer = malloc(totallen);
@@ -3174,6 +2212,21 @@
 				assoc->sadb_sa_flags |= SADB_X_SAFLAGS_NATT_LOC;
 		}
 
+		if (alloc_inner) {
+			/*
+			 * For now, assume RFC 3884's dream of transport-mode
+			 * SAs with inner IP address selectors will not
+			 * happen.
+			 */
+			assoc->sadb_sa_flags |= SADB_X_SAFLAGS_TUNNEL;
+			if (proto != 0 && proto != IPPROTO_ENCAP &&
+			    proto != IPPROTO_IPV6) {
+				warnx(gettext("WARNING: Protocol type %d not "
+					"for use with Tunnel-Mode SA."), proto);
+				/* Continue and let PF_KEY scream... */
+			}
+		}
+
 		bcopy(assoc, nexthdr, SADB_64TO8(assoc->sadb_sa_len));
 		nexthdr += assoc->sadb_sa_len;
 		/* Save the SPI for the case of an error. */
@@ -3231,6 +2284,8 @@
 		bcopy(dst, nexthdr, SADB_64TO8(dst->sadb_address_len));
 		free(dst);
 		dst = (struct sadb_address *)nexthdr;
+		dst->sadb_address_proto = proto;
+		((struct sockaddr_in6 *)(dst + 1))->sin6_port = htons(dstport);
 		nexthdr += dst->sadb_address_len;
 	} else {
 		warnx(gettext("Need destination address for %s."), thiscmd);
@@ -3258,16 +2313,23 @@
 		}
 
 		if (natt_remote != NULL) {
+			bcopy(natt_remote, nexthdr,
+			    SADB_64TO8(natt_remote->sadb_address_len));
 			free(natt_remote);
 			natt_remote = (struct sadb_address *)nexthdr;
 			nexthdr += natt_remote->sadb_address_len;
+			((struct sockaddr_in6 *)(natt_remote + 1))->sin6_port =
+			    htons(natt_rport);
 		}
+
 		if (natt_local != NULL) {
 			bcopy(natt_local, nexthdr,
 			    SADB_64TO8(natt_local->sadb_address_len));
 			free(natt_local);
 			natt_local = (struct sadb_address *)nexthdr;
 			nexthdr += natt_local->sadb_address_len;
+			((struct sockaddr_in6 *)(natt_local + 1))->sin6_port =
+			    htons(natt_lport);
 		}
 	}
 	/*
@@ -3278,24 +2340,38 @@
 	bcopy(src, nexthdr, SADB_64TO8(src->sadb_address_len));
 	free(src);
 	src = (struct sadb_address *)nexthdr;
+	src->sadb_address_proto = proto;
+	((struct sockaddr_in6 *)(src + 1))->sin6_port = htons(srcport);
 	nexthdr += src->sadb_address_len;
 
-	if (proxy != NULL) {
-		bcopy(proxy, nexthdr, SADB_64TO8(proxy->sadb_address_len));
-		free(proxy);
-		proxy = (struct sadb_address *)nexthdr;
-		nexthdr += proxy->sadb_address_len;
+	if (isrc != NULL) {
+		bcopy(isrc, nexthdr, SADB_64TO8(isrc->sadb_address_len));
+		free(isrc);
+		isrc = (struct sadb_address *)nexthdr;
+		isrc->sadb_address_proto = iproto;
+		((struct sockaddr_in6 *)(isrc + 1))->sin6_port =
+		    htons(isrcport);
+		nexthdr += isrc->sadb_address_len;
+	}
+
+	if (idst != NULL) {
+		bcopy(idst, nexthdr, SADB_64TO8(idst->sadb_address_len));
+		free(idst);
+		idst = (struct sadb_address *)nexthdr;
+		idst->sadb_address_proto = iproto;
+		((struct sockaddr_in6 *)(idst + 1))->sin6_port =
+		    htons(idstport);
+		nexthdr += idst->sadb_address_len;
 	}
 
 	doaddresses((cmd == CMD_ADD) ? SADB_ADD : SADB_UPDATE, satype, cmd,
-	    srchp, dsthp, src, dst, unspec_src, buffer, totallen, spi,
-	    srcport, dstport, proto, natt_lhp, natt_rhp,
-	    natt_local, natt_remote, natt_lport, natt_rport);
-
+	    srchp, dsthp, src, dst, unspec_src, buffer, totallen, spi);
 	free(buffer);
 
-	if (proxyhp != NULL && proxyhp != &dummy.he)
-		freehostent(proxyhp);
+	if (isrchp != NULL && isrchp != &dummy.he)
+		freehostent(isrchp);
+	if (idsthp != NULL && idsthp != &dummy.he)
+		freehostent(idsthp);
 	if (srchp != NULL && srchp != &dummy.he)
 		freehostent(srchp);
 	if (dsthp != NULL && dsthp != &dummy.he)
@@ -3460,15 +2536,19 @@
 	if ((srcport != 0) && (src == NULL)) {
 		ALLOC_ADDR_EXT(src, SADB_EXT_ADDRESS_SRC);
 		sin6 = (struct sockaddr_in6 *)(src + 1);
+		src->sadb_address_proto = proto;
 		bzero(sin6, sizeof (*sin6));
 		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(srcport);
 	}
 
 	if ((dstport != 0) && (dst == NULL)) {
 		ALLOC_ADDR_EXT(dst, SADB_EXT_ADDRESS_DST);
 		sin6 = (struct sockaddr_in6 *)(dst + 1);
+		dst->sadb_address_proto = proto;
 		bzero(sin6, sizeof (*sin6));
 		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(dstport);
 	}
 
 	/* So I have enough of the message to send it down! */
@@ -3476,8 +2556,7 @@
 
 	doaddresses((cmd == CMD_GET) ? SADB_GET : SADB_DELETE, satype, cmd,
 	    srchp, dsthp, src, dst, unspec_src, get_buffer,
-	    sizeof (get_buffer), spi, srcport, dstport, proto,
-	    NULL, NULL, NULL, NULL, 0, 0);
+	    sizeof (get_buffer), spi);
 
 	if (srchp != NULL && srchp != &dummy.he)
 		freehostent(srchp);
@@ -3531,67 +2610,12 @@
 		 * Q:  Should I use the same method of printing as GET does?
 		 * A:  For now, yes.
 		 */
-		print_samsg(get_buffer, B_TRUE);
+		print_samsg(get_buffer, B_TRUE, vflag);
 		(void) putchar('\n');
 	}
 }
 
 /*
- * Open the output file for the "save" command.
- */
-static FILE *
-opensavefile(char *filename)
-{
-	int fd;
-	FILE *retval;
-	struct stat buf;
-
-	/*
-	 * If the user specifies "-" or doesn't give a filename, then
-	 * dump to stdout.  Make sure to document the dangers of files
-	 * that are NFS, directing your output to strange places, etc.
-	 */
-	if (filename == NULL || strcmp("-", filename) == 0)
-		return (stdout);
-
-	/*
-	 * open the file with the create bits set.  Since I check for
-	 * real UID == root in main(), I won't worry about the ownership
-	 * problem.
-	 */
-	fd = open(filename, O_WRONLY | O_EXCL | O_CREAT | O_TRUNC, S_IRUSR);
-	if (fd == -1) {
-		if (errno != EEXIST)
-			bail_msg("%s %s: %s", filename, gettext("open error"),
-			    strerror(errno));
-		fd = open(filename, O_WRONLY | O_TRUNC, 0);
-		if (fd == -1)
-			bail_msg("%s %s: %s", filename, gettext("open error"),
-			    strerror(errno));
-		if (fstat(fd, &buf) == -1) {
-			(void) close(fd);
-			bail_msg("%s fstat: %s", filename, strerror(errno));
-		}
-		if (S_ISREG(buf.st_mode) &&
-		    ((buf.st_mode & S_IAMB) != S_IRUSR)) {
-			warnx(gettext("WARNING: Save file already exists with "
-				"permission %o."), buf.st_mode & S_IAMB);
-			warnx(gettext("Normal users may be able to read IPsec "
-				"keying material."));
-		}
-	}
-
-	/* Okay, we have an FD.  Assign it to a stdio FILE pointer. */
-	retval = fdopen(fd, "w");
-	if (retval == NULL) {
-		(void) close(fd);
-		bail_msg("%s %s: %s", filename, gettext("fdopen error"),
-		    strerror(errno));
-	}
-	return (retval);
-}
-
-/*
  * Either mask or unmask all relevant signals.
  */
 static void
@@ -3609,27 +2633,6 @@
 }
 
 /*
- * Wrapper for inet_ntop(3SOCKET). Expects AF_INET6 address.
- * Process the address as a AF_INET address if it is a IPv4 mapped
- * address.
- */
-static const char *
-do_inet_ntop(const void *addr, char *cp, size_t size)
-{
-	boolean_t isv4;
-	struct in6_addr *inaddr6 = (struct in6_addr *)addr;
-	struct in_addr inaddr;
-
-	if ((isv4 = IN6_IS_ADDR_V4MAPPED(inaddr6)) == B_TRUE) {
-		IN6_V4MAPPED_TO_INADDR(inaddr6, &inaddr);
-	}
-
-	return (inet_ntop(isv4 ? AF_INET : AF_INET6,
-	    isv4 ? (void *)&inaddr : inaddr6, cp, size));
-}
-
-
-/*
  * Assorted functions to print help text.
  */
 #define	puts_tr(s) (void) puts(gettext(s))
--- a/usr/src/lib/libipsecutil/Makefile.com	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/lib/libipsecutil/Makefile.com	Fri Nov 03 07:10:24 2006 -0800
@@ -37,7 +37,7 @@
 SRCS = 		$(SRCDIR)/ipsec_util.c $(SRCDIR)/algs.c $(SRCDIR)/err.c
 
 $(LINTLIB):=	SRCS = $(SRCDIR)/$(LINTSRC)
-LDLIBS +=	-lnsl -lc
+LDLIBS +=	-lsocket -lnsl -lc
 
 CFLAGS +=	$(CCVERBOSE)
 CPPFLAGS +=	-I$(SRCDIR)
--- a/usr/src/lib/libipsecutil/common/ipsec_util.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/lib/libipsecutil/common/ipsec_util.c	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -118,13 +117,15 @@
  */
 
 int
-dump_sockaddr(struct sockaddr *sa, boolean_t addr_only, FILE *where)
+dump_sockaddr(struct sockaddr *sa, uint8_t prefixlen, boolean_t addr_only,
+    FILE *where)
 {
 	struct sockaddr_in	*sin;
 	struct sockaddr_in6	*sin6;
 	char			*printable_addr, *protocol;
 	uint8_t			*addrptr;
-	char			storage[INET6_ADDRSTRLEN];
+	/* Add 4 chars to hold '/nnn' for prefixes. */
+	char			storage[INET6_ADDRSTRLEN + 4];
 	uint16_t		port;
 	boolean_t		unspec;
 	struct hostent		*hp;
@@ -157,7 +158,14 @@
 	    NULL) {
 		printable_addr = gettext("<inet_ntop() failed>");
 	} else {
+		char prefix[5];	/* "/nnn" with terminator. */
+
+		(void) snprintf(prefix, sizeof (prefix), "/%d", prefixlen);
 		printable_addr = storage;
+		if (prefixlen != 0) {
+			(void) strlcat(printable_addr, prefix,
+			    sizeof (storage));
+		}
 	}
 	if (addr_only) {
 		if (fprintf(where, "%s", printable_addr) < 0)
@@ -983,6 +991,10 @@
 		return (gettext("number of block sizes inconsistent"));
 	case SPD_DIAGNOSTIC_ALG_MECH_NAME_LEN:
 		return (gettext("invalid mechanism name length"));
+	case SPD_DIAGNOSTIC_NOT_GLOBAL_OP:
+		return (gettext("operation not applicable to all policies"));
+	case SPD_DIAGNOSTIC_NO_TUNNEL_SELECTORS:
+		return (gettext("using selectors on a transport-mode tunnel"));
 	default:
 		return (gettext("unknown diagnostic"));
 	}
@@ -999,7 +1011,7 @@
 keysock_diag(int diagnostic)
 {
 	switch (diagnostic) {
-	case  SADB_X_DIAGNOSTIC_NONE:
+	case SADB_X_DIAGNOSTIC_NONE:
 		return (gettext("No diagnostic"));
 	case SADB_X_DIAGNOSTIC_UNKNOWN_MSG:
 		return (gettext("Unknown message type"));
@@ -1020,7 +1032,7 @@
 	case SADB_X_DIAGNOSTIC_BAD_DST_AF:
 		return (gettext("Bad destination address family"));
 	case SADB_X_DIAGNOSTIC_BAD_PROXY_AF:
-		return (gettext("Bad proxy address family"));
+		return (gettext("Bad inner-source address family"));
 	case SADB_X_DIAGNOSTIC_AF_MISMATCH:
 		return (gettext("Source/destination address family mismatch"));
 	case SADB_X_DIAGNOSTIC_BAD_SRC:
@@ -1104,20 +1116,1396 @@
 	case SADB_X_DIAGNOSTIC_DUPLICATE_KMC:
 		return (gettext("Duplicate key management cookie"));
 	case SADB_X_DIAGNOSTIC_MISSING_NATT_LOC:
-		return (gettext("Missing NATT local address"));
+		return (gettext("Missing NAT-T local address"));
 	case SADB_X_DIAGNOSTIC_MISSING_NATT_REM:
-		return (gettext("Missing NATT remote address"));
+		return (gettext("Missing NAT-T remote address"));
 	case SADB_X_DIAGNOSTIC_DUPLICATE_NATT_LOC:
-		return (gettext("Duplicate NATT local address"));
+		return (gettext("Duplicate NAT-T local address"));
 	case SADB_X_DIAGNOSTIC_DUPLICATE_NATT_REM:
-		return (gettext("Duplicate NATT remote address"));
+		return (gettext("Duplicate NAT-T remote address"));
 	case SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC:
-		return (gettext("Malformed NATT local address"));
+		return (gettext("Malformed NAT-T local address"));
 	case SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM:
-		return (gettext("Malformed NATT remote address"));
+		return (gettext("Malformed NAT-T remote address"));
 	case SADB_X_DIAGNOSTIC_DUPLICATE_NATT_PORTS:
-		return (gettext("Duplicate NATT ports"));
+		return (gettext("Duplicate NAT-T ports"));
+	case SADB_X_DIAGNOSTIC_MISSING_INNER_SRC:
+		return (gettext("Missing inner source address"));
+	case SADB_X_DIAGNOSTIC_MISSING_INNER_DST:
+		return (gettext("Missing inner destination address"));
+	case SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC:
+		return (gettext("Duplicate inner source address"));
+	case SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST:
+		return (gettext("Duplicate inner destination address"));
+	case SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC:
+		return (gettext("Malformed inner source address"));
+	case SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST:
+		return (gettext("Malformed inner destination address"));
+	case SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC:
+		return (gettext("Invalid inner-source prefix length"));
+	case SADB_X_DIAGNOSTIC_PREFIX_INNER_DST:
+		return (gettext("Invalid inner-destination prefix length"));
+	case SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF:
+		return (gettext("Bad inner-destination address family"));
+	case SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH:
+		return (gettext(
+		    "Inner source/destination address family mismatch"));
+	case SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF:
+		return (gettext("Bad NAT-T remote address family"));
+	case SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF:
+		return (gettext("Bad NAT-T local address family"));
+	case SADB_X_DIAGNOSTIC_PROTO_MISMATCH:
+		return (gettext("Source/destination protocol mismatch"));
+	case SADB_X_DIAGNOSTIC_INNER_PROTO_MISMATCH:
+		return (gettext("Inner source/destination protocol mismatch"));
+	case SADB_X_DIAGNOSTIC_DUAL_PORT_SETS:
+		return (gettext("Both inner ports and outer ports are set"));
 	default:
 		return (gettext("Unknown diagnostic code"));
 	}
 }
+
+/*
+ * Convert a netmask to a prefix length by counting its leading one bits.
+ * Masks are assumed contiguous, so counting stops at the first zero bit.
+ */
+int
+in_masktoprefix(uint8_t *mask, boolean_t is_v4mapped)
+{
+	int rc = 0;
+	uint8_t last;
+	int limit = IPV6_ABITS;
+
+	/* For a v4-mapped mask only the trailing IP_ABITS bits are examined. */
+	if (is_v4mapped) {
+		mask += ((IPV6_ABITS - IP_ABITS)/8);
+		limit = IP_ABITS;
+	}
+
+	/* Whole bytes of ones; return at the limit so we never read past it. */
+	while (*mask == 0xff) {
+		rc += 8;
+		if (rc == limit)
+			return (limit);
+		mask++;
+	}
+
+	/* Leading ones of the first partial byte; stop at its first zero. */
+	for (last = *mask; (last & 0x80) != 0; last = (last << 1) & 0xff)
+		rc++;
+
+	return (rc);
+}
+
+/*
+ * Write a PF_KEY diagnostic code and its keysock_diag() text to "file".
+ */
+void
+print_diagnostic(FILE *file, uint16_t diagnostic)
+{
+	const char *text = keysock_diag(diagnostic);
+	/* Two leading spaces leave room for the longest message on one line. */
+	(void) fprintf(file, gettext("  Diagnostic code %u:  %s.\n"),
+	    diagnostic, text);
+}
+
+/*
+ * Print the base PF_KEY message: type, SA type, errno/diagnostic, len/seq/pid.
+ */
+void
+print_sadb_msg(struct sadb_msg *samsg, time_t wallclock, boolean_t vflag)
+{
+	if (wallclock != 0)	/* 0 suppresses the timestamp line */
+		printsatime(wallclock, gettext("%sTimestamp: %s\n"), "", NULL,
+		    vflag);
+
+	(void) printf(gettext("Base message (version %u) type "),
+	    samsg->sadb_msg_version);
+	switch (samsg->sadb_msg_type) {
+	case SADB_RESERVED:
+		(void) printf(gettext("RESERVED (warning: set to 0)"));
+		break;
+	case SADB_GETSPI:
+		(void) printf("GETSPI");
+		break;
+	case SADB_UPDATE:
+		(void) printf("UPDATE");
+		break;
+	case SADB_ADD:
+		(void) printf("ADD");
+		break;
+	case SADB_DELETE:
+		(void) printf("DELETE");
+		break;
+	case SADB_GET:
+		(void) printf("GET");
+		break;
+	case SADB_ACQUIRE:
+		(void) printf("ACQUIRE");
+		break;
+	case SADB_REGISTER:
+		(void) printf("REGISTER");
+		break;
+	case SADB_EXPIRE:
+		(void) printf("EXPIRE");
+		break;
+	case SADB_FLUSH:
+		(void) printf("FLUSH");
+		break;
+	case SADB_DUMP:
+		(void) printf("DUMP");
+		break;
+	case SADB_X_PROMISC:
+		(void) printf("X_PROMISC");
+		break;
+	case SADB_X_INVERSE_ACQUIRE:
+		(void) printf("X_INVERSE_ACQUIRE");
+		break;
+	default:
+		(void) printf(gettext("Unknown (%u)"), samsg->sadb_msg_type);
+		break;
+	}
+	(void) printf(gettext(", SA type "));
+
+	switch (samsg->sadb_msg_satype) {
+	case SADB_SATYPE_UNSPEC:
+		(void) printf(gettext("<unspecified/all>"));
+		break;
+	case SADB_SATYPE_AH:
+		(void) printf("AH");
+		break;
+	case SADB_SATYPE_ESP:
+		(void) printf("ESP");
+		break;
+	case SADB_SATYPE_RSVP:
+		(void) printf("RSVP");
+		break;
+	case SADB_SATYPE_OSPFV2:
+		(void) printf("OSPFv2");
+		break;
+	case SADB_SATYPE_RIPV2:
+		(void) printf("RIPv2");
+		break;
+	case SADB_SATYPE_MIP:
+		(void) printf(gettext("Mobile IP"));
+		break;
+	default:
+		(void) printf(gettext("<unknown %u>"), samsg->sadb_msg_satype);
+		break;
+	}
+
+	(void) printf(".\n");
+
+	if (samsg->sadb_msg_errno != 0) {
+		(void) printf(gettext("Error %s from PF_KEY.\n"),
+		    strerror(samsg->sadb_msg_errno));
+		print_diagnostic(stdout, samsg->sadb_x_msg_diagnostic);
+	}
+
+	(void) printf(gettext("Message length %u bytes, seq=%u, pid=%u.\n"),
+	    SADB_64TO8(samsg->sadb_msg_len), samsg->sadb_msg_seq,
+	    samsg->sadb_msg_pid);
+}
+
+/*
+ * Print the SA extension for PF_KEY: SPI, replay, state, algorithms, flags.
+ */
+void
+print_sa(char *prefix, struct sadb_sa *assoc)
+{
+	if (assoc->sadb_sa_len != SADB_8TO64(sizeof (*assoc))) { /* non-fatal */
+		warnx(gettext("WARNING: SA info extension length (%u) is bad."),
+		    SADB_64TO8(assoc->sadb_sa_len));
+	}
+
+	(void) printf(gettext("%sSADB_ASSOC spi=0x%x, replay=%u, state="),
+	    prefix, ntohl(assoc->sadb_sa_spi), assoc->sadb_sa_replay);
+	switch (assoc->sadb_sa_state) {
+	case SADB_SASTATE_LARVAL:
+		(void) printf(gettext("LARVAL"));
+		break;
+	case SADB_SASTATE_MATURE:
+		(void) printf(gettext("MATURE"));
+		break;
+	case SADB_SASTATE_DYING:
+		(void) printf(gettext("DYING"));
+		break;
+	case SADB_SASTATE_DEAD:
+		(void) printf(gettext("DEAD"));
+		break;
+	default:
+		(void) printf(gettext("<unknown %u>"), assoc->sadb_sa_state);
+	}
+
+	if (assoc->sadb_sa_auth != SADB_AALG_NONE) {
+		(void) printf(gettext("\n%sAuthentication algorithm = "),
+		    prefix);
+		(void) dump_aalg(assoc->sadb_sa_auth, stdout);
+	}
+
+	if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
+		(void) printf(gettext("\n%sEncryption algorithm = "), prefix);
+		(void) dump_ealg(assoc->sadb_sa_encrypt, stdout);
+	}
+
+	(void) printf(gettext("\n%sflags=0x%x < "), prefix,
+	    assoc->sadb_sa_flags);
+	if (assoc->sadb_sa_flags & SADB_SAFLAGS_PFS)
+		(void) printf("PFS ");
+	if (assoc->sadb_sa_flags & SADB_SAFLAGS_NOREPLAY)
+		(void) printf("NOREPLAY ");
+
+	/* BEGIN Solaris-specific flags. */
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_USED)
+		(void) printf("X_USED ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_UNIQUE)
+		(void) printf("X_UNIQUE ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_AALG1)
+		(void) printf("X_AALG1 ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_AALG2)
+		(void) printf("X_AALG2 ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_EALG1)
+		(void) printf("X_EALG1 ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_EALG2)
+		(void) printf("X_EALG2 ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC)
+		(void) printf("X_NATT_LOC ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM)
+		(void) printf("X_NATT_REM ");
+	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_TUNNEL)
+		(void) printf("X_TUNNEL ");
+	/* END Solaris-specific flags. */
+
+	(void) printf(">\n");
+}
+
+/*
+ * Print "lt" as a date using format "msg"; add the raw value if verbose.
+ */
+void
+printsatime(int64_t lt, const char *msg, const char *pfx, const char *pfx2,
+    boolean_t vflag)
+{
+	char tbuf[TBUF_SIZE]; /* For strftime() call. */
+	const char *tp = tbuf;
+	time_t t = lt;
+	struct tm res;
+
+	if (t != lt)	/* lt does not fit in time_t: clamp it. */
+		t = (lt > 0) ? LONG_MAX : LONG_MIN;
+
+	if (localtime_r(&t, &res) == NULL ||	/* never pass a NULL tm on */
+	    strftime(tbuf, TBUF_SIZE, NULL, &res) == 0)
+		tp = gettext("<time conversion failed>");
+	(void) printf(msg, pfx, tp);
+	if (vflag && (pfx2 != NULL))
+		(void) printf(gettext("%s\t(raw time value %llu)\n"), pfx2, lt);
+}
+
+/*
+ * Print the SA lifetime information.  (An SADB_EXT_LIFETIME_* extension.)
+ */
+void
+print_lifetimes(time_t wallclock, struct sadb_lifetime *current,
+    struct sadb_lifetime *hard, struct sadb_lifetime *soft, boolean_t vflag)
+{
+	int64_t scratch;
+	char *soft_prefix = gettext("SLT: ");
+	char *hard_prefix = gettext("HLT: ");
+	char *current_prefix = gettext("CLT: ");
+
+	if (current != NULL &&
+	    current->sadb_lifetime_len != SADB_8TO64(sizeof (*current))) {
+		warnx(gettext("WARNING: CURRENT lifetime extension length "
+			"(%u) is bad."),
+		    SADB_64TO8(current->sadb_lifetime_len));
+	}
+
+	if (hard != NULL &&
+	    hard->sadb_lifetime_len != SADB_8TO64(sizeof (*hard))) {
+		warnx(gettext("WARNING: HARD lifetime "
+			"extension length (%u) is bad."),
+		    SADB_64TO8(hard->sadb_lifetime_len));
+	}
+
+	if (soft != NULL &&
+	    soft->sadb_lifetime_len != SADB_8TO64(sizeof (*soft))) {
+		warnx(gettext("WARNING: SOFT lifetime "
+		    "extension length (%u) is bad."),
+		    SADB_64TO8(soft->sadb_lifetime_len));
+	}
+
+	(void) printf(" LT: Lifetime information\n");
+
+	if (current != NULL) {
+		/* Express values as current values. */
+		(void) printf(gettext(
+		    "%s%llu bytes protected, %u allocations used.\n"),
+		    current_prefix, current->sadb_lifetime_bytes,
+		    current->sadb_lifetime_allocations);
+		printsatime(current->sadb_lifetime_addtime,
+		    gettext("%sSA added at time %s\n"),
+		    current_prefix, current_prefix, vflag);
+		if (current->sadb_lifetime_usetime != 0) {
+			printsatime(current->sadb_lifetime_usetime,
+			    gettext("%sSA first used at time %s\n"),
+			    current_prefix, current_prefix, vflag);
+		}
+		printsatime(wallclock, gettext("%sTime now is %s\n"),
+		    current_prefix, current_prefix, vflag);
+	}
+
+	if (soft != NULL) {
+		(void) printf(gettext("%sSoft lifetime information:  "),
+		    soft_prefix);
+		(void) printf(gettext("%llu bytes of lifetime, %u "
+		    "allocations.\n"), soft->sadb_lifetime_bytes,
+		    soft->sadb_lifetime_allocations);
+		(void) printf(gettext("%s%llu seconds of post-add lifetime.\n"),
+		    soft_prefix, soft->sadb_lifetime_addtime);
+		(void) printf(gettext("%s%llu seconds of post-use lifetime.\n"),
+		    soft_prefix, soft->sadb_lifetime_usetime);
+		/* If possible, express values as time remaining. */
+		if (current != NULL) {
+			if (soft->sadb_lifetime_bytes != 0)
+				(void) printf(gettext(
+				    "%s%llu more bytes can be protected.\n"),
+				    soft_prefix,
+				    (soft->sadb_lifetime_bytes >
+					current->sadb_lifetime_bytes) ?
+				    (soft->sadb_lifetime_bytes -
+					current->sadb_lifetime_bytes) : (0));
+			if (soft->sadb_lifetime_addtime != 0 ||
+			    (soft->sadb_lifetime_usetime != 0 &&
+				current->sadb_lifetime_usetime != 0)) {
+				int64_t adddelta, usedelta;
+
+				if (soft->sadb_lifetime_addtime != 0) {
+					adddelta =
+					    current->sadb_lifetime_addtime +
+					    soft->sadb_lifetime_addtime -
+					    wallclock;
+				} else {
+					adddelta = TIME_MAX;	/* no limit */
+				}
+
+				if (soft->sadb_lifetime_usetime != 0 &&
+				    current->sadb_lifetime_usetime != 0) {
+					usedelta =
+					    current->sadb_lifetime_usetime +
+					    soft->sadb_lifetime_usetime -
+					    wallclock;
+				} else {
+					usedelta = TIME_MAX;	/* no limit */
+				}
+				(void) printf("%s", soft_prefix);
+				scratch = MIN(adddelta, usedelta);
+				if (scratch >= 0) {
+					(void) printf(gettext("Soft expiration "
+					    "occurs in %lld seconds, "),
+					    scratch);
+				} else {
+					(void) printf(gettext(
+					    "Soft expiration occurred "));
+				}
+				scratch += wallclock;	/* back to absolute */
+				printsatime(scratch, gettext("%sat %s.\n"), "",
+				    soft_prefix, vflag);
+			}
+		}
+	}
+
+	if (hard != NULL) {
+		(void) printf(gettext("%sHard lifetime information:  "),
+		    hard_prefix);
+		(void) printf(gettext("%llu bytes of lifetime, "
+		    "%u allocations.\n"), hard->sadb_lifetime_bytes,
+		    hard->sadb_lifetime_allocations);
+		(void) printf(gettext("%s%llu seconds of post-add lifetime.\n"),
+		    hard_prefix, hard->sadb_lifetime_addtime);
+		(void) printf(gettext("%s%llu seconds of post-use lifetime.\n"),
+		    hard_prefix, hard->sadb_lifetime_usetime);
+		/* If possible, express values as time remaining. */
+		if (current != NULL) {
+			if (hard->sadb_lifetime_bytes != 0)
+				(void) printf(gettext(
+				    "%s%llu more bytes can be protected.\n"),
+				    hard_prefix,
+				    (hard->sadb_lifetime_bytes >
+					current->sadb_lifetime_bytes) ?
+				    (hard->sadb_lifetime_bytes -
+					current->sadb_lifetime_bytes) : (0));
+			if (hard->sadb_lifetime_addtime != 0 ||
+			    (hard->sadb_lifetime_usetime != 0 &&
+				current->sadb_lifetime_usetime != 0)) {
+				int64_t adddelta, usedelta;
+
+				if (hard->sadb_lifetime_addtime != 0) {
+					adddelta =
+					    current->sadb_lifetime_addtime +
+					    hard->sadb_lifetime_addtime -
+					    wallclock;
+				} else {
+					adddelta = TIME_MAX;	/* no limit */
+				}
+
+				if (hard->sadb_lifetime_usetime != 0 &&
+				    current->sadb_lifetime_usetime != 0) {
+					usedelta =
+					    current->sadb_lifetime_usetime +
+					    hard->sadb_lifetime_usetime -
+					    wallclock;
+				} else {
+					usedelta = TIME_MAX;	/* no limit */
+				}
+				(void) printf("%s", hard_prefix);
+				scratch = MIN(adddelta, usedelta);
+				if (scratch >= 0) {
+					(void) printf(gettext("Hard expiration "
+					    "occurs in %lld seconds, "),
+					    scratch);
+				} else {
+					(void) printf(gettext(
+					    "Hard expiration occurred "));
+				}
+				scratch += wallclock;	/* back to absolute */
+				printsatime(scratch, gettext("%sat %s.\n"), "",
+				    hard_prefix, vflag);
+			}
+		}
+	}
+}
+
+/*
+ * Print an address extension: its role, protocol, then dump_sockaddr().
+ */
+void
+print_address(char *prefix, struct sadb_address *addr)
+{
+	struct protoent *pe;
+
+	(void) printf("%s", prefix);
+	switch (addr->sadb_address_exttype) {
+	case SADB_EXT_ADDRESS_SRC:
+		(void) printf(gettext("Source address "));
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_SRC:
+		(void) printf(gettext("Inner source address "));
+		break;
+	case SADB_EXT_ADDRESS_DST:
+		(void) printf(gettext("Destination address "));
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_DST:
+		(void) printf(gettext("Inner destination address "));
+		break;
+	case SADB_X_EXT_ADDRESS_NATT_LOC:
+		(void) printf(gettext("NATT local address "));
+		break;
+	case SADB_X_EXT_ADDRESS_NATT_REM:
+		(void) printf(gettext("NATT remote address "));
+		break;
+	}
+
+	(void) printf(gettext("(proto=%d"), addr->sadb_address_proto);
+	if (!nflag) {	/* nflag is external; presumably "numeric only" */
+		if (addr->sadb_address_proto == 0) {
+			(void) printf(gettext("/<unspecified>"));
+		} else if ((pe = getprotobynumber(addr->sadb_address_proto))
+		    != NULL) {
+			(void) printf("/%s", pe->p_name);
+		} else {
+			(void) printf(gettext("/<unknown>"));
+		}
+	}
+	(void) printf(gettext(")\n%s"), prefix);
+	(void) dump_sockaddr((struct sockaddr *)(addr + 1),
+	    addr->sadb_address_prefixlen, B_FALSE, stdout);
+}
+
+/*
+ * Print an SADB_EXT_KEY_* extension: a label line, then the key material.
+ */
+void
+print_key(char *prefix, struct sadb_key *key)
+{
+	/* The key bits follow the extension header directly. */
+	uint8_t *keydata = (uint8_t *)(key + 1);
+
+	(void) printf("%s", prefix);
+
+	/* Any other extension type gets no label. */
+	if (key->sadb_key_exttype == SADB_EXT_KEY_AUTH)
+		(void) printf(gettext("Authentication"));
+	else if (key->sadb_key_exttype == SADB_EXT_KEY_ENCRYPT)
+		(void) printf(gettext("Encryption"));
+
+	(void) printf(gettext(" key.\n%s"), prefix);
+	(void) dump_key(keydata, key->sadb_key_bits, stdout);
+	(void) putchar('\n');
+}
+
+/*
+ * Print an SADB_EXT_IDENTITY_* extension: role, uid, id type, and string.
+ */
+void
+print_ident(char *prefix, struct sadb_ident *id)
+{
+	boolean_t canprint;
+
+	(void) printf("%s", prefix);
+	switch (id->sadb_ident_exttype) {
+	case SADB_EXT_IDENTITY_SRC:
+		(void) printf(gettext("Source"));
+		break;
+	case SADB_EXT_IDENTITY_DST:
+		(void) printf(gettext("Destination"));
+		break;
+	}
+	(void) printf(gettext(" identity, uid=%llu, type "),
+	    (unsigned long long)id->sadb_ident_id);	/* 64-bit, RFC 2367 */
+	canprint = dump_sadb_idtype(id->sadb_ident_type, stdout, NULL);
+	(void) printf("\n%s", prefix);
+	if (canprint)	/* NOTE(review): assumes a NUL-terminated string */
+		(void) printf("%s\n", (char *)(id + 1));
+	else
+		(void) printf(gettext("<cannot print>\n"));
+}
+
+/*
+ * Print an SADB_SENSITIVITY extension; the bitmap words follow its header.
+ */
+void
+print_sens(char *prefix, struct sadb_sens *sens)
+{
+	uint64_t *bitmap = (uint64_t *)(sens + 1);
+	int i;
+
+	(void) printf(
+	    gettext("%sSensitivity DPD %d, sens level=%d, integ level=%d\n"),
+	    prefix, sens->sadb_sens_dpd, sens->sadb_sens_sens_level,
+	    sens->sadb_sens_integ_level);
+	for (i = 0; i < sens->sadb_sens_sens_len; i++, bitmap++)
+		(void) printf(	/* count up; leave the caller's lengths alone */
+		    gettext("%s Sensitivity BM extended word %d 0x%llx\n"),
+		    prefix, i, *bitmap);
+	for (i = 0; i < sens->sadb_sens_integ_len; i++, bitmap++)
+		(void) printf(	/* integrity words follow the sensitivity ones */
+		    gettext("%s Integrity BM extended word %d 0x%llx\n"),
+		    prefix, i, *bitmap);
+}
+
+/*
+ * Print an SADB_EXT_PROPOSAL extension and each sadb_comb that follows it.
+ */
+void
+print_prop(char *prefix, struct sadb_prop *prop)
+{
+	struct sadb_comb *combs;
+	int i, numcombs;
+
+	(void) printf(gettext("%sProposal, replay counter = %u.\n"), prefix,
+	    prop->sadb_prop_replay);
+
+	numcombs = prop->sadb_prop_len - SADB_8TO64(sizeof (*prop));
+	numcombs /= SADB_8TO64(sizeof (*combs));
+
+	combs = (struct sadb_comb *)(prop + 1);	/* combs follow the header */
+
+	for (i = 0; i < numcombs; i++) {
+		(void) printf(gettext("%s Combination #%u "), prefix, i + 1);
+		if (combs[i].sadb_comb_auth != SADB_AALG_NONE) {
+			(void) printf(gettext("Authentication = "));
+			(void) dump_aalg(combs[i].sadb_comb_auth, stdout);
+			(void) printf(gettext("  minbits=%u, maxbits=%u.\n%s "),
+			    combs[i].sadb_comb_auth_minbits,
+			    combs[i].sadb_comb_auth_maxbits, prefix);
+		}
+
+		if (combs[i].sadb_comb_encrypt != SADB_EALG_NONE) {
+			(void) printf(gettext("Encryption = "));
+			(void) dump_ealg(combs[i].sadb_comb_encrypt, stdout);
+			(void) printf(gettext("  minbits=%u, maxbits=%u.\n%s "),
+			    combs[i].sadb_comb_encrypt_minbits,
+			    combs[i].sadb_comb_encrypt_maxbits, prefix);
+		}
+
+		(void) printf(gettext("HARD: "));
+		if (combs[i].sadb_comb_hard_allocations)
+			(void) printf(gettext("alloc=%u "),
+			    combs[i].sadb_comb_hard_allocations);
+		if (combs[i].sadb_comb_hard_bytes)
+			(void) printf(gettext("bytes=%llu "),
+			    combs[i].sadb_comb_hard_bytes);
+		if (combs[i].sadb_comb_hard_addtime)
+			(void) printf(gettext("post-add secs=%llu "),
+			    combs[i].sadb_comb_hard_addtime);
+		if (combs[i].sadb_comb_hard_usetime)
+			(void) printf(gettext("post-use secs=%llu"),
+			    combs[i].sadb_comb_hard_usetime);
+
+		(void) printf(gettext("\n%s SOFT: "), prefix);
+		if (combs[i].sadb_comb_soft_allocations)
+			(void) printf(gettext("alloc=%u "),
+			    combs[i].sadb_comb_soft_allocations);
+		if (combs[i].sadb_comb_soft_bytes)
+			(void) printf(gettext("bytes=%llu "),
+			    combs[i].sadb_comb_soft_bytes);
+		if (combs[i].sadb_comb_soft_addtime)
+			(void) printf(gettext("post-add secs=%llu "),
+			    combs[i].sadb_comb_soft_addtime);
+		if (combs[i].sadb_comb_soft_usetime)
+			(void) printf(gettext("post-use secs=%llu"),
+			    combs[i].sadb_comb_soft_usetime);
+		(void) putchar('\n');
+	}
+}
+
+/*
+ * Print an extended proposal (SADB_X_EXT_EPROP): each ecomb and its algdescs.
+ */
+void
+print_eprop(char *prefix, struct sadb_prop *eprop)
+{
+	uint64_t *sofar;
+	struct sadb_x_ecomb *ecomb;
+	struct sadb_x_algdesc *algdesc;
+	int i, j;
+
+	(void) printf(gettext("%sExtended Proposal, replay counter = %u, "),
+	    prefix, eprop->sadb_prop_replay);
+	(void) printf(gettext("number of combinations = %u.\n"),
+	    eprop->sadb_x_prop_numecombs);
+
+	sofar = (uint64_t *)(eprop + 1);
+	ecomb = (struct sadb_x_ecomb *)sofar;
+
+	for (i = 0; i < eprop->sadb_x_prop_numecombs; ) { /* i bumped below */
+		(void) printf(gettext("%s Extended combination #%u:\n"),
+		    prefix, ++i);
+
+		(void) printf(gettext("%s HARD: "), prefix);
+		(void) printf(gettext("alloc=%u, "),
+		    ecomb->sadb_x_ecomb_hard_allocations);
+		(void) printf(gettext("bytes=%llu, "),
+		    ecomb->sadb_x_ecomb_hard_bytes);
+		(void) printf(gettext("post-add secs=%llu, "),
+		    ecomb->sadb_x_ecomb_hard_addtime);
+		(void) printf(gettext("post-use secs=%llu\n"),
+		    ecomb->sadb_x_ecomb_hard_usetime);
+
+		(void) printf(gettext("%s SOFT: "), prefix);
+		(void) printf(gettext("alloc=%u, "),
+		    ecomb->sadb_x_ecomb_soft_allocations);
+		(void) printf(gettext("bytes=%llu, "),
+		    ecomb->sadb_x_ecomb_soft_bytes);
+		(void) printf(gettext("post-add secs=%llu, "),
+		    ecomb->sadb_x_ecomb_soft_addtime);
+		(void) printf(gettext("post-use secs=%llu\n"),
+		    ecomb->sadb_x_ecomb_soft_usetime);
+
+		sofar = (uint64_t *)(ecomb + 1);
+		algdesc = (struct sadb_x_algdesc *)sofar;
+
+		for (j = 0; j < ecomb->sadb_x_ecomb_numalgs; ) {
+			(void) printf(gettext("%s Alg #%u "), prefix, ++j);
+			switch (algdesc->sadb_x_algdesc_satype) {
+			case SADB_SATYPE_ESP:
+				(void) printf(gettext("for ESP "));
+				break;
+			case SADB_SATYPE_AH:
+				(void) printf(gettext("for AH "));
+				break;
+			default:
+				(void) printf(gettext("for satype=%d "),
+				    algdesc->sadb_x_algdesc_satype);
+			}
+			switch (algdesc->sadb_x_algdesc_algtype) {
+			case SADB_X_ALGTYPE_CRYPT:
+				(void) printf(gettext("Encryption = "));
+				(void) dump_ealg(algdesc->sadb_x_algdesc_alg,
+				    stdout);
+				break;
+			case SADB_X_ALGTYPE_AUTH:
+				(void) printf(gettext("Authentication = "));
+				(void) dump_aalg(algdesc->sadb_x_algdesc_alg,
+				    stdout);
+				break;
+			default:
+				(void) printf(gettext("algtype(%d) = alg(%d)"),
+				    algdesc->sadb_x_algdesc_algtype,
+				    algdesc->sadb_x_algdesc_alg);
+				break;
+			}
+
+			(void) printf(gettext("  minbits=%u, maxbits=%u.\n"),
+			    algdesc->sadb_x_algdesc_minbits,
+			    algdesc->sadb_x_algdesc_maxbits);
+
+			sofar = (uint64_t *)(++algdesc);
+		}
+		ecomb = (struct sadb_x_ecomb *)sofar;	/* next ecomb */
+	}
+}
+
+/*
+ * Print an SADB_EXT_SUPPORTED_* extension: a heading, then one line per
+ * sadb_alg entry that follows the extension header (name for the known
+ * list types, then minbits/maxbits/ivlen).
+ */
+void
+print_supp(char *prefix, struct sadb_supported *supp)
+{
+	struct sadb_alg *alg = (struct sadb_alg *)(supp + 1);
+	boolean_t is_auth;
+	boolean_t is_encr;
+	int remaining;
+
+	/* Classify the list once; any other type prints no algorithm names. */
+	is_auth = (supp->sadb_supported_exttype == SADB_EXT_SUPPORTED_AUTH);
+	is_encr = (supp->sadb_supported_exttype == SADB_EXT_SUPPORTED_ENCRYPT);
+
+	(void) printf(gettext("%sSupported "), prefix);
+	if (is_auth)
+		(void) printf(gettext("authentication"));
+	else if (is_encr)
+		(void) printf(gettext("encryption"));
+	(void) printf(gettext(" algorithms.\n"));
+
+	/* Everything after the header is treated as sadb_alg entries. */
+	remaining = supp->sadb_supported_len - SADB_8TO64(sizeof (*supp));
+	remaining /= SADB_8TO64(sizeof (*alg));
+	for (; remaining > 0; remaining--, alg++) {
+		(void) printf("%s", prefix);
+		if (is_auth)
+			(void) dump_aalg(alg->sadb_alg_id, stdout);
+		else if (is_encr)
+			(void) dump_ealg(alg->sadb_alg_id, stdout);
+		(void) printf(gettext(" minbits=%u, maxbits=%u, ivlen=%u.\n"),
+		    alg->sadb_alg_minbits, alg->sadb_alg_maxbits,
+		    alg->sadb_alg_ivlen);
+	}
+}
+
+/*
+ * Print an SADB_EXT_SPIRANGE extension.
+ */
+void
+print_spirange(char *prefix, struct sadb_spirange *range)
+{
+	/* NOTE(review): assumes the bounds are in network order -- confirm. */
+	(void) printf(gettext("%sSPI Range, min=0x%x, max=0x%x\n"), prefix,
+	    ntohl(range->sadb_spirange_min), ntohl(range->sadb_spirange_max));
+}
+
+/*
+ * Print an SADB_X_EXT_KM_COOKIE extension: protocol, cookie label, cookie.
+ */
+
+void
+print_kmc(char *prefix, struct sadb_x_kmc *kmc)
+{
+	char *cookie_label = kmc_lookup_by_cookie(kmc->sadb_x_kmc_cookie);
+
+	/* Fall back to a placeholder when the lookup finds no label. */
+	if (cookie_label == NULL)
+		cookie_label = gettext("<Label not found.>");
+
+	(void) printf(gettext("%sProtocol %u, cookie=\"%s\" (%u)\n"), prefix,
+	    kmc->sadb_x_kmc_proto, cookie_label, kmc->sadb_x_kmc_cookie);
+}
+
+/*
+ * Print the PF_KEY message at "buffer", one extension at a time.  Lifetime
+ * extensions are collected and printed last.  Useful for DUMP and GET.
+ */
+void
+print_samsg(uint64_t *buffer, boolean_t want_timestamp, boolean_t vflag)
+{
+	uint64_t *current;
+	struct sadb_msg *samsg = (struct sadb_msg *)buffer;
+	struct sadb_ext *ext;
+	struct sadb_lifetime *currentlt = NULL, *hardlt = NULL, *softlt = NULL;
+	int i;
+	time_t wallclock;
+
+	(void) time(&wallclock);
+
+	print_sadb_msg(samsg, want_timestamp ? wallclock : 0, vflag);
+	current = (uint64_t *)(samsg + 1);
+	while (current - buffer < samsg->sadb_msg_len) {
+		int lenbytes;
+
+		ext = (struct sadb_ext *)current;
+		lenbytes = SADB_64TO8(ext->sadb_ext_len);
+		switch (ext->sadb_ext_type) {
+		case SADB_EXT_SA:
+			print_sa(gettext("SA: "), (struct sadb_sa *)current);
+			break;
+		/*
+		 * Pluck out lifetimes and print them at the end.  This is
+		 * to show relative lifetimes.
+		 */
+		case SADB_EXT_LIFETIME_CURRENT:
+			currentlt = (struct sadb_lifetime *)current;
+			break;
+		case SADB_EXT_LIFETIME_HARD:
+			hardlt = (struct sadb_lifetime *)current;
+			break;
+		case SADB_EXT_LIFETIME_SOFT:
+			softlt = (struct sadb_lifetime *)current;
+			break;
+
+		case SADB_EXT_ADDRESS_SRC:
+			print_address(gettext("SRC: "),
+			    (struct sadb_address *)current);
+			break;
+		case SADB_X_EXT_ADDRESS_INNER_SRC:
+			print_address(gettext("INS: "),
+			    (struct sadb_address *)current);
+			break;
+		case SADB_EXT_ADDRESS_DST:
+			print_address(gettext("DST: "),
+			    (struct sadb_address *)current);
+			break;
+		case SADB_X_EXT_ADDRESS_INNER_DST:
+			print_address(gettext("IND: "),
+			    (struct sadb_address *)current);
+			break;
+		case SADB_EXT_KEY_AUTH:
+			print_key(gettext("AKY: "), (struct sadb_key *)current);
+			break;
+		case SADB_EXT_KEY_ENCRYPT:
+			print_key(gettext("EKY: "), (struct sadb_key *)current);
+			break;
+		case SADB_EXT_IDENTITY_SRC:
+			print_ident(gettext("SID: "),
+			    (struct sadb_ident *)current);
+			break;
+		case SADB_EXT_IDENTITY_DST:
+			print_ident(gettext("DID: "),
+			    (struct sadb_ident *)current);
+			break;
+		case SADB_EXT_SENSITIVITY:
+			print_sens(gettext("SNS: "),
+			    (struct sadb_sens *)current);
+			break;
+		case SADB_EXT_PROPOSAL:
+			print_prop(gettext("PRP: "),
+			    (struct sadb_prop *)current);
+			break;
+		case SADB_EXT_SUPPORTED_AUTH:
+			print_supp(gettext("SUA: "),
+			    (struct sadb_supported *)current);
+			break;
+		case SADB_EXT_SUPPORTED_ENCRYPT:
+			print_supp(gettext("SUE: "),
+			    (struct sadb_supported *)current);
+			break;
+		case SADB_EXT_SPIRANGE:
+			print_spirange(gettext("SPR: "),
+			    (struct sadb_spirange *)current);
+			break;
+		case SADB_X_EXT_EPROP:
+			print_eprop(gettext("EPR: "),
+			    (struct sadb_prop *)current);
+			break;
+		case SADB_X_EXT_KM_COOKIE:
+			print_kmc(gettext("KMC: "),
+			    (struct sadb_x_kmc *)current);
+			break;
+		case SADB_X_EXT_ADDRESS_NATT_REM:
+			print_address(gettext("NRM: "),
+			    (struct sadb_address *)current);
+			break;
+		case SADB_X_EXT_ADDRESS_NATT_LOC:
+			print_address(gettext("NLC: "),
+			    (struct sadb_address *)current);
+			break;
+		default:
+			(void) printf(gettext(
+			    "UNK: Unknown ext. %d, len %d.\n"),
+			    ext->sadb_ext_type, lenbytes);
+			for (i = 0; i < ext->sadb_ext_len; i++)
+				(void) printf(gettext("UNK: 0x%llx\n"),
+				    ((uint64_t *)ext)[i]);
+			break;
+		}
+		current += (lenbytes == 0) ?	/* never stall on length 0 */
+		    SADB_8TO64(sizeof (struct sadb_ext)) : ext->sadb_ext_len;
+	}
+	/*
+	 * Print lifetimes NOW.
+	 */
+	if (currentlt != NULL || hardlt != NULL || softlt != NULL)
+		print_lifetimes(wallclock, currentlt, hardlt, softlt, vflag);
+
+	if (current - buffer != samsg->sadb_msg_len) {
+		warnx(gettext("WARNING: insufficient buffer "
+			"space or corrupt message."));
+	}
+
+	(void) fflush(stdout);	/* Make sure our message is out there. */
+}
+
+/*
+ * save_XXX functions are used when "saving" the SA tables to either a
+ * file or standard output.  They use the dump_XXX functions where needed,
+ * but mostly they use the rparseXXX functions.
+ */
+
+/*
+ * Print save information for a lifetime extension; only non-zero limits are
+ * written.  Returns B_FALSE as soon as a write fails, B_TRUE otherwise.
+ * NOTE : It saves the lifetime in absolute terms.  For example, if you
+ * had a hard_usetime of 60 seconds, you'll save it as 60 seconds, even though
+ * there may have been 59 seconds burned off the clock.
+ */
+boolean_t
+save_lifetime(struct sadb_lifetime *lifetime, FILE *ofile)
+{
+	char *prefix;
+
+	prefix = (lifetime->sadb_lifetime_exttype == SADB_EXT_LIFETIME_SOFT) ?
+	    "soft" : "hard";	/* anything that is not SOFT saves as hard */
+
+	if (putc('\t', ofile) == EOF)
+		return (B_FALSE);
+
+	if (lifetime->sadb_lifetime_allocations != 0 && fprintf(ofile,
+	    "%s_alloc %u ", prefix, lifetime->sadb_lifetime_allocations) < 0)
+		return (B_FALSE);
+
+	if (lifetime->sadb_lifetime_bytes != 0 && fprintf(ofile,
+	    "%s_bytes %llu ", prefix, lifetime->sadb_lifetime_bytes) < 0)
+		return (B_FALSE);
+
+	if (lifetime->sadb_lifetime_addtime != 0 && fprintf(ofile,
+	    "%s_addtime %llu ", prefix, lifetime->sadb_lifetime_addtime) < 0)
+		return (B_FALSE);
+
+	if (lifetime->sadb_lifetime_usetime != 0 && fprintf(ofile,
+	    "%s_usetime %llu ", prefix, lifetime->sadb_lifetime_usetime) < 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Save an address extension; B_FALSE if unsupported or the write fails.
+ */
+boolean_t
+save_address(struct sadb_address *addr, FILE *ofile)
+{
+	char *printable_addr, buf[INET6_ADDRSTRLEN];
+	const char *prefix, *pprefix;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(addr + 1);
+	struct sockaddr_in *sin = (struct sockaddr_in *)sin6;
+	int af = sin->sin_family;
+
+	/* Address-family reality check. */
+	if (af != AF_INET6 && af != AF_INET)
+		return (B_FALSE);
+
+	switch (addr->sadb_address_exttype) {
+	case SADB_EXT_ADDRESS_SRC:
+		prefix = "src";
+		pprefix = "sport";
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_SRC:
+		prefix = "isrc";
+		pprefix = "isport";
+		break;
+	case SADB_EXT_ADDRESS_DST:
+		prefix = "dst";
+		pprefix = "dport";
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_DST:
+		prefix = "idst";
+		pprefix = "idport";
+		break;
+	case SADB_X_EXT_ADDRESS_NATT_LOC:
+		prefix = "nat_loc ";
+		pprefix = "nat_lport";
+		break;
+	case SADB_X_EXT_ADDRESS_NATT_REM:
+		prefix = "nat_rem ";
+		pprefix = "nat_rport";
+		break;
+	default:	/* No keyword for this type; prefix would be unset. */
+		return (B_FALSE);
+	}
+
+	if (fprintf(ofile, "    %s ", prefix) < 0)
+		return (B_FALSE);
+
+	/*
+	 * Do not do address-to-name translation, given that we live in
+	 * an age of names that explode into many addresses.
+	 */
+	printable_addr = (char *)inet_ntop(af,
+	    (af == AF_INET) ? (char *)&sin->sin_addr : (char *)&sin6->sin6_addr,
+	    buf, sizeof (buf));
+	if (printable_addr == NULL)
+		printable_addr = "<inet_ntop() failed>";
+	if (fprintf(ofile, "%s", printable_addr) < 0)
+		return (B_FALSE);
+	if (addr->sadb_address_prefixlen != 0 &&
+	    !((addr->sadb_address_prefixlen == 32 && af == AF_INET) ||
+		(addr->sadb_address_prefixlen == 128 && af == AF_INET6))) {
+		if (fprintf(ofile, "/%d", addr->sadb_address_prefixlen) < 0)
+			return (B_FALSE);
+	}
+
+	/*
+	 * The port is in the same position for struct sockaddr_in and
+	 * struct sockaddr_in6.  A failed port write is not reported.
+	 */
+	if ((pprefix != NULL) && (sin->sin_port != 0))
+		(void) fprintf(ofile, " %s %d", pprefix, ntohs(sin->sin_port));
+
+	return (B_TRUE);
+}
+
+/*
+ * Write a key extension as an "authkey"/"encrkey" keyword followed by the
+ * dumped key.  Returns B_FALSE as soon as any output step fails.
+ */
+boolean_t
+save_key(struct sadb_key *key, FILE *ofile)
+{
+	boolean_t is_auth = (key->sadb_key_exttype == SADB_EXT_KEY_AUTH);
+
+	if (putc('\t', ofile) == EOF)
+		return (B_FALSE);
+
+	/* Keyword is "authkey" for an auth key, otherwise "encrkey". */
+	if (fprintf(ofile, "%skey ", is_auth ? "auth" : "encr") < 0)
+		return (B_FALSE);
+
+	/* The key material starts right after the fixed extension header. */
+	if (dump_key((uint8_t *)(key + 1), key->sadb_key_bits, ofile) == -1)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Save an identity extension's type and value; B_FALSE on a write error.
+ */
+boolean_t
+save_ident(struct sadb_ident *ident, FILE *ofile)
+{
+	char *prefix;
+
+	if (putc('\t', ofile) == EOF)
+		return (B_FALSE);
+
+	prefix = (ident->sadb_ident_exttype == SADB_EXT_IDENTITY_SRC) ? "src" :
+	    "dst";
+
+	if (fprintf(ofile, "%sidtype %s ", prefix,
+	    rparseidtype(ident->sadb_ident_type)) < 0)
+		return (B_FALSE);
+
+	if (ident->sadb_ident_type == SADB_X_IDENTTYPE_DN ||
+	    ident->sadb_ident_type == SADB_X_IDENTTYPE_GN) {
+		/* Translated text is data, never a format string. */
+		if (fputs(gettext("<can-not-print>"), ofile) == EOF)
+			return (B_FALSE);
+	} else if (fprintf(ofile, "%s", (char *)(ident + 1)) < 0) {
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * "Save" a security association to an output file.  buffer holds a sadb_msg
+ * followed by its extensions; all lengths are counted in 64-bit words.
+ * NOTE the lack of calls to gettext() because I'm outputting parseable stuff.
+ * ALSO NOTE that if you change keywords (see parsecmd()), you'll have to
+ * change them here as well.
+ */
+void
+save_assoc(uint64_t *buffer, FILE *ofile)
+{
+	int seen_proto = 0;
+	uint64_t *current;
+	struct sadb_address *addr;
+	struct sadb_msg *samsg = (struct sadb_msg *)buffer;
+	struct sadb_ext *ext;
+#define	bail2(s)	do { \
+				int t = errno; \
+				(void) fclose(ofile); \
+				errno = t; \
+				interactive = B_FALSE;	/* Guarantees exit. */ \
+				Bail(s); \
+			} while (B_FALSE)	/* How do I lint-clean this? */
+
+#define	savenl() if (fputs(" \\\n", ofile) == EOF) { bail2("savenl"); }
+
+	if (fputs("# begin assoc\n", ofile) == EOF)
+		Bail("save_assoc: Opening comment of SA");
+	if (fprintf(ofile, "add %s ", rparsesatype(samsg->sadb_msg_satype)) < 0)
+		Bail("save_assoc: First line of SA");
+	/* LINTED E_CONST_COND */
+	savenl();
+
+	current = (uint64_t *)(samsg + 1);
+	while (current - buffer < samsg->sadb_msg_len) {
+		struct sadb_sa *assoc;
+
+		ext = (struct sadb_ext *)current;
+		switch (ext->sadb_ext_type) {
+		case SADB_EXT_SA:
+			assoc = (struct sadb_sa *)ext;
+			if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
+				if (fprintf(ofile, "# WARNING: SA was dying "
+				    "or dead.\n") < 0) {
+					/* LINTED E_CONST_COND */
+					bail2("save_assoc: fprintf not mature");
+				}
+			}
+			if (fprintf(ofile, "    spi 0x%x ",
+			    ntohl(assoc->sadb_sa_spi)) < 0)
+				/* LINTED E_CONST_COND */
+				bail2("save_assoc: fprintf spi");
+			if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
+				if (fprintf(ofile, "encr_alg %s ",
+				    rparsealg(assoc->sadb_sa_encrypt,
+					IPSEC_PROTO_ESP)) < 0)
+					/* LINTED E_CONST_COND */
+					bail2("save_assoc: fprintf encrypt");
+			}
+			if (assoc->sadb_sa_auth != SADB_AALG_NONE) {
+				if (fprintf(ofile, "auth_alg %s ",
+				    rparsealg(assoc->sadb_sa_auth,
+					IPSEC_PROTO_AH)) < 0)
+					/* LINTED E_CONST_COND */
+					bail2("save_assoc: fprintf auth");
+			}
+			if (fprintf(ofile, "replay %d ",
+			    assoc->sadb_sa_replay) < 0)
+				/* LINTED E_CONST_COND */
+				bail2("save_assoc: fprintf replay");
+			if (assoc->sadb_sa_flags & (SADB_X_SAFLAGS_NATT_LOC |
+			    SADB_X_SAFLAGS_NATT_REM)) {
+				if (fprintf(ofile, "encap udp") < 0)
+					/* LINTED E_CONST_COND */
+					bail2("save_assoc: fprintf encap");
+			}
+			/* LINTED E_CONST_COND */
+			savenl();
+			break;
+		case SADB_EXT_LIFETIME_HARD:
+		case SADB_EXT_LIFETIME_SOFT:
+			if (!save_lifetime((struct sadb_lifetime *)ext, ofile))
+				/* LINTED E_CONST_COND */
+				bail2("save_lifetime");
+			/* LINTED E_CONST_COND */
+			savenl();
+			break;
+		case SADB_EXT_ADDRESS_SRC:
+		case SADB_EXT_ADDRESS_DST:
+		case SADB_X_EXT_ADDRESS_INNER_SRC:
+		case SADB_X_EXT_ADDRESS_INNER_DST:
+		case SADB_X_EXT_ADDRESS_NATT_REM:
+		case SADB_X_EXT_ADDRESS_NATT_LOC:
+			addr = (struct sadb_address *)ext;
+			if (!seen_proto && addr->sadb_address_proto) {
+				(void) fprintf(ofile, "    proto %d",
+				    addr->sadb_address_proto);
+				/* LINTED E_CONST_COND */
+				savenl();
+				seen_proto = 1;
+			}
+			if (!save_address(addr, ofile))
+				/* LINTED E_CONST_COND */
+				bail2("save_address");
+			/* LINTED E_CONST_COND */
+			savenl();
+			break;
+		case SADB_EXT_KEY_AUTH:
+		case SADB_EXT_KEY_ENCRYPT:
+			if (!save_key((struct sadb_key *)ext, ofile))
+				/* LINTED E_CONST_COND */
+				bail2("save_key");
+			/* LINTED E_CONST_COND */
+			savenl();
+			break;
+		case SADB_EXT_IDENTITY_SRC:
+		case SADB_EXT_IDENTITY_DST:
+			if (!save_ident((struct sadb_ident *)ext, ofile))
+				/* LINTED E_CONST_COND */
+				bail2("save_ident");
+			/* LINTED E_CONST_COND */
+			savenl();
+			break;
+		case SADB_EXT_SENSITIVITY:
+		default:
+			/* Skip over irrelevant extensions. */
+			break;
+		}
+		current += ext->sadb_ext_len;
+	}
+
+	if (fputs("\n# end assoc\n\n", ofile) == EOF)
+		/* LINTED E_CONST_COND */
+		bail2("save_assoc: last fputs");
+}
+
+/*
+ * Open the output file for the "save" command.
+ */
+FILE *
+opensavefile(char *filename)
+{
+	int fd, err;
+	FILE *retval;
+	struct stat buf;
+
+	/*
+	 * If the user specifies "-" or doesn't give a filename, then
+	 * dump to stdout.  Make sure to document the dangers of files
+	 * that are NFS, directing your output to strange places, etc.
+	 */
+	if (filename == NULL || strcmp("-", filename) == 0)
+		return (stdout);
+
+	/*
+	 * Open the file with the create bits set.  Since I check for real UID
+	 * == root in main(), I won't worry about the ownership problem.
+	 */
+	fd = open(filename, O_WRONLY | O_EXCL | O_CREAT | O_TRUNC, S_IRUSR);
+	if (fd == -1) {
+		/* Only an already-existing file gets a second attempt. */
+		if (errno == EEXIST)
+			fd = open(filename, O_WRONLY | O_TRUNC, 0);
+		if (fd == -1)
+			bail_msg("%s %s: %s", filename, gettext("open error"),
+			    strerror(errno));
+		if (fstat(fd, &buf) == -1) {
+			err = errno;	/* close() may overwrite errno */
+			(void) close(fd);
+			bail_msg("%s fstat: %s", filename, strerror(err));
+		}
+		if (S_ISREG(buf.st_mode) &&
+		    ((buf.st_mode & S_IAMB) != S_IRUSR)) {
+			warnx(gettext("WARNING: Save file already exists with "
+				"permission %o."), buf.st_mode & S_IAMB);
+			warnx(gettext("Normal users may be able to read IPsec "
+				"keying material."));
+		}
+	}
+
+	/* Okay, we have an FD.  Assign it to a stdio FILE pointer. */
+	retval = fdopen(fd, "w");
+	if (retval == NULL) {
+		err = errno;	/* close() may overwrite errno */
+		(void) close(fd);
+		bail_msg("%s %s: %s", filename, gettext("fdopen error"),
+		    strerror(err));
+	}
+	return (retval);
+}
+
+const char *
+do_inet_ntop(const void *addr, char *cp, size_t size)
+{
+	struct in6_addr *inaddr6 = (struct in6_addr *)addr;
+	struct in_addr inaddr;
+
+	/* Anything that is not V4-mapped is printed as an IPv6 address. */
+	if (!IN6_IS_ADDR_V4MAPPED(inaddr6))
+		return (inet_ntop(AF_INET6, inaddr6, cp, size));
+
+	/* V4-mapped: convert to an in_addr and print it as IPv4. */
+	IN6_V4MAPPED_TO_INADDR(inaddr6, &inaddr);
+	return (inet_ntop(AF_INET, &inaddr, cp, size));
+}
+
+char numprint[NBUF_SIZE];	/* rparse*() numeric fallback buffer */
+
+/*
+ * SA type names (AH, ESP, etc.) and their PF_KEY SADB_SATYPE_* values.
+ */
+static struct typetable {
+	char *type;
+	int token;
+} type_table[] = {
+	{"all", SADB_SATYPE_UNSPEC},
+	{"ah",  SADB_SATYPE_AH},
+	{"esp", SADB_SATYPE_ESP},
+	/* PF_KEY NOTE:  More to come if net/pfkeyv2.h gets updated. */
+	{NULL, 0}	/* Token value is irrelevant for this entry. */
+};
+
+char *
+rparsesatype(int type)
+{
+	struct typetable *tt;
+
+	for (tt = type_table; tt->type != NULL; tt++) {
+		if (tt->token == type)
+			return (tt->type);
+	}
+
+	/*
+	 * Unknown SA type: hand back its decimal value in the shared buffer.
+	 */
+	(void) snprintf(numprint, NBUF_SIZE, "%d", type);
+	return (numprint);
+}
+
+
+/*
+ * Return the first name of the given algorithm number, or its decimal
+ * value (in numprint) when the lookup fails.  Valid until the next call.
+ */
+char *
+rparsealg(uint8_t alg, int proto_num)
+{
+	static struct ipsecalgent *cached = NULL; /* we're single-threaded */
+
+	/* The entry from the previous call is released before a new lookup. */
+	if (cached != NULL)
+		freeipsecalgent(cached);
+
+	cached = getipsecalgbynum(alg, proto_num, NULL);
+	if (cached != NULL)
+		return (cached->a_names[0]);
+
+	(void) snprintf(numprint, NBUF_SIZE, "%d", alg);
+	return (numprint);
+}
+
+/*
+ * Source/destination ID type names; rparseidtype() returns the first match.
+ */
+static struct idtypes {
+	char *idtype;
+	uint8_t retval;
+} idtypes[] = {
+	{"prefix",	SADB_IDENTTYPE_PREFIX},
+	{"fqdn",	SADB_IDENTTYPE_FQDN},
+	{"domain",	SADB_IDENTTYPE_FQDN},
+	{"domainname",	SADB_IDENTTYPE_FQDN},
+	{"user_fqdn",	SADB_IDENTTYPE_USER_FQDN},
+	{"mailbox",	SADB_IDENTTYPE_USER_FQDN},
+	{"der_dn",	SADB_X_IDENTTYPE_DN},
+	{"der_gn",	SADB_X_IDENTTYPE_GN},
+	{NULL,		0}
+};
+
+char *
+rparseidtype(uint16_t type)
+{
+	struct idtypes *idp = idtypes;
+
+	while (idp->idtype != NULL && idp->retval != type)
+		idp++;
+	if (idp->idtype != NULL)
+		return (idp->idtype);
+
+	(void) snprintf(numprint, NBUF_SIZE, "%d", type);
+	return (numprint);
+}
--- a/usr/src/lib/libipsecutil/common/ipsec_util.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/lib/libipsecutil/common/ipsec_util.h	Fri Nov 03 07:10:24 2006 -0800
@@ -39,6 +39,9 @@
 
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <net/pfkeyv2.h>
+#include <netinet/in.h>
+#include <inet/ip.h>
 #include <setjmp.h>
 #include <stdio.h>
 #include <err.h>
@@ -109,7 +112,7 @@
  * take a FILE pointer.
  */
 
-extern int dump_sockaddr(struct sockaddr *, boolean_t, FILE *);
+extern int dump_sockaddr(struct sockaddr *, uint8_t, boolean_t, FILE *);
 
 extern int dump_key(uint8_t *, uint_t, FILE *);
 
@@ -172,6 +175,7 @@
 extern boolean_t interactive;
 extern boolean_t readfile;
 extern uint_t lineno;
+extern char numprint[NBUF_SIZE];
 
 /* For error recovery in interactive or read-file mode. */
 extern jmp_buf env;
@@ -283,6 +287,37 @@
 
 /* PF_KEY (keysock) support functions */
 extern const char *keysock_diag(int);
+extern int in_masktoprefix(uint8_t *, boolean_t);
+
+/* SA support functions */
+
+extern void print_diagnostic(FILE *, uint16_t);
+extern void print_sadb_msg(struct sadb_msg *, time_t, boolean_t);
+extern void print_sa(char *, struct sadb_sa *);
+extern void printsatime(int64_t, const char *, const char *, const char *,
+    boolean_t);
+extern void print_lifetimes(time_t, struct sadb_lifetime *,
+    struct sadb_lifetime *, struct sadb_lifetime *, boolean_t vflag);
+extern void print_address(char *, struct sadb_address *);
+extern void print_key(char *, struct sadb_key *);
+extern void print_ident(char *, struct sadb_ident *);
+extern void print_sens(char *, struct sadb_sens *);
+extern void print_prop(char *, struct sadb_prop *);
+extern void print_eprop(char *, struct sadb_prop *);
+extern void print_supp(char *, struct sadb_supported *);
+extern void print_spirange(char *, struct sadb_spirange *);
+extern void print_kmc(char *, struct sadb_x_kmc *);
+extern void print_samsg(uint64_t *, boolean_t, boolean_t);
+extern char *rparsesatype(int);
+extern char *rparsealg(uint8_t, int);
+extern char *rparseidtype(uint16_t);
+extern boolean_t save_lifetime(struct sadb_lifetime *, FILE *);
+extern boolean_t save_address(struct sadb_address *, FILE *);
+extern boolean_t save_key(struct sadb_key *, FILE *);
+extern boolean_t save_ident(struct sadb_ident *, FILE *);
+extern void save_assoc(uint64_t *, FILE *);
+extern FILE *opensavefile(char *);
+extern const char *do_inet_ntop(const void *, char *, size_t);
 
 #ifdef __cplusplus
 }
--- a/usr/src/lib/libipsecutil/common/mapfile-vers	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/lib/libipsecutil/common/mapfile-vers	Fri Nov 03 07:10:24 2006 -0800
@@ -36,6 +36,7 @@
 	delipsecalgbynum;
 	delipsecprotobyname;
 	delipsecprotobynum;
+	do_inet_ntop;
 	do_interactive;
 	dump_aalg;
 	dump_ealg;
@@ -47,6 +48,7 @@
 	errx;
 	getipsecalgs;
 	getipsecprotos;
+	in_masktoprefix;
 	interactive;
 	ipsecalgs_diag;
 	ipsecproto_get_exec_mode;
@@ -57,10 +59,34 @@
 	lineno;
 	list_ints;
 	nflag;
+	opensavefile;
 	parsedbgopts;
 	pflag;
+	print_address;
+	print_diagnostic;
+	print_eprop;
+	print_ident;
+	print_key;
+	print_kmc;
+	print_lifetimes;
+	print_prop;
+	print_sa;
+	print_sadb_msg;
+	print_samsg;
+	print_sens;
+	print_spirange;
+	print_supp;
+	printsatime;
 	privstr2num;
 	readfile;
+	rparsealg;
+	rparseidtype;
+	rparsesatype;
+	save_address;
+	save_assoc;
+	save_ident;
+	save_key;
+	save_lifetime;
 	spdsock_diag;
 	spdsock_get_ext;
 	verr;
--- a/usr/src/uts/common/inet/ip.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2638,10 +2638,12 @@
 	uint_t		ipp_rthdrlen;
 	uint_t		ipp_dstoptslen;
 	uint_t		ipp_pathmtulen;
+	uint_t		ipp_fraghdrlen;
 	ip6_hbh_t	*ipp_hopopts;
 	ip6_dest_t	*ipp_rtdstopts;
 	ip6_rthdr_t	*ipp_rthdr;
 	ip6_dest_t	*ipp_dstopts;
+	ip6_frag_t	*ipp_fraghdr;
 	struct ip6_mtuinfo *ipp_pathmtu;
 	in6_addr_t	ipp_nexthop;		/* Transmit only */
 	uint8_t		ipp_tclass;
@@ -2690,7 +2692,9 @@
 #define	IPPF_DONTFRAG	0x2000
 #define	IPPF_USE_MIN_MTU	0x04000
 #define	IPPF_MULTICAST_HOPS	0x08000
+
 #define	IPPF_UNICAST_HOPS	0x10000
+#define	IPPF_FRAGHDR		0x20000
 
 #define	IPPF_HAS_IP6I \
 	(IPPF_IFINDEX|IPPF_ADDR|IPPF_NEXTHOP|IPPF_SCOPE_ID| \
--- a/usr/src/uts/common/inet/ip/ip.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ip.c	Fri Nov 03 07:10:24 2006 -0800
@@ -6235,21 +6235,21 @@
 		 */
 		if (protocol == IPPROTO_ENCAP && ip_g_mrouter) {
 			/*
-			 * XXX If an IPsec mblk is here on a multicast
-			 * tunnel (using ip_mroute stuff), what should
-			 * I do?
-			 *
-			 * For now, just free the IPsec mblk before
-			 * passing it up to the multicast routing
-			 * stuff.
+			 * If an IPsec mblk is here on a multicast
+			 * tunnel (using ip_mroute stuff), check policy here,
+			 * THEN ship off to ip_mroute_decap().
 			 *
 			 * BTW,  If I match a configured IP-in-IP
-			 * tunnel, ip_mroute_decap will never be
-			 * called.
-			 */
-			if (mp != first_mp)
-				freeb(first_mp);
-			ip_mroute_decap(q, mp);
+			 * tunnel, this path will not be reached, and
+			 * ip_mroute_decap will never be called.
+			 */
+			first_mp = ipsec_check_global_policy(first_mp, connp,
+			    ipha, NULL, mctl_present);
+			if (first_mp != NULL) {
+				if (mctl_present)
+					freeb(first_mp);
+				ip_mroute_decap(q, mp);
+			} /* Else we already freed everything! */
 		} else {
 			/*
 			 * Otherwise send an ICMP protocol unreachable.
@@ -6308,7 +6308,12 @@
 
 			freemsg(first_mp1);
 		} else {
-			if (CONN_INBOUND_POLICY_PRESENT(connp) || secure) {
+			/*
+			 * Don't enforce here if we're an actual tunnel -
+			 * let "tun" do it instead.
+			 */
+			if (!IPCL_IS_IPTUN(connp) &&
+			    (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) {
 				first_mp1 = ipsec_check_inbound_policy
 				    (first_mp1, connp, ipha, NULL,
 				    mctl_present);
@@ -6378,10 +6383,24 @@
 
 		freemsg(first_mp);
 	} else {
-		if (CONN_INBOUND_POLICY_PRESENT(connp) || secure) {
+		if (IPCL_IS_IPTUN(connp)) {
+			/*
+			 * Tunneled packet.  We enforce policy in the tunnel
+			 * module itself.
+			 *
+			 * Send the WHOLE packet up (incl. IPSEC_IN) without
+			 * a policy check.
+			 */
+			putnext(rq, first_mp);
+			CONN_DEC_REF(connp);
+			return;
+		}
+
+		if ((CONN_INBOUND_POLICY_PRESENT(connp) || secure)) {
 			first_mp = ipsec_check_inbound_policy(first_mp, connp,
 			    ipha, NULL, mctl_present);
 		}
+
 		if (first_mp != NULL) {
 			/*
 			 * ip_fanout_proto also gets called
@@ -9999,11 +10018,9 @@
 	/*
 	 * If we have already cached policies in ip_bind_connected*(), don't
 	 * let them change now. We cache policies for connections
-	 * whose src,dst [addr, port] is known.  The exception to this is
-	 * tunnels.  Tunnels are allowed to change policies after having
-	 * become fully bound.
-	 */
-	if (connp->conn_policy_cached && !IPCL_IS_IPTUN(connp)) {
+	 * whose src,dst [addr, port] is known.
+	 */
+	if (connp->conn_policy_cached) {
 		mutex_exit(&connp->conn_lock);
 		return (EINVAL);
 	}
@@ -10040,11 +10057,11 @@
 	bzero(&sel, sizeof (sel));
 	sel.ipsl_valid = IPSL_IPV4;
 
-	pin4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET);
+	pin4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL);
 	if (pin4 == NULL)
 		goto enomem;
 
-	pout4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET);
+	pout4 = ipsec_policy_create(&sel, actp, nact, IPSEC_PRIO_SOCKET, NULL);
 	if (pout4 == NULL)
 		goto enomem;
 
@@ -10055,12 +10072,12 @@
 		 */
 		sel.ipsl_valid = IPSL_IPV6;
 		pin6 = ipsec_policy_create(&sel, actp, nact,
-		    IPSEC_PRIO_SOCKET);
+		    IPSEC_PRIO_SOCKET, NULL);
 		if (pin6 == NULL)
 			goto enomem;
 
 		pout6 = ipsec_policy_create(&sel, actp, nact,
-		    IPSEC_PRIO_SOCKET);
+		    IPSEC_PRIO_SOCKET, NULL);
 		if (pout6 == NULL)
 			goto enomem;
 
@@ -10109,13 +10126,6 @@
 		connp->conn_flags |= IPCL_CHECK_POLICY;
 	}
 
-	/*
-	 * Tunnels are allowed to set policy after having been fully bound.
-	 * If that's the case, cache policy here.
-	 */
-	if (IPCL_IS_IPTUN(connp) && connp->conn_fully_bound)
-		error = ipsec_conn_cache_policy(connp, !connp->conn_af_isv6);
-
 	mutex_exit(&connp->conn_lock);
 	return (error);
 #undef REQ_MASK
@@ -17241,7 +17251,8 @@
 				return;
 			}
 			ii->ipsec_in_decaps = B_TRUE;
-			ip_proto_input(q, first_mp, ipha, ire, recv_ill);
+			ip_fanout_proto_again(first_mp, recv_ill, recv_ill,
+			    ire);
 			return;
 		}
 		break;
@@ -21228,7 +21239,7 @@
 		 * ip_wput[_v6] attaches an IPSEC_OUT in two cases.
 		 *
 		 * 1) There is per-socket policy (including cached global
-		 *    policy).
+		 *    policy) or a policy on the IP-in-IP tunnel.
 		 * 2) There is no per-socket policy, but it is
 		 *    a multicast packet that needs to go out
 		 *    on a specific interface. This is the case
@@ -24194,6 +24205,10 @@
 			first_mp = mp;
 			mctl_present = B_FALSE;
 		} else {
+			/*
+			 * Convert IPSEC_OUT to IPSEC_IN, preserving all
+			 * security properties for the looped-back packet.
+			 */
 			mctl_present = B_TRUE;
 			mp = first_mp->b_cont;
 			ASSERT(mp != NULL);
@@ -25914,7 +25929,7 @@
 		ipsec_mp->b_cont = outer_mp;
 
 		io->ipsec_out_se_done = B_TRUE;
-		io->ipsec_out_encaps = B_TRUE;
+		io->ipsec_out_tunnel = B_TRUE;
 	}
 
 	if (((ap->ipa_want_ah && (io->ipsec_out_ah_sa == NULL)) ||
@@ -26576,6 +26591,14 @@
 		if (mp->b_wptr - mp->b_rptr < sizeof (uint32_t))
 			break;
 
+		if (((ipsec_info_t *)mp->b_rptr)->ipsec_info_type ==
+		    TUN_HELLO) {
+			ASSERT(connp != NULL);
+			connp->conn_flags |= IPCL_IPTUN;
+			freeb(mp);
+			return;
+		}
+
 		if (connp != NULL && *(uint32_t *)mp->b_rptr ==
 		    IP_ULP_OUT_LABELED) {
 			out_labeled_t *olp;
--- a/usr/src/uts/common/inet/ip/ip6.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ip6.c	Fri Nov 03 07:10:24 2006 -0800
@@ -3337,7 +3337,12 @@
 
 			freemsg(mp1);
 		} else {
-			if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
+			/*
+			 * Don't enforce here if we're a tunnel - let "tun" do
+			 * it instead.
+			 */
+			if (!IPCL_IS_IPTUN(connp) &&
+			    (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure)) {
 				first_mp1 = ipsec_check_inbound_policy
 				    (first_mp1, connp, NULL, ip6h,
 				    mctl_present);
@@ -3406,7 +3411,24 @@
 
 		freemsg(first_mp);
 	} else {
-		if (CONN_INBOUND_POLICY_PRESENT_V6(connp) || secure) {
+		if (IPCL_IS_IPTUN(connp)) {
+			/*
+			 * Tunneled packet.  We enforce policy in the tunnel
+			 * module itself.
+			 *
+			 * Send the WHOLE packet up (incl. IPSEC_IN) without
+			 * a policy check.
+			 */
+			putnext(rq, first_mp);
+			CONN_DEC_REF(connp);
+			return;
+		}
+		/*
+		 * Don't enforce here if we're a tunnel - let "tun" do
+		 * it instead.
+		 */
+		if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 &&
+		    (CONN_INBOUND_POLICY_PRESENT(connp) || secure)) {
 			first_mp = ipsec_check_inbound_policy(first_mp, connp,
 			    NULL, ip6h, mctl_present);
 			if (first_mp == NULL) {
@@ -4127,18 +4149,16 @@
 			}
 			break;
 		case IPPROTO_FRAGMENT:
-			/*
-			 * Fragment headers are skipped.  Currently, only
-			 * IP cares for their existence.  If anyone other
-			 * than IP ever has the need to know about the
-			 * location of fragment headers, support can be
-			 * added to the ip6_pkt_t at that time.
-			 */
 			tmpfraghdr = (ip6_frag_t *)whereptr;
 			ehdrlen = sizeof (ip6_frag_t);
 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
 				goto done;
 			nexthdr = tmpfraghdr->ip6f_nxt;
+			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
+				ipp->ipp_fields |= IPPF_FRAGHDR;
+				ipp->ipp_fraghdr = tmpfraghdr;
+				ipp->ipp_fraghdrlen = ehdrlen;
+			}
 			break;
 		case IPPROTO_NONE:
 		default:
@@ -4194,6 +4214,7 @@
 	ip6_rthdr_t *rthdr;
 	ip6_frag_t *fraghdr;
 
+	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
 	length = IPV6_HDR_LEN;
 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
 	endptr = mp->b_wptr;
@@ -9264,7 +9285,7 @@
 #endif
 
 	/*
-	 * M_CTL comes from 5 places
+	 * M_CTL comes from 6 places
 	 *
 	 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections
 	 *    both V4 and V6 datagrams.
@@ -9280,6 +9301,8 @@
 	 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for
 	 *    IPsec hardware acceleration support.
 	 *
+	 * 6) TUN_HELLO.
+	 *
 	 * We need to handle (1)'s IPv6 case and (3) here.  For the
 	 * IPv4 case in (1), and (2), IPSEC processing has already
 	 * started. The code in ip_wput() already knows how to handle
--- a/usr/src/uts/common/inet/ip/ip_sadb.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ip_sadb.c	Fri Nov 03 07:10:24 2006 -0800
@@ -28,6 +28,7 @@
 #include <sys/types.h>
 #include <sys/stream.h>
 #include <sys/sunddi.h>
+#include <sys/ddi.h>
 #include <sys/strlog.h>
 
 #include <inet/common.h>
@@ -100,9 +101,9 @@
 
 /*
  * Look up a security association based on the unique ID generated by IP and
- * transport information, such as ports and upper-layer protocol, and the
- * address(es).	 Used for uniqueness testing and outbound packets.  The
- * source address may be ignored.
+ * transport or tunnel information, such as ports and upper-layer protocol,
+ * and the inner and outer address(es).	 Used for uniqueness testing and
+ * outbound packets.  The outer source address may be ignored.
  *
  * I expect an SA hash bucket, and that its per-bucket mutex is held.
  * The SA ptr I return will have its reference count incremented by one.
@@ -114,6 +115,7 @@
 	ipsa_t *retval, *candidate;
 	ipsec_action_t *candact;
 	boolean_t need_unique;
+	boolean_t tunnel_mode = io->ipsec_out_tunnel;
 	uint64_t unique_id;
 	uint32_t old_flags, excludeflags;
 	ipsec_policy_t *pp = io->ipsec_out_policy;
@@ -121,10 +123,22 @@
 	ipsec_action_t *act;
 	ipsec_latch_t *ipl = io->ipsec_out_latch;
 	ipsa_ref_t *ipr = NULL;
+	sa_family_t inaf = io->ipsec_out_inaf;
+	uint32_t *insrc = io->ipsec_out_insrc;
+	uint32_t *indst = io->ipsec_out_indst;
+	uint8_t insrcpfx = io->ipsec_out_insrcpfx;
+	uint8_t indstpfx = io->ipsec_out_indstpfx;
 
 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
 
 	/*
+	 * Caller must set ipsec_out_t structure such that we know
+	 * whether this is tunnel mode or transport mode based on
+	 * io->ipsec_out_tunnel.  If this flag is set, we assume that
+	 * there are valid inner src and destination addresses to compare.
+	 */
+
+	/*
 	 * Fast path: do we have a latch structure, is it for this bucket,
 	 * and does the generation number match?  If so, refhold and return.
 	 */
@@ -175,6 +189,8 @@
 	 * - destination
 	 * - source
 	 * - algorithms
+	 * - inner dst
+	 * - inner src
 	 * - <MORE TBD>
 	 *
 	 * Make sure that wildcard sources are inserted at the end of the hash
@@ -203,12 +219,48 @@
 		mutex_enter(&retval->ipsa_lock);
 
 		/* My apologies for the use of goto instead of continue. */
+
+		/* Outer destination address */
 		if (!IPSA_ARE_ADDR_EQUAL(dst, retval->ipsa_dstaddr, af))
 			goto next_ipsa;	/* Destination mismatch. */
+
+		/* Outer source address */
 		if (!IPSA_ARE_ADDR_EQUAL(src, retval->ipsa_srcaddr, af) &&
 		    !IPSA_IS_ADDR_UNSPEC(retval->ipsa_srcaddr, af))
 			goto next_ipsa;	/* Specific source and not matched. */
 
+		if (tunnel_mode) {
+			/* Check tunnel mode */
+			if (!(retval->ipsa_flags & IPSA_F_TUNNEL))
+				goto next_ipsa; /* Not tunnel mode SA */
+
+			/* Inner destination address */
+			if (!IPSA_IS_ADDR_UNSPEC(retval->ipsa_innerdst, inaf)) {
+				if (!ip_addr_match((uint8_t *)indst,
+				    min(indstpfx, retval->ipsa_innerdstpfx),
+				    (in6_addr_t *)retval->ipsa_innerdst))
+					goto next_ipsa; /* not matched. */
+			}
+
+			/* Inner source address */
+			if (!IPSA_IS_ADDR_UNSPEC(retval->ipsa_innersrc, inaf)) {
+				if (!ip_addr_match((uint8_t *)insrc,
+				    min(insrcpfx, retval->ipsa_innersrcpfx),
+				    (in6_addr_t *)retval->ipsa_innersrc))
+					goto next_ipsa; /* not matched. */
+			}
+		} else {
+			/* Check transport mode */
+			if (retval->ipsa_flags & IPSA_F_TUNNEL)
+				goto next_ipsa; /* Not transport mode SA */
+
+			/*
+			 * TODO - If we ever do RFC 3884's dream of transport-
+			 * mode SAs with inner IP address selectors, we need
+			 * to put some code here.
+			 */
+		}
+
 		/*
 		 * XXX should be able to use cached/latched action
 		 * to dodge this loop
@@ -282,6 +334,8 @@
 		 *
 		 * - dest
 		 * - source (if source is specified, i.e. non-zeroes)
+		 * - inner dest (if specified)
+		 * - inner source (if specified)
 		 * - auth alg (if auth alg is specified, i.e. non-zero)
 		 * - encrypt. alg (if encrypt. alg is specified, i.e. non-zero)
 		 * and we know that the SA keylengths are appropriate.
@@ -412,7 +466,7 @@
 			retval->ipsa_unique_id = unique_id;
 			retval->ipsa_unique_mask = SA_UNIQUE_MASK(
 			    io->ipsec_out_src_port, io->ipsec_out_dst_port,
-			    protocol);
+			    protocol, 0);
 		}
 
 		/*
--- a/usr/src/uts/common/inet/ip/ipdrop.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ipdrop.c	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -114,6 +113,18 @@
 	    "spd_ah_innermismatch", KSTAT_DATA_UINT64);
 	kstat_named_init(&ipdrops_spd_esp_innermismatch,
 	    "spd_esp_innermismatch", KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_no_policy, "spd_no_policy",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_malformed_packet, "spd_malformed_packet",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_malformed_frag, "spd_malformed_frag",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_overlap_frag, "spd_overlap_frag",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_evil_frag, "spd_evil_frag",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&ipdrops_spd_max_frags, "spd_max_frags",
+	    KSTAT_DATA_UINT64);
 
 	/* ESP-specific drop statistics. */
 
@@ -281,5 +292,10 @@
 	/* If I haven't queued the packet or some such nonsense, free it. */
 	if (ipsec_mp != NULL)
 		freeb(ipsec_mp);
+	/*
+	 * ASSERT this isn't a b_next linked mblk chain where a
+	 * chained dropper should be used instead
+	 */
+	ASSERT(mp->b_prev == NULL && mp->b_next == NULL);
 	freemsg(mp);
 }
--- a/usr/src/uts/common/inet/ip/ipsecah.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ipsecah.c	Fri Nov 03 07:10:24 2006 -0800
@@ -576,69 +576,8 @@
 static void
 ipsecah_rput(queue_t *q, mblk_t *mp)
 {
-	keysock_in_t *ksi;
-	int *addrtype;
-	ire_t *ire;
-	mblk_t *ire_mp, *last_mp;
-
+	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
 	switch (mp->b_datap->db_type) {
-	case M_CTL:
-		/*
-		 * IPsec request of some variety from IP.  IPSEC_{IN,OUT}
-		 * are the common cases, but even ICMP error messages from IP
-		 * may rise up here.
-		 *
-		 * Ummmm, actually, this can also be the reflected KEYSOCK_IN
-		 * message, with an IRE_DB_TYPE hung off at the end.
-		 */
-		switch (((ipsec_info_t *)(mp->b_rptr))->ipsec_info_type) {
-		case KEYSOCK_IN:
-			last_mp = mp;
-			while (last_mp->b_cont != NULL &&
-			    last_mp->b_cont->b_datap->db_type != IRE_DB_TYPE)
-				last_mp = last_mp->b_cont;
-
-			if (last_mp->b_cont == NULL) {
-				freemsg(mp);
-				break;	/* Out of switch. */
-			}
-
-			ire_mp = last_mp->b_cont;
-			last_mp->b_cont = NULL;
-
-			ksi = (keysock_in_t *)mp->b_rptr;
-
-			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_srctype;
-			else if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_dsttype;
-			else if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_proxytype;
-
-			ire = (ire_t *)ire_mp->b_rptr;
-
-			*addrtype = sadb_addrset(ire);
-
-			freemsg(ire_mp);
-			if (ah_pfkey_q != NULL) {
-				/*
-				 * Decrement counter to make up for
-				 * auto-increment in ipsecah_wput().
-				 * I'm running all MT-hot through here, so
-				 * don't worry about perimeters and lateral
-				 * puts.
-				 */
-				AH_DEBUMP_STAT(keysock_in);
-				ipsecah_wput(WR(ah_pfkey_q), mp);
-			} else {
-				freemsg(mp);
-			}
-			break;
-		default:
-			freemsg(mp);
-			break;
-		}
-		break;
 	case M_PROTO:
 	case M_PCPROTO:
 		/* TPI message of some sort. */
@@ -840,7 +779,8 @@
  * send back a reply ADD message.
  */
 static int
-ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi)
+ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
+    int *diagnostic)
 {
 	isaf_t *primary, *secondary, *inbound, *outbound;
 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
@@ -913,7 +853,7 @@
 			clone = B_TRUE;
 		break;
 	default:
-		samsg->sadb_x_msg_diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
+		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
 		return (EINVAL);
 	}
 
@@ -982,7 +922,7 @@
 		lpkt = sadb_clear_lpkt(larval);
 
 	rc = sadb_common_add(ah_sadb.s_ip_q, ah_pfkey_q, mp, samsg, ksi,
-	    primary, secondary, larval, clone, is_inbound);
+	    primary, secondary, larval, clone, is_inbound, diagnostic);
 
 	/*
 	 * How much more stack will I create with all of these
@@ -1045,6 +985,10 @@
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
 	sadb_address_t *dstext =
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
+	sadb_address_t *isrcext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
+	sadb_address_t *idstext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
 	sadb_key_t *key = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
 	struct sockaddr_in *src, *dst;
 	/* We don't need sockaddr_in6 for now. */
@@ -1063,6 +1007,14 @@
 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
 		return (EINVAL);
 	}
+	if (isrcext == NULL && idstext != NULL) {
+		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
+		return (EINVAL);
+	}
+	if (isrcext != NULL && idstext == NULL) {
+		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
+		return (EINVAL);
+	}
 	if (assoc == NULL) {
 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
 		return (EINVAL);
@@ -1086,7 +1038,8 @@
 		*diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP;
 		return (EINVAL);
 	}
-	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY)) {
+	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
+		SADB_X_SAFLAGS_TUNNEL)) {
 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
 		return (EINVAL);
 	}
@@ -1094,10 +1047,7 @@
 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0)
 		return (EINVAL);
 
-	if (src->sin_family != dst->sin_family) {
-		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-		return (EINVAL);
-	}
+	ASSERT(src->sin_family == dst->sin_family);
 
 	/* Stuff I don't support, for now.  XXX Diagnostic? */
 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL ||
@@ -1137,7 +1087,8 @@
 
 	mutex_exit(&alg_lock);
 
-	return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi));
+	return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
+		    diagnostic));
 }
 
 /*
@@ -1185,10 +1136,9 @@
 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
 			return (EINVAL);
 		}
-		return sadb_purge_sa(mp, ksi,
+		return (sadb_purge_sa(mp, ksi,
 		    (sin->sin_family == AF_INET6) ? &ah_sadb.s_v6 :
-		    &ah_sadb.s_v4,
-		    diagnostic, ah_pfkey_q, ah_sadb.s_ip_q);
+		    &ah_sadb.s_v4, ah_pfkey_q, ah_sadb.s_ip_q));
 	}
 
 	return (sadb_del_sa(mp, ksi, &ah_sadb, diagnostic, ah_pfkey_q));
@@ -1222,6 +1172,39 @@
 }
 
 /*
+ * First-cut reality check for an inbound PF_KEY message.
+ */
+static boolean_t
+ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
+{
+	int diagnostic;
+
+	if (mp->b_cont == NULL) {
+		freemsg(mp);
+		return (B_TRUE);
+	}
+
+	if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
+		diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
+		goto badmsg;
+	}
+	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
+		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
+		goto badmsg;
+	}
+	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
+	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
+		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
+		goto badmsg;
+	}
+	return (B_FALSE);	/* False ==> no failures */
+
+badmsg:
+	sadb_pfkey_error(ah_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial);
+	return (B_TRUE);	/* True ==> failures */
+}
+
+/*
  * AH parsing of PF_KEY messages.  Keysock did most of the really silly
  * error cases.  What I receive is a fully-formed, syntactically legal
  * PF_KEY message.  I then need to check semantics...
@@ -1249,7 +1232,10 @@
 	 * If applicable, convert unspecified AF_INET6 to unspecified
 	 * AF_INET.
 	 */
-	sadb_srcaddrfix(ksi);
+	if (!sadb_addrfix(ksi, ah_pfkey_q, mp) ||
+	    ah_pfkey_reality_failures(mp, ksi)) {
+		return;
+	}
 
 	switch (samsg->sadb_msg_type) {
 	case SADB_ADD:
@@ -1381,56 +1367,12 @@
 }
 
 /*
- * First-cut reality check for an inbound PF_KEY message.
- */
-static boolean_t
-ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
-{
-	int diagnostic;
-
-	if (mp->b_cont == NULL) {
-		freemsg(mp);
-		return (B_TRUE);
-	}
-
-	if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
-		diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
-		goto badmsg;
-	}
-	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
-		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
-		goto badmsg;
-	}
-	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
-	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
-		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
-		goto badmsg;
-	}
-	if (ksi->ks_in_srctype == KS_IN_ADDR_MBCAST) {
-		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
-		goto badmsg;
-	}
-	if (ksi->ks_in_dsttype == KS_IN_ADDR_UNSPEC) {
-		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
-		goto badmsg;
-	}
-
-	return (B_FALSE);	/* False ==> no failures */
-
-badmsg:
-	sadb_pfkey_error(ah_pfkey_q, mp, EINVAL, diagnostic, ksi->ks_in_serial);
-	return (B_TRUE);	/* True ==> failures */
-}
-
-/*
  * AH module write put routine.
  */
 static void
 ipsecah_wput(queue_t *q, mblk_t *mp)
 {
 	ipsec_info_t *ii;
-	keysock_in_t *ksi;
-	int rc;
 	struct iocblk *iocp;
 
 	ah3dbg(("In ah_wput().\n"));
@@ -1453,51 +1395,8 @@
 		case KEYSOCK_IN:
 			AH_BUMP_STAT(keysock_in);
 			ah3dbg(("Got KEYSOCK_IN message.\n"));
-			ksi = (keysock_in_t *)ii;
-			/*
-			 * Some common reality checks.
-			 */
-
-			if (ah_pfkey_reality_failures(mp, ksi))
-				return;
-
-			/*
-			 * Use 'q' instead of ah_sadb.s_ip_q, since
-			 * it's the write side already, and it'll go
-			 * down to IP.  Use ah_pfkey_q because we
-			 * wouldn't get here if that weren't set, and
-			 * the RD(q) has been done already.
-			 */
-			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, ah_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_srctype = rc;
-			}
-			if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, ah_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_DST],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_dsttype = rc;
-			}
-			/*
-			 * XXX Proxy may be a different address family.
-			 */
-			if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, ah_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_PROXY],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_proxytype = rc;
-			}
+
+			/* Parse the message. */
 			ah_parse_pfkey(mp);
 			break;
 		case KEYSOCK_HELLO:
@@ -1831,83 +1730,35 @@
 static void
 ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended)
 {
-	mblk_t *pfkeymp, *msgmp;
-	uint_t allocsize, combs;
+	uint_t combs;
 	sadb_msg_t *samsg;
 	sadb_prop_t *prop;
-	uint8_t *cur, *end;
+	mblk_t *pfkeymp, *msgmp;
 
 	AH_BUMP_STAT(acquire_requests);
 
-	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
-
-	pfkeymp = sadb_keysock_out(0);
-	if (pfkeymp == NULL) {
-		ah1dbg(("ah_send_acquire: 1st allocb() failed.\n"));
-		/* Just bail. */
-		goto done;
-	}
-
-	/*
-	 * First, allocate a basic ACQUIRE message.  Beyond that,
-	 * you need to extract certificate info from
-	 */
-	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
-	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
-
-	switch (acqrec->ipsacq_addrfam) {
-	case AF_INET:
-		allocsize += 2 * sizeof (struct sockaddr_in);
-		break;
-	case AF_INET6:
-		allocsize += 2 * sizeof (struct sockaddr_in6);
-		break;
-	}
-
-	mutex_enter(&alg_lock);
-
-	combs = ipsec_nalgs[IPSEC_ALG_AUTH];
-
-	allocsize += combs * sizeof (sadb_comb_t);
-
-	/*
-	 * XXX If there are:
-	 *	certificate IDs
-	 *	proxy address
-	 *	<Others>
-	 * add additional allocation size.
-	 */
-
-	msgmp = allocb(allocsize, BPRI_HI);
-	if (msgmp == NULL) {
-		ah0dbg(("ah_send_acquire: 2nd allocb() failed.\n"));
-		/* Just bail. */
-		freemsg(pfkeymp);
-		pfkeymp = NULL;
-		goto done;
-	}
-
-	cur = msgmp->b_rptr;
-	end = cur + allocsize;
-	samsg = (sadb_msg_t *)cur;
-	pfkeymp->b_cont = msgmp;
+	/*
+	 * We were entered with acqrec->ipsacq_lock held and we own extended;
+	 * every exit path must drop the lock and dispose of extended.
+	 */
+	if (ah_pfkey_q == NULL) {
+		mutex_exit(&acqrec->ipsacq_lock);
+		freemsg(extended);	/* freemsg() copes with NULL. */
+		return;
+	}
 
 	/* Set up ACQUIRE. */
-	cur = sadb_setup_acquire(cur, end, acqrec);
-	if (cur == NULL) {
+	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_AH);
+	if (pfkeymp == NULL) {
 		ah0dbg(("sadb_setup_acquire failed.\n"));
-		/* Just bail. */
-		freemsg(pfkeymp);
-		pfkeymp = NULL;
-		goto done;
+		mutex_exit(&acqrec->ipsacq_lock);
+		freemsg(extended);
+		return;
 	}
-	samsg->sadb_msg_satype = SADB_SATYPE_AH;
-
-	/* XXX Insert proxy address information here. */
-
-	/* XXX Insert identity information here. */
-
-	/* XXXMLS Insert sensitivity information here. */
+	ASSERT(MUTEX_HELD(&alg_lock));
+	combs = ipsec_nalgs[IPSEC_ALG_AUTH];
+	msgmp = pfkeymp->b_cont;
+	samsg = (sadb_msg_t *)(msgmp->b_rptr);
 
 	/* Insert proposal here. */
 
@@ -1916,7 +1758,6 @@
 	samsg->sadb_msg_len += prop->sadb_prop_len;
 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
 
-done:
 	mutex_exit(&alg_lock);
 
 	/*
@@ -1927,16 +1768,10 @@
 	 * Once I've sent the message, I'm cool anyway.
 	 */
 	mutex_exit(&acqrec->ipsacq_lock);
-	if (ah_pfkey_q != NULL && pfkeymp != NULL) {
-		if (extended != NULL) {
-			putnext(ah_pfkey_q, extended);
-		}
-		putnext(ah_pfkey_q, pfkeymp);
-		return;
+	if (extended != NULL) {
+		putnext(ah_pfkey_q, extended);
 	}
-	/* NOTE: freemsg() works for extended == NULL. */
-	freemsg(extended);
-	freemsg(pfkeymp);
+	putnext(ah_pfkey_q, pfkeymp);
 }
 
 /*
--- a/usr/src/uts/common/inet/ip/ipsecesp.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/ipsecesp.c	Fri Nov 03 07:10:24 2006 -0800
@@ -1257,83 +1257,35 @@
 static void
 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended)
 {
-	mblk_t *pfkeymp, *msgmp;
-	uint_t allocsize, combs;
+	uint_t combs;
 	sadb_msg_t *samsg;
 	sadb_prop_t *prop;
-	uint8_t *cur, *end;
+	mblk_t *pfkeymp, *msgmp;
 
 	ESP_BUMP_STAT(acquire_requests);
 
-	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
-
-	pfkeymp = sadb_keysock_out(0);
-	if (pfkeymp == NULL) {
-		esp0dbg(("esp_send_acquire: 1st allocb() failed.\n"));
-		/* Just bail. */
-		goto done;
-	}
-
-	/*
-	 * First, allocate a basic ACQUIRE message.  Beyond that,
-	 * you need to extract certificate info from
-	 */
-	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
-	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
-
-	switch (acqrec->ipsacq_addrfam) {
-	case AF_INET:
-		allocsize += 2 * sizeof (struct sockaddr_in);
-		break;
-	case AF_INET6:
-		allocsize += 2 * sizeof (struct sockaddr_in6);
-		break;
-	}
-
-	mutex_enter(&alg_lock);
-
-	combs = ipsec_nalgs[IPSEC_ALG_AUTH] * ipsec_nalgs[IPSEC_ALG_ENCR];
-
-	allocsize += combs * sizeof (sadb_comb_t);
-
-	/*
-	 * XXX If there are:
-	 *	certificate IDs
-	 *	proxy address
-	 *	<Others>
-	 * add additional allocation size.
-	 */
-
-	msgmp = allocb(allocsize, BPRI_HI);
-	if (msgmp == NULL) {
-		esp0dbg(("esp_send_acquire: 2nd allocb() failed.\n"));
-		/* Just bail. */
-		freemsg(pfkeymp);
-		pfkeymp = NULL;
-		goto done;
-	}
-
-	cur = msgmp->b_rptr;
-	end = cur + allocsize;
-	samsg = (sadb_msg_t *)cur;
-	pfkeymp->b_cont = msgmp;
+	/*
+	 * We were entered with acqrec->ipsacq_lock held and we own extended;
+	 * every exit path must drop the lock and dispose of extended.
+	 */
+	if (esp_pfkey_q == NULL) {
+		mutex_exit(&acqrec->ipsacq_lock);
+		freemsg(extended);	/* freemsg() copes with NULL. */
+		return;
+	}
 
 	/* Set up ACQUIRE. */
-	cur = sadb_setup_acquire(cur, end, acqrec);
-	if (cur == NULL) {
+	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP);
+	if (pfkeymp == NULL) {
 		esp0dbg(("sadb_setup_acquire failed.\n"));
-		/* Just bail. */
-		freemsg(pfkeymp);
-		pfkeymp = NULL;
-		goto done;
+		mutex_exit(&acqrec->ipsacq_lock);
+		freemsg(extended);
+		return;
 	}
-	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
-
-	/* XXX Insert proxy address information here. */
-
-	/* XXX Insert identity information here. */
-
-	/* XXXMLS Insert sensitivity information here. */
+	ASSERT(MUTEX_HELD(&alg_lock));
+	combs = ipsec_nalgs[IPSEC_ALG_AUTH] * ipsec_nalgs[IPSEC_ALG_ENCR];
+	msgmp = pfkeymp->b_cont;
+	samsg = (sadb_msg_t *)(msgmp->b_rptr);
 
 	/* Insert proposal here. */
 
@@ -1342,7 +1285,6 @@
 	samsg->sadb_msg_len += prop->sadb_prop_len;
 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
 
-done:
 	mutex_exit(&alg_lock);
 
 	/*
@@ -1353,16 +1295,10 @@
 	 * Once I've sent the message, I'm cool anyway.
 	 */
 	mutex_exit(&acqrec->ipsacq_lock);
-	if (esp_pfkey_q != NULL && pfkeymp != NULL) {
-		if (extended != NULL) {
-			putnext(esp_pfkey_q, extended);
-		}
-		putnext(esp_pfkey_q, pfkeymp);
-		return;
+	if (extended != NULL) {
+		putnext(esp_pfkey_q, extended);
 	}
-	/* XXX freemsg() works for extended == NULL. */
-	freemsg(extended);
-	freemsg(pfkeymp);
+	putnext(esp_pfkey_q, pfkeymp);
 }
 
 /*
@@ -2468,69 +2404,8 @@
 static void
 ipsecesp_rput(queue_t *q, mblk_t *mp)
 {
-	keysock_in_t *ksi;
-	int *addrtype;
-	ire_t *ire;
-	mblk_t *ire_mp, *last_mp;
-
+	ASSERT(mp->b_datap->db_type != M_CTL);	/* No more IRE_DB_REQ. */
 	switch (mp->b_datap->db_type) {
-	case M_CTL:
-		/*
-		 * IPsec request of some variety from IP.  IPSEC_{IN,OUT}
-		 * are the common cases, but even ICMP error messages from IP
-		 * may rise up here.
-		 *
-		 * Ummmm, actually, this can also be the reflected KEYSOCK_IN
-		 * message, with an IRE_DB_TYPE hung off at the end.
-		 */
-		switch (((ipsec_info_t *)(mp->b_rptr))->ipsec_info_type) {
-		case KEYSOCK_IN:
-			last_mp = mp;
-			while (last_mp->b_cont != NULL &&
-			    last_mp->b_cont->b_datap->db_type != IRE_DB_TYPE)
-				last_mp = last_mp->b_cont;
-
-			if (last_mp->b_cont == NULL) {
-				freemsg(mp);
-				break;	/* Out of switch. */
-			}
-
-			ire_mp = last_mp->b_cont;
-			last_mp->b_cont = NULL;
-
-			ksi = (keysock_in_t *)mp->b_rptr;
-
-			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_srctype;
-			else if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_dsttype;
-			else if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN)
-				addrtype = &ksi->ks_in_proxytype;
-
-			ire = (ire_t *)ire_mp->b_rptr;
-
-			*addrtype = sadb_addrset(ire);
-
-			freemsg(ire_mp);
-			if (esp_pfkey_q != NULL) {
-				/*
-				 * Decrement counter to make up for
-				 * auto-increment in ipsecesp_wput().
-				 * I'm running all MT-hot through here, so
-				 * don't worry about perimeters and lateral
-				 * puts.
-				 */
-				ESP_DEBUMP_STAT(keysock_in);
-				ipsecesp_wput(WR(esp_pfkey_q), mp);
-			} else {
-				freemsg(mp);
-			}
-			break;
-		default:
-			freemsg(mp);
-			break;
-		}
-		break;
 	case M_PROTO:
 	case M_PCPROTO:
 		/* TPI message of some sort. */
@@ -2796,7 +2671,8 @@
  * send back a reply ADD message.
  */
 static int
-esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi)
+esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
+    int *diagnostic)
 {
 	isaf_t *primary, *secondary, *inbound, *outbound;
 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
@@ -2868,7 +2744,7 @@
 			clone = B_TRUE;
 		break;
 	default:
-		samsg->sadb_x_msg_diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
+		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
 		return (EINVAL);
 	}
 
@@ -2934,7 +2810,7 @@
 		lpkt = sadb_clear_lpkt(larval);
 
 	rc = sadb_common_add(esp_sadb.s_ip_q, esp_pfkey_q, mp, samsg, ksi,
-	    primary, secondary, larval, clone, is_inbound);
+	    primary, secondary, larval, clone, is_inbound, diagnostic);
 
 	if (rc == 0 && lpkt != NULL) {
 		rc = !taskq_dispatch(esp_taskq, inbound_task,
@@ -2961,13 +2837,14 @@
 				((ipsec_out_t *)(mp->b_rptr))->
 				    ipsec_out_esp_done = B_TRUE;
 				if (esp_outbound(mp) == IPSEC_STATUS_SUCCESS) {
-					ipha_t *ipha = (ipha_t *)
-					    mp->b_cont->b_rptr;
+					ipha_t *ipha;
 
 					/* do AH processing if needed */
 					if (!esp_do_outbound_ah(mp))
 						continue;
 
+					ipha = (ipha_t *)mp->b_cont->b_rptr;
+
 					/* finish IPsec processing */
 					if (is_ipv4) {
 						ip_wput_ipsec_out(NULL, mp,
@@ -3001,6 +2878,10 @@
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
 	sadb_address_t *dstext =
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
+	sadb_address_t *isrcext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
+	sadb_address_t *idstext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
 	sadb_address_t *nttext_loc =
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
 	sadb_address_t *nttext_rem =
@@ -3025,6 +2906,14 @@
 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
 		return (EINVAL);
 	}
+	if (isrcext == NULL && idstext != NULL) {
+		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
+		return (EINVAL);
+	}
+	if (isrcext != NULL && idstext == NULL) {
+		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
+		return (EINVAL);
+	}
 	if (assoc == NULL) {
 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
 		return (EINVAL);
@@ -3059,7 +2948,8 @@
 	}
 
 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
-	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) {
+	    SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM |
+		SADB_X_SAFLAGS_TUNNEL)) {
 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
 		return (EINVAL);
 	}
@@ -3067,11 +2957,7 @@
 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
 		return (EINVAL);
 	}
-	if (src->sin_family != dst->sin_family) {
-		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-		return (EINVAL);
-	}
-
+	ASSERT(src->sin_family == dst->sin_family);
 
 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
 		if (nttext_loc == NULL) {
@@ -3186,7 +3072,8 @@
 	}
 	mutex_exit(&alg_lock);
 
-	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi));
+	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
+		    diagnostic));
 }
 
 /*
@@ -3235,10 +3122,9 @@
 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
 			return (EINVAL);
 		}
-		return sadb_purge_sa(mp, ksi,
-		    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 :
-		    &esp_sadb.s_v4,
-		    diagnostic, esp_pfkey_q, esp_sadb.s_ip_q);
+		return (sadb_purge_sa(mp, ksi,
+			    (sin->sin_family == AF_INET6) ? &esp_sadb.s_v6 :
+			    &esp_sadb.s_v4, esp_pfkey_q, esp_sadb.s_ip_q));
 	}
 
 	return (sadb_del_sa(mp, ksi, &esp_sadb, diagnostic, esp_pfkey_q));
@@ -3272,6 +3158,31 @@
 }
 
 /*
+ * First-cut reality check for an inbound PF_KEY message.
+ */
+static boolean_t
+esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
+{
+	int diagnostic;
+
+	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
+		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
+		goto badmsg;
+	}
+	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
+	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
+		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
+		goto badmsg;
+	}
+	return (B_FALSE);	/* False ==> no failures */
+
+badmsg:
+	sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
+	    ksi->ks_in_serial);
+	return (B_TRUE);	/* True ==> failures */
+}
+
+/*
  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
  * error cases.  What I receive is a fully-formed, syntactically legal
  * PF_KEY message.  I then need to check semantics...
@@ -3297,9 +3208,12 @@
 
 	/*
 	 * If applicable, convert unspecified AF_INET6 to unspecified
-	 * AF_INET.
+	 * AF_INET.  And do other address reality checks.
 	 */
-	sadb_srcaddrfix(ksi);
+	if (!sadb_addrfix(ksi, esp_pfkey_q, mp) ||
+	    esp_pfkey_reality_failures(mp, ksi)) {
+		return;
+	}
 
 	switch (samsg->sadb_msg_type) {
 	case SADB_ADD:
@@ -3432,48 +3346,12 @@
 }
 
 /*
- * First-cut reality check for an inbound PF_KEY message.
- */
-static boolean_t
-esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi)
-{
-	int diagnostic;
-
-	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
-		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
-		goto badmsg;
-	}
-	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
-	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
-		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
-		goto badmsg;
-	}
-	if (ksi->ks_in_srctype == KS_IN_ADDR_MBCAST) {
-		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
-		goto badmsg;
-	}
-	if (ksi->ks_in_dsttype == KS_IN_ADDR_UNSPEC) {
-		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
-		goto badmsg;
-	}
-
-	return (B_FALSE);	/* False ==> no failures */
-
-badmsg:
-	sadb_pfkey_error(esp_pfkey_q, mp, EINVAL, diagnostic,
-	    ksi->ks_in_serial);
-	return (B_TRUE);	/* True ==> failures */
-}
-
-/*
  * ESP module write put routine.
  */
 static void
 ipsecesp_wput(queue_t *q, mblk_t *mp)
 {
 	ipsec_info_t *ii;
-	keysock_in_t *ksi;
-	int rc;
 	struct iocblk *iocp;
 
 	esp3dbg(("In esp_wput().\n"));
@@ -3496,51 +3374,8 @@
 		case KEYSOCK_IN:
 			ESP_BUMP_STAT(keysock_in);
 			esp3dbg(("Got KEYSOCK_IN message.\n"));
-			ksi = (keysock_in_t *)ii;
-			/*
-			 * Some common reality checks.
-			 */
-
-			if (esp_pfkey_reality_failures(mp, ksi))
-				return;
-
-			/*
-			 * Use 'q' instead of esp_sadb.s_ip_q, since
-			 * it's the write side already, and it'll go
-			 * down to IP.  Use esp_pfkey_q because we
-			 * wouldn't get here if that weren't set, and
-			 * the RD(q) has been done already.
-			 */
-			if (ksi->ks_in_srctype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_srctype = rc;
-			}
-			if (ksi->ks_in_dsttype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_DST],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_dsttype = rc;
-			}
-			/*
-			 * XXX Proxy may be a different address family.
-			 */
-			if (ksi->ks_in_proxytype == KS_IN_ADDR_UNKNOWN) {
-				rc = sadb_addrcheck(q, esp_pfkey_q, mp,
-				    ksi->ks_in_extv[SADB_EXT_ADDRESS_PROXY],
-				    ksi->ks_in_serial);
-				if (rc == KS_IN_ADDR_UNKNOWN)
-					return;
-				else
-					ksi->ks_in_proxytype = rc;
-			}
+
+			/* Parse the message. */
 			esp_parse_pfkey(mp);
 			break;
 		case KEYSOCK_HELLO:
--- a/usr/src/uts/common/inet/ip/keysock.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/keysock.c	Fri Nov 03 07:10:24 2006 -0800
@@ -970,9 +970,6 @@
 	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
 		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
 	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
-	if (extv[SADB_EXT_ADDRESS_PROXY] != NULL)
-		ksi->ks_in_proxytype = KS_IN_ADDR_UNKNOWN;
-	else ksi->ks_in_proxytype = KS_IN_ADDR_NOTTHERE;
 	for (i = 0; i <= SADB_EXT_MAX; i++)
 		ksi->ks_in_extv[i] = extv[i];
 	ksi->ks_in_serial = ks->keysock_serial;
@@ -1024,7 +1021,8 @@
 	switch (ext->sadb_ext_type) {
 	case SADB_EXT_ADDRESS_SRC:
 	case SADB_EXT_ADDRESS_DST:
-	case SADB_EXT_ADDRESS_PROXY:
+	case SADB_X_EXT_ADDRESS_INNER_SRC:
+	case SADB_X_EXT_ADDRESS_INNER_DST:
 		/* Check for at least enough addtl length for a sockaddr. */
 		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
 			return (B_FALSE);
@@ -1313,6 +1311,12 @@
 	case SADB_EXT_ADDRESS_DST:
 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
 		break;
+	case SADB_X_EXT_ADDRESS_INNER_SRC:
+		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_DST:
+		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
+		break;
 	case SADB_EXT_SA:
 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
 		break;
@@ -1345,6 +1349,12 @@
 	case SADB_EXT_ADDRESS_DST:
 		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
 		break;
+	case SADB_X_EXT_ADDRESS_INNER_SRC:
+		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
+		break;
+	case SADB_X_EXT_ADDRESS_INNER_DST:
+		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
+		break;
 	case SADB_EXT_SA:
 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
 		break;
@@ -1380,6 +1390,21 @@
 	}
 	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
+		return;
+	}
+
+	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
+	    extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
+		keysock_error(ks, mp, EINVAL,
+		    SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
+		return;
+	}
+
+	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
+	    extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
+		keysock_error(ks, mp, EINVAL,
+		    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
+		return;
 	}
 
 	reply_mp = ipsec_construct_inverse_acquire(samsg, extv);
--- a/usr/src/uts/common/inet/ip/nattymod.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/nattymod.c	Fri Nov 03 07:10:24 2006 -0800
@@ -545,11 +545,24 @@
 		    AF_INET);
 		mutex_exit(&bucket->isaf_lock);
 
-		if (ipsa == NULL || ipsa->ipsa_state == IPSA_STATE_DEAD) {
+		if (ipsa == NULL || ipsa->ipsa_state == IPSA_STATE_DEAD ||
+		    (!(ipsa->ipsa_flags & IPSA_F_NATT) &&
+			ipsa->ipsa_state != IPSA_STATE_LARVAL)) {
 			/* no associated sa error */
 
-			if (ipsa != NULL)
+			if (ipsa != NULL) {
+				/*
+				 * While we give LARVALs the benefit of the
+				 * doubt, full SAs that aren't NAT-T shouldn't
+				 * be dealing with inbound NAT-T traffic.
+				 */
+				if (!(ipsa->ipsa_flags & IPSA_F_NATT)) {
+					cmn_err(CE_WARN, "UDP-ESP arrived for "
+					    "non-NAT SA, spi 0x%x",
+					    htonl(ipsa->ipsa_spi));
+				}
 				IPSA_REFRELE(ipsa);
+			}
 
 			ip_drop_packet(iph_mp, B_TRUE, NULL, NULL,
 			    &ipdrops_esp_no_sa, &ip_dropper);
--- a/usr/src/uts/common/inet/ip/sadb.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/sadb.c	Fri Nov 03 07:10:24 2006 -0800
@@ -48,6 +48,7 @@
 #include <inet/common.h>
 #include <netinet/ip6.h>
 #include <inet/ip.h>
+#include <inet/ip_ire.h>
 #include <inet/ip6.h>
 #include <inet/ipsec_info.h>
 #include <inet/ipsec_impl.h>
@@ -62,6 +63,7 @@
 #include <inet/ipdrop.h>
 #include <inet/ipclassifier.h>
 #include <inet/sctp_ip.h>
+#include <inet/tun.h>
 
 /*
  * This source file contains Security Association Database (SADB) common
@@ -73,14 +75,15 @@
 static ipdropper_t sadb_dropper;
 
 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
-    ipsec_action_t *, uint32_t, uint32_t);
+    ipsec_action_t *, boolean_t, uint32_t, uint32_t);
 static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
-static void sadb_drain_torchq(queue_t *q, mblk_t *);
+static void sadb_drain_torchq(queue_t *, mblk_t *);
 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t);
-static void sadb_destroy(sadb_t *sp);
-
-static time_t sadb_add_time(time_t base, uint64_t delta);
+static void sadb_destroy(sadb_t *);
+static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
+
+static time_t sadb_add_time(time_t, uint64_t);
 
 /*
  * ipsacq_maxpackets is defined here to make it tunable
@@ -278,9 +281,6 @@
 	if (ipsa->ipsa_dst_cid != NULL) {
 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
 	}
-	if (ipsa->ipsa_proxy_cid != NULL) {
-		IPSID_REFRELE(ipsa->ipsa_proxy_cid);
-	}
 	if (ipsa->ipsa_integ != NULL)
 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
 	if (ipsa->ipsa_sens != NULL)
@@ -1192,13 +1192,6 @@
 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
 	}
 
-#if 0 /* XXX PROXY  - Proxy identities not supported yet. */
-	if (ipsa->ipsa_proxy_cid != NULL) {
-		newbie->ipsa_proxy_cid = ipsa->ipsa_proxy_cid;
-		IPSID_REFHOLD(ipsa->ipsa_proxy_cid);
-	}
-#endif /* XXX PROXY */
-
 	if (error) {
 		sadb_freeassoc(newbie);
 		return (NULL);
@@ -1213,7 +1206,7 @@
  */
 static uint8_t *
 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
-    sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto)
+    sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
 {
 	struct sockaddr_in *sin;
 	struct sockaddr_in6 *sin6;
@@ -1230,7 +1223,7 @@
 		return (NULL);
 
 	addrext->sadb_address_proto = proto;
-	addrext->sadb_address_prefixlen = 0;
+	addrext->sadb_address_prefixlen = prefix;
 	addrext->sadb_address_reserved = 0;
 	addrext->sadb_address_exttype = exttype;
 
@@ -1302,7 +1295,7 @@
  * SA, construct a full PF_KEY message with all of the relevant extensions.
  * This is mostly used for SADB_GET, and SADB_DUMP.
  */
-mblk_t *
+static mblk_t *
 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
 {
 	int alloclen, addrsize, paddrsize, authsize, encrsize;
@@ -1324,11 +1317,7 @@
 	uint64_t *bitmap;
 	uint8_t *cur, *end;
 	/* These indicate the presence of the above extension fields. */
-	boolean_t soft, hard, proxy, auth, encr, sensinteg, srcid, dstid;
-#if 0 /* XXX PROXY see below... */
-	boolean_t proxyid, iv;
-	int proxyidsize, ivsize;
-#endif /* XXX PROXY */
+	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
 
 	/* First off, figure out the allocation length for this message. */
 
@@ -1380,9 +1369,12 @@
 		hard = B_FALSE;
 	}
 
-	/* Proxy address? */
-	if (!IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_proxysrc, ipsa->ipsa_proxyfam)) {
-		pfam = ipsa->ipsa_proxyfam;
+	/* Inner addresses. */
+	if (ipsa->ipsa_innerfam == 0) {
+		isrc = B_FALSE;
+		idst = B_FALSE;
+	} else {
+		pfam = ipsa->ipsa_innerfam;
 		switch (pfam) {
 		case AF_INET6:
 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
@@ -1397,10 +1389,9 @@
 			    "IPsec SADB: Proxy length failure.\n");
 			break;
 		}
-		proxy = B_TRUE;
-		alloclen += paddrsize;
-	} else {
-		proxy = B_FALSE;
+		isrc = B_TRUE;
+		idst = B_TRUE;
+		alloclen += 2 * paddrsize;
 	}
 
 	/* For the following fields, assume that length != 0 ==> stuff */
@@ -1455,17 +1446,6 @@
 		dstid = B_FALSE;
 	}
 
-#if 0 /* XXX PROXY not yet. */
-	if (ipsa->ipsa_proxy_cid != NULL) {
-		proxyidsize = roundup(sizeof (sadb_ident_t) +
-		    strlen(ipsa->ipsa_proxy_cid->ipsid_cid) + 1,
-		    sizeof (uint64_t));
-		alloclen += proxyidsize;
-		proxyid = B_TRUE;
-	} else {
-		proxyid = B_FALSE;
-	}
-#endif /* XXX PROXY */
 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
 		alloclen += sizeof (sadb_x_kmc_t);
 
@@ -1527,8 +1507,10 @@
 
 	cur = (uint8_t *)(lt + 1);
 
+	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
-	    ipsa->ipsa_srcaddr, SA_SRCPORT(ipsa), SA_PROTO(ipsa));
+	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
+	    SA_PROTO(ipsa), 0);
 	if (cur == NULL) {
 		freemsg(mp);
 		mp = NULL;
@@ -1536,7 +1518,8 @@
 	}
 
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
-	    ipsa->ipsa_dstaddr, SA_DSTPORT(ipsa), SA_PROTO(ipsa));
+	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
+	    SA_PROTO(ipsa), 0);
 	if (cur == NULL) {
 		freemsg(mp);
 		mp = NULL;
@@ -1545,7 +1528,7 @@
 
 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
-		    fam, ipsa->ipsa_natt_addr_loc, 0, 0);
+		    fam, ipsa->ipsa_natt_addr_loc, 0, 0, 0);
 		if (cur == NULL) {
 			freemsg(mp);
 			mp = NULL;
@@ -1556,7 +1539,7 @@
 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
 		    fam, ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_port,
-		    IPPROTO_UDP);
+		    IPPROTO_UDP, 0);
 		if (cur == NULL) {
 			freemsg(mp);
 			mp = NULL;
@@ -1564,14 +1547,22 @@
 		}
 	}
 
-	if (proxy) {
-		/*
-		 * XXX PROXY When we expand the definition of proxy to include
-		 * both inner and outer IP addresses, this will have to
-		 * be expanded.
-		 */
-		cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_PROXY,
-		    pfam, ipsa->ipsa_proxysrc, 0, 0);
+	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
+	if (isrc) {
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
+		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
+		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
+		if (cur == NULL) {
+			freemsg(mp);
+			mp = NULL;
+			goto bail;
+		}
+	}
+
+	if (idst) {
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
+		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
+		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
 		if (cur == NULL) {
 			freemsg(mp);
 			mp = NULL;
@@ -1638,20 +1629,6 @@
 		    walker->sadb_ext_len);
 	}
 
-#if 0 /* XXX PROXY not yet */
-	if (proxyid) {
-		ident = (sadb_ident_t *)walker;
-		ident->sadb_ident_len = SADB_8TO64(proxyidsize);
-		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_PROXY;
-		ident->sadb_ident_type = ipsa->ipsa_pcid_type;
-		ident->sadb_ident_id = 0;
-		ident->sadb_ident_reserved = 0;
-		(void) strcpy((char *)(ident + 1), ipsa->ipsa_proxy_cid);
-		walker = (sadb_ext_t *)((uint64_t *)walker +
-		    walker->sadb_ext_len);
-	}
-#endif /* XXX PROXY */
-
 	if (sensinteg) {
 		sens = (sadb_sens_t *)walker;
 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
@@ -1856,8 +1833,8 @@
 	case SADB_GET:
 		/*
 		 * Do a lot of work here, because of the ipsa I just found.
-		 * First abandon the PF_KEY message, then construct
-		 * the new one.
+		 * First construct the new PF_KEY message, then abandon
+		 * the old one.
 		 */
 		mp1 = sadb_sa2msg(ipsa, samsg);
 		if (mp1 == NULL) {
@@ -1950,96 +1927,52 @@
 }
 
 /*
- * Send IRE_DB_REQ down to IP to get properties of address.
- * If I can determine the address, return the proper type.  If an error
- * occurs, or if I have to send down an IRE_DB_REQ, return UNKNOWN, and
- * the caller will just let go of mp w/o freeing it.
+ * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
  *
- * To handle the compatible IPv6 addresses (i.e. ::FFFF:<v4-address>),
- * this function will also convert such AF_INET6 addresses into AF_INET
- * addresses.
- *
- * Whomever called the function will handle the return message that IP sends
- * in response to the message this function generates.
+ * Check addresses themselves for wildcard or multicast.
+ * Check ire table for local/non-local/broadcast.
  */
 int
-sadb_addrcheck(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext,
-    uint_t serial)
+sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial)
 {
 	sadb_address_t *addr = (sadb_address_t *)ext;
 	struct sockaddr_in *sin;
 	struct sockaddr_in6 *sin6;
-	mblk_t *ire_db_req_mp;
 	ire_t *ire;
-	int diagnostic;
+	int diagnostic, type;
+	boolean_t normalized = B_FALSE;
 
 	ASSERT(ext != NULL);
 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
-	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_PROXY));
-
-	ire_db_req_mp = allocb(sizeof (ire_t), BPRI_HI);
-	if (ire_db_req_mp == NULL) {
-		/* cmn_err(CE_WARN, "sadb_addrcheck: allocb() failed.\n"); */
-		sadb_pfkey_error(pfkey_q, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE,
-		    serial);
-		return (KS_IN_ADDR_UNKNOWN);
-	}
-
-	ire_db_req_mp->b_datap->db_type = IRE_DB_REQ_TYPE;
-	ire_db_req_mp->b_wptr += sizeof (ire_t);
-	ire = (ire_t *)ire_db_req_mp->b_rptr;
+	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
+	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
+	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
+	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
 
 	/* Assign both sockaddrs, the compiler will do the right thing. */
 	sin = (struct sockaddr_in *)(addr + 1);
 	sin6 = (struct sockaddr_in6 *)(addr + 1);
 
-	switch (sin->sin_family) {
-	case AF_INET6:
-		/* Because of the longer IPv6 addrs, do check first. */
-		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
-			if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
-				freemsg(ire_db_req_mp);
-				return (KS_IN_ADDR_MBCAST);
-			}
-			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-				freemsg(ire_db_req_mp);
-				return (KS_IN_ADDR_UNSPEC);
-			}
-			ire->ire_ipversion = IPV6_VERSION;
-			ire->ire_addr_v6 = sin6->sin6_addr;
-			break;	/* Out of switch. */
+	if (sin6->sin6_family == AF_INET6) {
+		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+			/*
+			 * Convert to an AF_INET sockaddr.  This means the
+			 * return messages will have the extra space, but have
+			 * AF_INET sockaddrs instead of AF_INET6.
+			 *
+			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
+			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
+			 * equal to AF_INET <v4>, it shouldn't be a huge
+			 * problem.
+			 */
+			sin->sin_family = AF_INET;
+			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
+			    &sin->sin_addr);
+			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
+			normalized = B_TRUE;
 		}
-		/*
-		 * Convert to an AF_INET sockaddr.  This means
-		 * the return messages will have the extra space, but
-		 * have AF_INET sockaddrs instead of AF_INET6.
-		 *
-		 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
-		 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
-		 * equal to AF_INET <v4>, it shouldnt be a huge
-		 * problem.
-		 */
-		ASSERT(&sin->sin_port == &sin6->sin6_port);
-		sin->sin_family = AF_INET;
-		IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr, &sin->sin_addr);
-		bzero(&sin->sin_zero, sizeof (sin->sin_zero));
-		/* FALLTHRU */
-	case AF_INET:
-		ire->ire_ipversion = IPV4_VERSION;
-		ire->ire_addr = sin->sin_addr.s_addr;
-		if (ire->ire_addr == INADDR_ANY) {
-			freemsg(ire_db_req_mp);
-			return (KS_IN_ADDR_UNSPEC);
-		}
-		if (CLASSD(ire->ire_addr)) {
-			freemsg(ire_db_req_mp);
-			return (KS_IN_ADDR_MBCAST);
-		}
-		break;
-	default:
-		freemsg(ire_db_req_mp);
-
+	} else if (sin->sin_family != AF_INET) {
 		switch (ext->sadb_ext_type) {
 		case SADB_EXT_ADDRESS_SRC:
 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
@@ -2047,50 +1980,319 @@
 		case SADB_EXT_ADDRESS_DST:
 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
 			break;
-		case SADB_EXT_ADDRESS_PROXY:
+		case SADB_X_EXT_ADDRESS_INNER_SRC:
 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
 			break;
+		case SADB_X_EXT_ADDRESS_INNER_DST:
+			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
+			break;
+		case SADB_X_EXT_ADDRESS_NATT_LOC:
+			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
+			break;
+		case SADB_X_EXT_ADDRESS_NATT_REM:
+			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
+			break;
 			/* There is no default, see above ASSERT. */
 		}
-
-		sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic, serial);
+bail:
+		if (pfkey_q != NULL) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
+			    serial);
+		} else {
+			/*
+			 * Scribble in sadb_msg that we got passed in.
+			 * Overload "mp" to be an sadb_msg pointer.
+			 */
+			sadb_msg_t *samsg = (sadb_msg_t *)mp;
+
+			samsg->sadb_msg_errno = EINVAL;
+			samsg->sadb_x_msg_diagnostic = diagnostic;
+		}
 		return (KS_IN_ADDR_UNKNOWN);
 	}
-	ire_db_req_mp->b_cont = mp;
-
-	ASSERT(ip_q != NULL);
-	putnext(ip_q, ire_db_req_mp);
-	return (KS_IN_ADDR_UNKNOWN);
+
+	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
+	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
+		/*
+		 * We need only check for prefix issues.
+		 */
+
+		/* Set diagnostic now, in case we need it later. */
+		diagnostic =
+		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
+		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
+		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
+
+		if (normalized)
+			addr->sadb_address_prefixlen -= 96;
+
+		/*
+		 * Verify and mask out inner-addresses based on prefix length.
+		 */
+		if (sin->sin_family == AF_INET) {
+			if (addr->sadb_address_prefixlen > 32)
+				goto bail;
+			sin->sin_addr.s_addr &=
+			    ip_plen_to_mask(addr->sadb_address_prefixlen);
+		} else {
+			in6_addr_t mask;
+
+			ASSERT(sin->sin_family == AF_INET6);
+			/*
+			 * ip_plen_to_mask_v6() returns NULL if the value in
+			 * question is out of range.
+			 */
+			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
+				&mask) == NULL)
+				goto bail;
+			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
+			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
+			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
+			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
+		}
+
+		/* We don't care in these cases. */
+		return (KS_IN_ADDR_DONTCARE);
+	}
+
+	if (sin->sin_family == AF_INET6) {
+		/* Check the easy ones now. */
+		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+			return (KS_IN_ADDR_MBCAST);
+		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+			return (KS_IN_ADDR_UNSPEC);
+		/*
+		 * At this point, we're a unicast IPv6 address.
+		 *
+		 * A ctable lookup for local is sufficient here.  If we're
+		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
+		 *
+		 * XXX Zones alert -> me/notme decision needs to be tempered
+		 * by what zone we're in when we go to zone-aware IPsec.
+		 */
+		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
+		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE);
+		if (ire != NULL) {
+			/* Hey hey, it's local. */
+			IRE_REFRELE(ire);
+			return (KS_IN_ADDR_ME);
+		}
+	} else {
+		ASSERT(sin->sin_family == AF_INET);
+		if (sin->sin_addr.s_addr == INADDR_ANY)
+			return (KS_IN_ADDR_UNSPEC);
+		if (CLASSD(sin->sin_addr.s_addr))
+			return (KS_IN_ADDR_MBCAST);
+		/*
+		 * At this point we're a unicast or broadcast IPv4 address.
+		 *
+		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
+		 * A NULL return value is NOTME, otherwise, look at the
+		 * returned ire for broadcast or not and return accordingly.
+		 *
+		 * XXX Zones alert -> me/notme decision needs to be tempered
+		 * by what zone we're in when we go to zone-aware IPsec.
+		 */
+		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
+		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
+		    MATCH_IRE_TYPE);
+		if (ire != NULL) {
+			/* Check for local or broadcast */
+			type = ire->ire_type;
+			IRE_REFRELE(ire);
+			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
+			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
+			    KS_IN_ADDR_MBCAST);
+		}
+	}
+
+	return (KS_IN_ADDR_NOTME);
 }
 
 /*
+ * Address normalizations and reality checks for inbound PF_KEY messages.
+ *
  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
- * the source to AF_INET.
+ * the source to AF_INET.  Do the same for the inner sources.
  */
-void
-sadb_srcaddrfix(keysock_in_t *ksi)
+boolean_t
+sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp)
 {
-	struct sockaddr_in *src;
-	struct sockaddr_in6 *dst;
+	struct sockaddr_in *src, *isrc;
+	struct sockaddr_in6 *dst, *idst;
 	sadb_address_t *srcext, *dstext;
 	uint16_t sport;
-
-	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC ||
-	    ksi->ks_in_dsttype == KS_IN_ADDR_NOTTHERE)
-		return;
-
-	dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
-	dst = (struct sockaddr_in6 *)(dstext + 1);
-	srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
-	src = (struct sockaddr_in *)(srcext + 1);
+	sadb_ext_t **extv = ksi->ks_in_extv;
+	int rc;
+
+	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
+		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
+		    ksi->ks_in_serial);
+		if (rc == KS_IN_ADDR_UNKNOWN)
+			return (B_FALSE);
+		if (rc == KS_IN_ADDR_MBCAST) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+		ksi->ks_in_srctype = rc;
+	}
+
+	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
+		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
+		    ksi->ks_in_serial);
+		if (rc == KS_IN_ADDR_UNKNOWN)
+			return (B_FALSE);
+		if (rc == KS_IN_ADDR_UNSPEC) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+		ksi->ks_in_dsttype = rc;
+	}
 
 	/*
-	 * If unspecified IPv4 source, but an IPv6 dest, don't bother
-	 * fixing, as it should be an error.
+	 * NAT-Traversal addrs are simple enough to not require all of
+	 * the checks in sadb_addrcheck().  Just normalize or reject if not
+	 * AF_INET.
 	 */
-	if (dst->sin6_family == src->sin_family ||
-	    src->sin_family == AF_INET)
-		return;
+	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
+		rc = sadb_addrcheck(pfkey_q, mp,
+		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial);
+
+		/*
+		 * NATT addresses never use an IRE_LOCAL, so it should
+		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
+		 */
+		if (rc != KS_IN_ADDR_NOTME &&
+		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
+			rc == KS_IN_ADDR_UNSPEC)) {
+			if (rc != KS_IN_ADDR_UNKNOWN)
+				sadb_pfkey_error(pfkey_q, mp, EINVAL,
+				    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
+				    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+		src = (struct sockaddr_in *)
+		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
+		if (src->sin_family != AF_INET) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+	}
+
+	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
+		rc = sadb_addrcheck(pfkey_q, mp,
+		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial);
+
+		/*
+		 * NATT addresses never use an IRE_LOCAL, so it should
+		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
+		 */
+		if (rc != KS_IN_ADDR_NOTME &&
+		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
+			rc == KS_IN_ADDR_UNSPEC)) {
+			if (rc != KS_IN_ADDR_UNKNOWN)
+				sadb_pfkey_error(pfkey_q, mp, EINVAL,
+				    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
+				    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+		src = (struct sockaddr_in *)
+		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
+		if (src->sin_family != AF_INET) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+	}
+
+	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
+		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+
+		if (sadb_addrcheck(pfkey_q, mp,
+			extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial)
+		    == KS_IN_ADDR_UNKNOWN ||
+		    sadb_addrcheck(pfkey_q, mp,
+			extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial)
+		    == KS_IN_ADDR_UNKNOWN)
+			return (B_FALSE);
+
+		isrc = (struct sockaddr_in *)
+		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
+			1);
+		idst = (struct sockaddr_in6 *)
+		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
+			1);
+		if (isrc->sin_family != idst->sin6_family) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+	} else {
+		isrc = NULL;	/* For inner/outer port check below. */
+	}
+
+	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
+	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
+
+	if (dstext == NULL || srcext == NULL)
+		return (B_TRUE);
+
+	dst = (struct sockaddr_in6 *)(dstext + 1);
+	src = (struct sockaddr_in *)(srcext + 1);
+
+	if (isrc != NULL &&
+	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
+	    (src->sin_port != 0 || dst->sin6_port != 0)) {
+		/* Can't set inner and outer ports in one SA. */
+		sadb_pfkey_error(pfkey_q, mp, EINVAL,
+		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
+		    ksi->ks_in_serial);
+		return (B_FALSE);
+	}
+
+	if (dst->sin6_family == src->sin_family)
+		return (B_TRUE);
+
+	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
+		if (srcext->sadb_address_proto == 0) {
+			srcext->sadb_address_proto = dstext->sadb_address_proto;
+		} else if (dstext->sadb_address_proto == 0) {
+			dstext->sadb_address_proto = srcext->sadb_address_proto;
+		} else {
+			/* Unequal protocols, neither was 0.  Report error. */
+			sadb_pfkey_error(pfkey_q, mp, EINVAL,
+			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
+			    ksi->ks_in_serial);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * With the exception of an unspec IPv6 source and an IPv4
+	 * destination, address families MUST be matched.
+	 */
+	if (src->sin_family == AF_INET ||
+	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
+		sadb_pfkey_error(pfkey_q, mp, EINVAL,
+		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
+		return (B_FALSE);
+	}
 
 	/*
 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
@@ -2100,6 +2302,8 @@
 	bzero(src, sizeof (*src));
 	src->sin_family = AF_INET;
 	src->sin_port = sport;
+
+	return (B_TRUE);
 }
 
 /*
@@ -2177,8 +2381,8 @@
  * Don't kill larval SA's in such a purge.
  */
 int
-sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
-    int *diagnostic, queue_t *pfkey_q, queue_t *ip_q)
+sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
+    queue_t *ip_q)
 {
 	sadb_address_t *dstext =
 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
@@ -2232,14 +2436,9 @@
 		} else {
 			ps.src = (uint32_t *)&src->sin_addr;
 		}
-
-		if (dstext != NULL) {
-			if (src->sin_family != dst->sin_family) {
-				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-				return (EINVAL);
-			}
-		}
-	}
+		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
+	}
+
 	ASSERT(ps.af != (sa_family_t)-1);
 
 	if (dstid != NULL) {
@@ -2331,10 +2530,7 @@
 		if (srcext != NULL) {
 			src6 = (struct sockaddr_in6 *)(srcext + 1);
 			srcaddr = (uint32_t *)&src6->sin6_addr;
-			if (src6->sin6_family != AF_INET6) {
-				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-				return (EINVAL);
-			}
+			ASSERT(src6->sin6_family == AF_INET6);
 		} else {
 			srcaddr = ALL_ZEROES_PTR;
 		}
@@ -2346,10 +2542,7 @@
 		if (srcext != NULL) {
 			src = (struct sockaddr_in *)(srcext + 1);
 			srcaddr = (uint32_t *)&src->sin_addr;
-			if (src->sin_family != AF_INET) {
-				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-				return (EINVAL);
-			}
+			ASSERT(src->sin_family == AF_INET);
 		} else {
 			srcaddr = ALL_ZEROES_PTR;
 		}
@@ -2458,110 +2651,17 @@
 	mutex_exit(&alg_lock);
 }
 
-
 /*
- * This function is called from consumers that need to insert a fully-grown
- * security association into its tables.  This function takes into account that
- * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
- * hash bucket parameters are set in order of what the SA will be most of the
- * time.  (For example, an SA with an unspecified source, and a multicast
- * destination will primarily be an outbound SA.  OTOH, if that destination
- * is unicast for this node, then the SA will primarily be inbound.)
- *
- * It takes a lot of parameters because even if clone is B_FALSE, this needs
- * to check both buckets for purposes of collision.
- *
- * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
- * various error conditions.  No need to set samsg->sadb_x_msg_diagnostic with
- * additional diagnostic information because ENOMEM and EEXIST are self-
- * explanitory.
+ * Perform NAT-traversal cached checksum offset calculations here.
  */
-int
-sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
-    keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
-    ipsa_t *newbie, boolean_t clone, boolean_t is_inbound)
+static void
+sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
+    sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
+    uint32_t *dst_addr_ptr)
 {
-	ipsa_t *newbie_clone = NULL, *scratch;
-	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
-	sadb_address_t *srcext =
-	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
-	sadb_address_t *dstext =
-	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
-	sadb_address_t *proxyext =
-	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_PROXY];
-	sadb_address_t *natt_loc_ext =
-	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
-	sadb_address_t *natt_rem_ext =
-	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
-	sadb_x_kmc_t *kmcext =
-	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
-	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
-	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
-#if 0
-	/*
-	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
-	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
-	 */
-	sadb_sens_t *sens = (sadb_sens_t *);
-#endif
-	struct sockaddr_in *src, *dst, *proxy, *natt_loc, *natt_rem;
-	struct sockaddr_in6 *src6, *dst6, *proxy6, *natt_loc6, *natt_rem6;
-	sadb_lifetime_t *soft =
-	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
-	sadb_lifetime_t *hard =
-	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
-	sa_family_t af;
-	int error = 0;
-	boolean_t isupdate = (newbie != NULL);
-	uint32_t *src_addr_ptr, *dst_addr_ptr, *proxy_addr_ptr;
+	struct sockaddr_in *natt_loc, *natt_rem;
 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
 	uint32_t running_sum = 0;
-	mblk_t *ctl_mp = NULL;
-
-	src = (struct sockaddr_in *)(srcext + 1);
-	src6 = (struct sockaddr_in6 *)(srcext + 1);
-	dst = (struct sockaddr_in *)(dstext + 1);
-	dst6 = (struct sockaddr_in6 *)(dstext + 1);
-	if (proxyext != NULL) {
-		proxy = (struct sockaddr_in *)(proxyext + 1);
-		proxy6 = (struct sockaddr_in6 *)(proxyext + 1);
-	} else {
-		proxy = NULL;
-		proxy6 = NULL;
-	}
-
-	af = src->sin_family;
-
-	if (af == AF_INET) {
-		src_addr_ptr = (uint32_t *)&src->sin_addr;
-		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
-	} else {
-		ASSERT(af == AF_INET6);
-		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
-		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
-	}
-
-	if (!isupdate) {
-		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
-		    src_addr_ptr, dst_addr_ptr, af);
-		if (newbie == NULL)
-			return (ENOMEM);
-	}
-
-	mutex_enter(&newbie->ipsa_lock);
-
-	if (proxy != NULL) {
-		if (proxy->sin_family == AF_INET) {
-			proxy_addr_ptr = (uint32_t *)&proxy->sin_addr;
-		} else {
-			ASSERT(proxy->sin_family == AF_INET6);
-			proxy_addr_ptr = (uint32_t *)&proxy6->sin6_addr;
-		}
-		newbie->ipsa_proxyfam = proxy->sin_family;
-
-		IPSA_COPY_ADDR(newbie->ipsa_proxysrc, proxy_addr_ptr,
-		    newbie->ipsa_proxyfam);
-	}
 
 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
 
@@ -2571,26 +2671,17 @@
 		uint32_t l_rem;
 
 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
-		natt_rem6 = (struct sockaddr_in6 *)(natt_rem_ext + 1);
-
-		if (natt_rem->sin_family == AF_INET) {
-			natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
-			newbie->ipsa_remote_port = natt_rem->sin_port;
-			l_src = *src_addr_ptr;
-			l_rem = *natt_rem_ptr;
-		} else {
-			if (!IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
-				goto error;
-			}
-			ASSERT(natt_rem->sin_family == AF_INET6);
-
-			natt_rem_ptr = ((uint32_t *)
-			    (&natt_rem6->sin6_addr)) + 3;
-			newbie->ipsa_remote_port = natt_rem6->sin6_port;
-			l_src = *src_addr_ptr;
-			l_rem = *natt_rem_ptr;
-		}
-		IPSA_COPY_ADDR(newbie->ipsa_natt_addr_rem, natt_rem_ptr, af);
+
+		/* Ensured by sadb_addrfix(). */
+		ASSERT(natt_rem->sin_family == AF_INET);
+
+		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
+		newbie->ipsa_remote_port = natt_rem->sin_port;
+		l_src = *src_addr_ptr;
+		l_rem = *natt_rem_ptr;
+
+		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
+		newbie->ipsa_natt_addr_rem[0] = *natt_rem_ptr;
 
 		l_src = ntohl(l_src);
 		DOWN_SUM(l_src);
@@ -2617,24 +2708,17 @@
 		uint32_t l_loc;
 
 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
-		natt_loc6 = (struct sockaddr_in6 *)(natt_loc_ext + 1);
-
-		if (natt_loc->sin_family == AF_INET) {
-			natt_loc_ptr = (uint32_t *)&natt_loc->sin_addr;
-			l_dst = *dst_addr_ptr;
-			l_loc = *natt_loc_ptr;
-
-		} else {
-			if (!IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
-				goto error;
-			}
-			ASSERT(natt_loc->sin_family == AF_INET6);
-			natt_loc_ptr = ((uint32_t *)&natt_loc6->sin6_addr) + 3;
-			l_dst = *dst_addr_ptr;
-			l_loc = *natt_loc_ptr;
-
-		}
-		IPSA_COPY_ADDR(newbie->ipsa_natt_addr_loc, natt_loc_ptr, af);
+
+		/* Ensured by sadb_addrfix(). */
+		ASSERT(natt_loc->sin_family == AF_INET);
+
+		natt_loc_ptr = (uint32_t *)&natt_loc->sin_addr;
+		/* TODO - future port flexibility beyond 4500. */
+		l_dst = *dst_addr_ptr;
+		l_loc = *natt_loc_ptr;
+
+		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
+		newbie->ipsa_natt_addr_loc[0] = *natt_loc_ptr;
 
 		l_loc = ntohl(l_loc);
 		DOWN_SUM(l_loc);
@@ -2657,12 +2741,195 @@
 
 	newbie->ipsa_inbound_cksum = running_sum;
 #undef DOWN_SUM
+}
+
+/*
+ * This function is called from consumers that need to insert a fully-grown
+ * security association into its tables.  This function takes into account that
+ * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
+ * hash bucket parameters are set in order of what the SA will be most of the
+ * time.  (For example, an SA with an unspecified source, and a multicast
+ * destination will primarily be an outbound SA.  OTOH, if that destination
+ * is unicast for this node, then the SA will primarily be inbound.)
+ *
+ * It takes a lot of parameters because even if clone is B_FALSE, this needs
+ * to check both buckets for purposes of collision.
+ *
+ * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
+ * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
+ * with additional diagnostic information because there is at least one EINVAL
+ * case here.
+ */
+int
+sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
+    keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
+    ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic)
+{
+	ipsa_t *newbie_clone = NULL, *scratch;
+	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
+	sadb_address_t *srcext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
+	sadb_address_t *dstext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
+	sadb_address_t *isrcext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
+	sadb_address_t *idstext =
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
+	sadb_x_kmc_t *kmcext =
+	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
+	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
+	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
+#if 0
+	/*
+	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
+	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
+	 */
+	sadb_sens_t *sens = (sadb_sens_t *);
+#endif
+	struct sockaddr_in *src, *dst, *isrc, *idst;
+	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
+	sadb_lifetime_t *soft =
+	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
+	sadb_lifetime_t *hard =
+	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
+	sa_family_t af;
+	int error = 0;
+	boolean_t isupdate = (newbie != NULL);
+	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
+	mblk_t *ctl_mp = NULL;
+
+	src = (struct sockaddr_in *)(srcext + 1);
+	src6 = (struct sockaddr_in6 *)(srcext + 1);
+	dst = (struct sockaddr_in *)(dstext + 1);
+	dst6 = (struct sockaddr_in6 *)(dstext + 1);
+	if (isrcext != NULL) {
+		isrc = (struct sockaddr_in *)(isrcext + 1);
+		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
+		ASSERT(idstext != NULL);
+		idst = (struct sockaddr_in *)(idstext + 1);
+		idst6 = (struct sockaddr_in6 *)(idstext + 1);
+	} else {
+		isrc = NULL;
+		isrc6 = NULL;
+	}
+
+	af = src->sin_family;
+
+	if (af == AF_INET) {
+		src_addr_ptr = (uint32_t *)&src->sin_addr;
+		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
+	} else {
+		ASSERT(af == AF_INET6);
+		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
+		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
+	}
+
+	if (!isupdate) {
+		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
+		    src_addr_ptr, dst_addr_ptr, af);
+		if (newbie == NULL)
+			return (ENOMEM);
+	}
+
+	mutex_enter(&newbie->ipsa_lock);
+
+	if (isrc != NULL) {
+		if (isrc->sin_family == AF_INET) {
+			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
+				if (srcext->sadb_address_proto != 0) {
+					/*
+					 * Mismatched outer-packet protocol
+					 * and inner-packet address family.
+					 */
+					mutex_exit(&newbie->ipsa_lock);
+					error = EPROTOTYPE;
+					goto error;
+				} else {
+					/* Fill in with explicit protocol. */
+					srcext->sadb_address_proto =
+					    IPPROTO_ENCAP;
+					dstext->sadb_address_proto =
+					    IPPROTO_ENCAP;
+				}
+			}
+			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
+			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
+		} else {
+			ASSERT(isrc->sin_family == AF_INET6);
+			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
+				if (srcext->sadb_address_proto != 0) {
+					/*
+					 * Mismatched outer-packet protocol
+					 * and inner-packet address family.
+					 */
+					mutex_exit(&newbie->ipsa_lock);
+					error = EPROTOTYPE;
+					goto error;
+				} else {
+					/* Fill in with explicit protocol. */
+					srcext->sadb_address_proto =
+					    IPPROTO_IPV6;
+					dstext->sadb_address_proto =
+					    IPPROTO_IPV6;
+				}
+			}
+			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
+			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
+		}
+		newbie->ipsa_innerfam = isrc->sin_family;
+
+		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
+		    newbie->ipsa_innerfam);
+		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
+		    newbie->ipsa_innerfam);
+		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
+		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
+
+		/* Unique value uses inner-ports for Tunnel Mode... */
+		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
+		    idst->sin_port, dstext->sadb_address_proto,
+		    idstext->sadb_address_proto);
+		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
+		    idst->sin_port, dstext->sadb_address_proto,
+		    idstext->sadb_address_proto);
+	} else {
+		/* ... and outer-ports for Transport Mode. */
+		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
+		    dst->sin_port, dstext->sadb_address_proto, 0);
+		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
+		    dst->sin_port, dstext->sadb_address_proto, 0);
+	}
+	if (newbie->ipsa_unique_mask != (uint64_t)0)
+		newbie->ipsa_flags |= IPSA_F_UNIQUE;
+
+
+	sadb_nat_calculations(newbie,
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
+	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
+	    src_addr_ptr, dst_addr_ptr);
 
 	newbie->ipsa_type = samsg->sadb_msg_satype;
 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
-	newbie->ipsa_flags = assoc->sadb_sa_flags;
+	/*
+	 * Use |= because we set unique fields above.  UNIQUE is filtered
+	 * out before we reach here so it's not like we're sabotaging anything.
+	 * ASSERT we're either 0 or UNIQUE for good measure, though.
+	 */
+	ASSERT((newbie->ipsa_flags & IPSA_F_UNIQUE) == newbie->ipsa_flags);
+	newbie->ipsa_flags |= assoc->sadb_sa_flags;
+	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
+		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
+	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
+		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
+	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
+		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
+		mutex_exit(&newbie->ipsa_lock);
+		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
+		error = EINVAL;
+		goto error;
+	}
 	/*
 	 * If unspecified source address, force replay_wsize to 0.
 	 * This is because an SA that has multiple sources of secure
@@ -2676,15 +2943,6 @@
 
 	(void) drv_getparm(TIME, &newbie->ipsa_addtime);
 
-	/* Set unique value */
-	newbie->ipsa_unique_id = SA_UNIQUE_ID((uint16_t)src->sin_port,
-	    (uint16_t)dst->sin_port, dstext->sadb_address_proto);
-	newbie->ipsa_unique_mask = SA_UNIQUE_MASK((uint16_t)src->sin_port,
-	    (uint16_t)dst->sin_port, dstext->sadb_address_proto);
-
-	if (newbie->ipsa_unique_mask != 0)
-		newbie->ipsa_flags |= IPSA_F_UNIQUE;
-
 	if (kmcext != NULL) {
 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
@@ -3053,6 +3311,7 @@
 	sadb_lifetime_t *current, *expire;
 	sadb_sa_t *saext;
 	uint8_t *end;
+	boolean_t tunnel_mode;
 
 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
 
@@ -3068,7 +3327,7 @@
 	}
 
 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
-	    2*sizeof (sadb_address_t) + sizeof (*saext);
+	    2 * sizeof (sadb_address_t) + sizeof (*saext);
 
 	af = assoc->ipsa_addrfam;
 	switch (af) {
@@ -3086,6 +3345,25 @@
 		return;
 	}
 
+	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
+	if (tunnel_mode) {
+		alloclen += 2 * sizeof (sadb_address_t);
+		switch (assoc->ipsa_innerfam) {
+		case AF_INET:
+			alloclen += 2 * sizeof (struct sockaddr_in);
+			break;
+		case AF_INET6:
+			alloclen += 2 * sizeof (struct sockaddr_in6);
+			break;
+		default:
+			/* Won't happen unless there's a kernel bug. */
+			freeb(mp);
+			cmn_err(CE_WARN, "sadb_expire_assoc: "
+			    "Unknown inner address length.\n");
+			return;
+		}
+	}
+
 	mp->b_cont = allocb(alloclen, BPRI_HI);
 	if (mp->b_cont == NULL) {
 		freeb(mp);
@@ -3149,13 +3427,28 @@
 	}
 
 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
-	    af, assoc->ipsa_srcaddr, SA_SRCPORT(assoc), SA_PROTO(assoc));
+	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
+	    SA_PROTO(assoc), 0);
 	ASSERT(mp->b_wptr != NULL);
 
 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
-	    af, assoc->ipsa_dstaddr, SA_DSTPORT(assoc), SA_PROTO(assoc));
+	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
+	    SA_PROTO(assoc), 0);
 	ASSERT(mp->b_wptr != NULL);
 
+	if (tunnel_mode) {
+		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
+		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
+		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
+		    assoc->ipsa_innersrcpfx);
+		ASSERT(mp->b_wptr != NULL);
+		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
+		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
+		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
+		    assoc->ipsa_innerdstpfx);
+		ASSERT(mp->b_wptr != NULL);
+	}
+
 	/* Can just putnext, we're ready to go! */
 	putnext(pfkey_q, mp1);
 }
@@ -3708,11 +4001,6 @@
 		srcaddr = (uint32_t *)&src6->sin6_addr;
 		dstaddr = (uint32_t *)&dst6->sin6_addr;
 		outbound = OUTBOUND_BUCKET_V6(sp, *(uint32_t *)dstaddr);
-#if 0
-		/* Not used for now... */
-		if (proxyext != NULL)
-			proxy6 = (struct sockaddr_in6 *)(proxyext + 1);
-#endif
 	} else {
 		srcaddr = (uint32_t *)&src->sin_addr;
 		dstaddr = (uint32_t *)&dst->sin_addr;
@@ -3772,11 +4060,7 @@
 		error = EINVAL;
 		goto bail;
 	}
-	if (src->sin_family != dst->sin_family) {
-		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-		error = EINVAL;
-		goto bail;
-	}
+	ASSERT(src->sin_family == dst->sin_family);
 	if (akey != NULL) {
 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
 		error = EINVAL;
@@ -3880,10 +4164,17 @@
  */
 static ipsacq_t *
 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
-    uint32_t *src, uint32_t *dst, uint64_t unique_id)
+    uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
+    uint64_t unique_id)
 {
 	ipsacq_t *walker;
 	sa_family_t fam;
+	uint32_t blank_address[4] = {0, 0, 0, 0};
+
+	if (isrc == NULL) {
+		ASSERT(idst == NULL);
+		isrc = idst = blank_address;
+	}
 
 	/*
 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
@@ -3896,7 +4187,10 @@
 		fam = walker->ipsacq_addrfam;
 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
-		    /* XXX PROXY should check for proxy addr here */
+		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
+			(in6_addr_t *)walker->ipsacq_innersrc) &&
+		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
+			(in6_addr_t *)walker->ipsacq_innerdst) &&
 		    (ap == walker->ipsacq_act) &&
 		    (pp == walker->ipsacq_policy) &&
 		    /* XXX do deep compares of ap/pp? */
@@ -3929,7 +4223,7 @@
 	mblk_t *extended;
 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
-	uint32_t *src, *dst;
+	uint32_t *src, *dst, *isrc, *idst;
 	ipsec_policy_t *pp = io->ipsec_out_policy;
 	ipsec_action_t *ap = io->ipsec_out_act;
 	sa_family_t af;
@@ -3937,6 +4231,7 @@
 	uint32_t seq;
 	uint64_t unique_id = 0;
 	ipsec_selector_t sel;
+	boolean_t tunnel_mode = io->ipsec_out_tunnel;
 
 	ASSERT((pp != NULL) || (ap != NULL));
 
@@ -3950,18 +4245,12 @@
 
 	ASSERT(ap != NULL);
 
-	if (ap->ipa_act.ipa_apply.ipp_use_unique)
+	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
 		unique_id = SA_FORM_UNIQUE_ID(io);
 
 	/*
 	 * Set up an ACQUIRE record.
 	 *
-	 * Will eventually want to pull the PROXY source address from
-	 * either the inner IP header, or from a future extension to the
-	 * IPSEC_OUT message.
-	 *
-	 * Actually, we'll also want to check for duplicates.
-	 *
 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
 	 * below the lowest point allowed in the kernel.  (In other words,
 	 * make sure the high bit on the sequence number is set.)
@@ -3969,19 +4258,9 @@
 
 	seq = keysock_next_seq() | IACQF_LOWEST_SEQ;
 
-	sel.ips_isv4 = io->ipsec_out_v4;
-	sel.ips_protocol = io->ipsec_out_proto;
-	sel.ips_local_port = io->ipsec_out_src_port;
-	sel.ips_remote_port = io->ipsec_out_dst_port;
-	sel.ips_icmp_type = io->ipsec_out_icmp_type;
-	sel.ips_icmp_code = io->ipsec_out_icmp_code;
-	sel.ips_is_icmp_inv_acq = 0;
 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
 		src = (uint32_t *)&ipha->ipha_src;
 		dst = (uint32_t *)&ipha->ipha_dst;
-		/* No compiler dain-bramage (4438087) for IPv4 addresses. */
-		sel.ips_local_addr_v4 = ipha->ipha_src;
-		sel.ips_remote_addr_v4 = ipha->ipha_dst;
 		af = AF_INET;
 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
 		ASSERT(io->ipsec_out_v4 == B_TRUE);
@@ -3989,20 +4268,27 @@
 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
 		src = (uint32_t *)&ip6h->ip6_src;
 		dst = (uint32_t *)&ip6h->ip6_dst;
-		sel.ips_local_addr_v6 = ip6h->ip6_src;
-		sel.ips_remote_addr_v6 = ip6h->ip6_dst;
 		af = AF_INET6;
 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
 		ASSERT(io->ipsec_out_v4 == B_FALSE);
 	}
 
+	if (tunnel_mode) {
+		/* Snag inner addresses. */
+		isrc = io->ipsec_out_insrc;
+		idst = io->ipsec_out_indst;
+	} else {
+		isrc = idst = NULL;
+	}
+
 	/*
 	 * Check buckets to see if there is an existing entry.  If so,
 	 * grab it.  sadb_checkacquire locks newbie if found.
 	 */
 	bucket = &(sp->sdb_acq[hashoffset]);
 	mutex_enter(&bucket->iacqf_lock);
-	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, unique_id);
+	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
+	    unique_id);
 
 	if (newbie == NULL) {
 		/*
@@ -4058,7 +4344,19 @@
 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
-		newbie->ipsacq_proto = io->ipsec_out_proto;
+		if (tunnel_mode) {
+			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
+			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
+			    IPPROTO_IPV6 : IPPROTO_ENCAP;
+			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
+			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
+			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
+			    io->ipsec_out_insrc, io->ipsec_out_inaf);
+			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
+			    io->ipsec_out_indst, io->ipsec_out_inaf);
+		} else {
+			newbie->ipsacq_proto = io->ipsec_out_proto;
+		}
 		newbie->ipsacq_unique_id = unique_id;
 	} else {
 		/* Scan to the end of the list & insert. */
@@ -4104,10 +4402,31 @@
 		 * opportunities here in failure cases.
 		 */
 
+		(void) memset(&sel, 0, sizeof (sel));
+		sel.ips_isv4 = io->ipsec_out_v4;
+		if (tunnel_mode) {
+			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
+			    IPPROTO_ENCAP : IPPROTO_IPV6;
+		} else {
+			sel.ips_protocol = io->ipsec_out_proto;
+			sel.ips_local_port = io->ipsec_out_src_port;
+			sel.ips_remote_port = io->ipsec_out_dst_port;
+		}
+		sel.ips_icmp_type = io->ipsec_out_icmp_type;
+		sel.ips_icmp_code = io->ipsec_out_icmp_code;
+		sel.ips_is_icmp_inv_acq = 0;
+		if (af == AF_INET) {
+			sel.ips_local_addr_v4 = ipha->ipha_src;
+			sel.ips_remote_addr_v4 = ipha->ipha_dst;
+		} else {
+			sel.ips_local_addr_v6 = ip6h->ip6_src;
+			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
+		}
+
 		extended = sadb_keysock_out(0);
 		if (extended != NULL) {
 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
-			    seq, 0);
+			    tunnel_mode, seq, 0);
 			if (extended->b_cont == NULL) {
 				freeb(extended);
 				extended = NULL;
@@ -4324,7 +4643,7 @@
  */
 static mblk_t *
 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
-    ipsec_action_t *act, uint32_t seq, uint32_t pid)
+    ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid)
 {
 	mblk_t *mp;
 	sadb_msg_t *samsg;
@@ -4333,7 +4652,8 @@
 	sa_family_t af;
 	sadb_prop_t *eprop;
 	ipsec_action_t *ap, *an;
-	uint8_t proto;
+	ipsec_selkey_t *ipsl;
+	uint8_t proto, pfxlen;
 	uint16_t lport, rport;
 	uint32_t kmp, kmc;
 
@@ -4358,15 +4678,6 @@
 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
 	if (mp == NULL)
 		return (NULL);
-	if (sel->ips_isv4) {
-		af = AF_INET;
-		saddrptr = (uint32_t *)(&sel->ips_local_addr_v4);
-		daddrptr = (uint32_t *)(&sel->ips_remote_addr_v4);
-	} else {
-		af = AF_INET6;
-		saddrptr = (uint32_t *)(&sel->ips_local_addr_v6);
-		daddrptr = (uint32_t *)(&sel->ips_remote_addr_v6);
-	}
 
 	start = mp->b_rptr;
 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
@@ -4384,32 +4695,103 @@
 	samsg->sadb_msg_seq = seq;
 	samsg->sadb_msg_pid = pid;
 
-	proto = sel->ips_protocol;
-	lport = sel->ips_local_port;
-	rport = sel->ips_remote_port;
-
-	/*
-	 * Unless our policy says "sa unique", drop port/proto
-	 * selectors, then add them back if policy rule includes them..
-	 */
-
-	if ((ap != NULL) && (!ap->ipa_want_unique)) {
+	if (tunnel_mode) {
+		/*
+		 * Form inner address extensions based NOT on the inner
+		 * selectors (i.e. the packet data), but on the policy's
+		 * selector key (i.e. the policy's selector information).
+		 *
+		 * NOTE:  The position of IPv4 and IPv6 addresses is the
+		 * same in ipsec_selkey_t (unless the compiler does very
+		 * strange things with unions, consult your local C language
+		 * lawyer for details).
+		 */
+		ipsl = &(pol->ipsp_sel->ipsl_key);
+		if (ipsl->ipsl_valid & IPSL_IPV4) {
+			af = AF_INET;
+			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
+			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
+		} else {
+			af = AF_INET6;
+			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
+			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
+		}
+
+		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
+			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
+			pfxlen = ipsl->ipsl_local_pfxlen;
+		} else {
+			saddrptr = (uint32_t *)(&ipv6_all_zeros);
+			pfxlen = 0;
+		}
+		/* XXX What about ICMP type/code? */
+		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
+		    ipsl->ipsl_lport : 0;
+		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
+		    ipsl->ipsl_proto : 0;
+
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
+		    af, saddrptr, lport, proto, pfxlen);
+		if (cur == NULL) {
+			freeb(mp);
+			return (NULL);
+		}
+
+		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
+			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
+			pfxlen = ipsl->ipsl_remote_pfxlen;
+		} else {
+			daddrptr = (uint32_t *)(&ipv6_all_zeros);
+			pfxlen = 0;
+		}
+		/* XXX What about ICMP type/code? */
+		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
+		    ipsl->ipsl_rport : 0;
+
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
+		    af, daddrptr, rport, proto, pfxlen);
+		if (cur == NULL) {
+			freeb(mp);
+			return (NULL);
+		}
+		/*
+		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
+		 * _with_ inner-packet address selectors, we'll need to further
+		 * distinguish tunnel mode here.  For now, having inner
+		 * addresses and/or ports is sufficient.
+		 *
+		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
+		 * outer addresses.
+		 */
+		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
+		lport = rport = 0;
+	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
 		proto = 0;
 		lport = 0;
 		rport = 0;
 		if (pol != NULL) {
-			ipsec_selkey_t *psel = &pol->ipsp_sel->ipsl_key;
-			if (psel->ipsl_valid & IPSL_PROTOCOL)
-				proto = psel->ipsl_proto;
-			if (psel->ipsl_valid & IPSL_REMOTE_PORT)
-				rport = psel->ipsl_rport;
-			if (psel->ipsl_valid & IPSL_LOCAL_PORT)
-				lport = psel->ipsl_lport;
+			ipsl = &(pol->ipsp_sel->ipsl_key);
+			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
+				proto = ipsl->ipsl_proto;
+			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
+				rport = ipsl->ipsl_rport;
+			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
+				lport = ipsl->ipsl_lport;
 		}
-	}
-
+	} else {
+		proto = sel->ips_protocol;
+		lport = sel->ips_local_port;
+		rport = sel->ips_remote_port;
+	}
+
+	af = sel->ips_isv4 ? AF_INET : AF_INET6;
+
+	/*
+	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
+	 * ipsec_selector_t.
+	 */
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
-	    saddrptr, lport, proto);
+	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
 
 	if (cur == NULL) {
 		freeb(mp);
@@ -4417,7 +4799,7 @@
 	}
 
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
-	    daddrptr, rport, proto);
+	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
 
 	if (cur == NULL) {
 		freeb(mp);
@@ -4490,30 +4872,73 @@
 	}
 
 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
-	samsg->sadb_msg_len = SADB_8TO64(cur-start);
+	samsg->sadb_msg_len = SADB_8TO64(cur - start);
 	mp->b_wptr = cur;
 
 	return (mp);
 }
 
 /*
- * Generic setup of an ACQUIRE message.	 Caller sets satype.
+ * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
+ *
+ * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
+ * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
+ * maximize code consolidation while preventing algorithm changes from messing
+ * with the caller's finishing touches on the ACQUIRE itself.
  */
-uint8_t *
-sadb_setup_acquire(uint8_t *start, uint8_t *end, ipsacq_t *acqrec)
+mblk_t *
+sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype)
 {
+	uint_t allocsize;
+	mblk_t *pfkeymp, *msgmp;
 	sa_family_t af;
-	uint8_t *cur = start;
-	sadb_msg_t *samsg = (sadb_msg_t *)cur;
+	uint8_t *cur, *end;
+	sadb_msg_t *samsg;
 	uint16_t sport_typecode;
 	uint16_t dport_typecode;
 	uint8_t check_proto;
-
+	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
+
+	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
+
+	pfkeymp = sadb_keysock_out(0);
+	if (pfkeymp == NULL)
+		return (NULL);
+
+	/*
+	 * First, allocate a basic ACQUIRE message
+	 */
+	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
+	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
+
+	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
+	allocsize += 2 * sizeof (struct sockaddr_in6);
+
+	mutex_enter(&alg_lock);
+	/* NOTE:  The lock is now held through to this function's return. */
+	allocsize += ipsec_nalgs[IPSEC_ALG_AUTH] *
+	    ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
+
+	if (tunnel_mode) {
+		/* Tunnel mode! */
+		allocsize += 2 * sizeof (sadb_address_t);
+		/* Enough to cover both AF_INET and AF_INET6. */
+		allocsize += 2 * sizeof (struct sockaddr_in6);
+	}
+
+	msgmp = allocb(allocsize, BPRI_HI);
+	if (msgmp == NULL) {
+		freeb(pfkeymp);
+		mutex_exit(&alg_lock);
+		return (NULL);
+	}
+
+	pfkeymp->b_cont = msgmp;
+	cur = msgmp->b_rptr;
+	end = cur + allocsize;
+	samsg = (sadb_msg_t *)cur;
 	cur += sizeof (sadb_msg_t);
-	if (cur > end)
-		return (NULL);
-
-	/* use the address length to find the address family */
+
 	af = acqrec->ipsacq_addrfam;
 	switch (af) {
 	case AF_INET:
@@ -4527,11 +4952,13 @@
 		cmn_err(CE_WARN,
 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
 		ASSERT(0);
+		mutex_exit(&alg_lock);
 		return (NULL);
 	}
 
 	samsg->sadb_msg_version = PF_KEY_V2;
 	samsg->sadb_msg_type = SADB_ACQUIRE;
+	samsg->sadb_msg_satype = satype;
 	samsg->sadb_msg_errno = 0;
 	samsg->sadb_msg_pid = 0;
 	samsg->sadb_msg_reserved = 0;
@@ -4539,7 +4966,7 @@
 
 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
 
-	if (acqrec->ipsacq_proto == check_proto) {
+	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
 		sport_typecode = dport_typecode = 0;
 	} else {
 		sport_typecode = acqrec->ipsacq_srcport;
@@ -4547,15 +4974,34 @@
 	}
 
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
-	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto);
+	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
 
 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
-	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto);
+	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
+
+	if (tunnel_mode) {
+		sport_typecode = acqrec->ipsacq_srcport;
+		dport_typecode = acqrec->ipsacq_dstport;
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
+		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
+		    sport_typecode, acqrec->ipsacq_inner_proto,
+		    acqrec->ipsacq_innersrcpfx);
+		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
+		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
+		    dport_typecode, acqrec->ipsacq_inner_proto,
+		    acqrec->ipsacq_innerdstpfx);
+	}
+
+	/* XXX Insert identity information here. */
+
+	/* XXXMLS Insert sensitivity information here. */
 
 	if (cur != NULL)
-		samsg->sadb_msg_len = SADB_8TO64(cur - start);
-
-	return (cur);
+		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
+	else
+		mutex_exit(&alg_lock);
+
+	return (pfkeymp);
 }
 
 /*
@@ -4600,10 +5046,7 @@
 
 	ssa = (struct sockaddr_in *)(src + 1);
 	ssa6 = (struct sockaddr_in6 *)ssa;
-	if (dsa->sin_family != ssa->sin_family) {
-		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
-		return ((ipsa_t *)-1);
-	}
+	ASSERT(dsa->sin_family == ssa->sin_family);
 
 	srcaddr = ALL_ZEROES_PTR;
 	af = dsa->sin_family;
@@ -4836,7 +5279,7 @@
  * running an authentication check on the sequence number passed in.
  * this takes into account packets that are below the replay window,
  * and collisions with already replayed packets.  Return B_TRUE if it
- * is okay to proceed, B_FALSE if this packet should be dropped immeidately.
+ * is okay to proceed, B_FALSE if this packet should be dropped immediately.
  * Assume same byte-ordering as sadb_replay_check.
  */
 boolean_t
@@ -4964,11 +5407,8 @@
  * Fills in a reference to the policy, if any, from the conn, in *ppp
  * Releases a reference to the passed conn_t.
  */
-
-/* ARGSUSED */
 static void
-ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp,
-    ipsec_action_t **app)
+ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
 {
 	ipsec_policy_t	*pp;
 	ipsec_latch_t	*ipl = connp->conn_latch;
@@ -4989,7 +5429,7 @@
  * Caller must release the reference.
  */
 static void
-ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ipsec_action_t **app)
+ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
 {
 	connf_t *connfp;
 	conn_t *connp = NULL;
@@ -5039,7 +5479,7 @@
 	CONN_INC_REF(connp);
 	mutex_exit(&connfp->connf_lock);
 
-	ipsec_conn_pol(sel, connp, ppp, app);
+	ipsec_conn_pol(sel, connp, ppp);
 }
 
 static conn_t *
@@ -5091,7 +5531,7 @@
 }
 
 static void
-ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ipsec_action_t **app)
+ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
 {
 	connf_t 	*connfp;
 	conn_t		*connp;
@@ -5151,12 +5591,11 @@
 			return;
 	}
 
-	ipsec_conn_pol(sel, connp, ppp, app);
+	ipsec_conn_pol(sel, connp, ppp);
 }
 
 static void
-ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
-    ipsec_action_t **app)
+ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
 {
 	conn_t		*connp;
 	uint32_t	ports;
@@ -5193,12 +5632,163 @@
 	}
 	if (connp == NULL)
 		return;
-	ipsec_conn_pol(sel, connp, ppp, app);
+	ipsec_conn_pol(sel, connp, ppp);
+}
+
+/*
+ * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
+ * Returns 0 or errno, and always sets *diagnostic to something appropriate
+ * to PF_KEY.
+ *
+ * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
+ * ignore prefix lengths in the address extension.  Since we match on first-
+ * entered policies, this shouldn't matter.  Also, since we normalize prefix-
+ * set addresses to mask out the lower bits, we should get a suitable search
+ * key for the SPD anyway.  This is the function to change if the assumption
+ * about suitable search keys is wrong.
+ */
+static int
+ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
+    sadb_address_t *dstext, int *diagnostic)
+{
+	/* Each sockaddr immediately follows its PF_KEY address extension. */
+	struct sockaddr_in *lsin = (struct sockaddr_in *)(srcext + 1);
+	struct sockaddr_in *rsin = (struct sockaddr_in *)(dstext + 1);
+	struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)lsin;
+	struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)rsin;
+	boolean_t is_v6;
+
+	*diagnostic = 0;
+	bzero(sel, sizeof (*sel));
+	sel->ips_protocol = srcext->sadb_address_proto;
+
+	/* Destination picks the family; anything but AF_INET6 means IPv4. */
+	is_v6 = (rsin->sin_family == AF_INET6);
+	if (is_v6 ? (lsin6->sin6_family != AF_INET6) :
+	    (lsin->sin_family != AF_INET)) {
+		*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
+		return (EINVAL);
+	}
+	if (!is_v6) {
+		sel->ips_remote_addr_v4 = rsin->sin_addr.s_addr;
+		sel->ips_local_addr_v4 = lsin->sin_addr.s_addr;
+		if (sel->ips_protocol != IPPROTO_ICMP) {
+			sel->ips_remote_port = rsin->sin_port;
+			sel->ips_local_port = lsin->sin_port;
+		} else {
+			sel->ips_is_icmp_inv_acq = 1;
+		}
+		sel->ips_isv4 = B_TRUE;
+		return (0);
+	}
+
+	sel->ips_remote_addr_v6 = rsin6->sin6_addr;
+	sel->ips_local_addr_v6 = lsin6->sin6_addr;
+	if (sel->ips_protocol != IPPROTO_ICMPV6) {
+		sel->ips_remote_port = rsin6->sin6_port;
+		sel->ips_local_port = lsin6->sin6_port;
+	} else {
+		sel->ips_is_icmp_inv_acq = 1;
+	}
+	sel->ips_isv4 = B_FALSE;
+	return (0);
+}
+
+/*
+ * We have encapsulation.
+ * - Lookup tun_t by address and look for an associated tunnel policy
+ * - If there are inner selectors
+ *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
+ *   - Look up tunnel policy based on selectors
+ * - Else
+ *   - Sanity check the negotiation
+ *   - If appropriate, fall through to global policy
+ *
+ * Returns 0 or an errno.  A non-NULL "itp" has its reference released
+ * (ITP_REFRELE) on every return path.
+ */
+static int
+ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
+    sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
+    int *diagnostic)
+{
+	int err;
+	ipsec_policy_head_t *polhead;
+
+	/* Check for inner selectors and act appropriately */
+	if (innsrcext != NULL) {
+		/* Inner selectors present */
+		ASSERT(inndstext != NULL);
+		if ((itp == NULL) ||
+		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
+		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
+			/*
+			 * Inner packet selectors require an active
+			 * negotiate-tunnel policy; a transport-mode policy
+			 * or no policy fails.  Release itp first, though.
+			 */
+			if (itp != NULL) {
+				ITP_REFRELE(itp);
+			}
+			return (ENOENT);
+		} else {
+			/*
+			 * Reset "sel" to indicate inner selectors.  Pass
+			 * inner PF_KEY address extensions for this to happen.
+			 */
+			err = ipsec_get_inverse_acquire_sel(sel,
+			    innsrcext, inndstext, diagnostic);
+			if (err != 0) {
+				ITP_REFRELE(itp);
+				return (err);
+			}
+			/* The tunnel policy lookup is common code below. */
+		}
+	} else {
+		/* No inner selectors present */
+		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
+			/*
+			 * Transport mode negotiation with no tunnel policy
+			 * configured - return to indicate a global policy
+			 * check is needed.
+			 */
+			if (itp != NULL) {
+				ITP_REFRELE(itp);
+			}
+			return (0);
+		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
+			/* Tunnel mode set with no inner selectors. */
+			ITP_REFRELE(itp);
+			return (ENOENT);
+		}
+		/*
+		 * Else, this is a tunnel policy configured with ifconfig(1m)
+		 * or "negotiate transport" with ipsecconf(1m).  We have an
+		 * itp with policy set based on any match, so don't bother
+		 * changing fields in "sel".
+		 */
+	}
+
+	ASSERT(itp != NULL);
+	polhead = itp->itp_policy;
+	ASSERT(polhead != NULL);
+	rw_enter(&polhead->iph_lock, RW_READER);
+	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
+	rw_exit(&polhead->iph_lock);
+	ITP_REFRELE(itp);
+
+	/*
+	 * Don't default to global if we didn't find a matching policy entry.
+	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
+	 */
+	if (*ppp == NULL)
+		return (ENOENT);
+
+	return (0);
+}
 
 static void
-ipsec_oth_pol(ipsec_selector_t *sel,
-    ipsec_policy_t **ppp, ipsec_action_t **app)
+ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
 {
 	boolean_t	isv4 = sel->ips_isv4;
 	connf_t		*connfp;
@@ -5234,7 +5824,7 @@
 	CONN_INC_REF(connp);
 	mutex_exit(&connfp->connf_lock);
 
-	ipsec_conn_pol(sel, connp, ppp, app);
+	ipsec_conn_pol(sel, connp, ppp);
 }
 
 /*
@@ -5242,6 +5832,7 @@
  *
  * 1.) Current global policy.
  * 2.) An conn_t match depending on what all was passed in the extv[].
+ * 3.) A tunnel's policy head.
  * ...
  * N.) Other stuff TBD (e.g. identities)
  *
@@ -5256,77 +5847,143 @@
 	int err;
 	int diagnostic;
 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
-	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
-	struct sockaddr_in *src, *dst;
-	struct sockaddr_in6 *src6, *dst6;
-	ipsec_policy_t *pp;
-	ipsec_action_t *ap;
-	ipsec_selector_t sel;
+	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
+	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
+	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
+	struct sockaddr_in6 *src, *dst;
+	struct sockaddr_in6 *isrc, *idst;
+	ipsec_tun_pol_t *itp = NULL;
+	ipsec_policy_t *pp = NULL;
+	ipsec_selector_t sel, isel;
 	mblk_t *retmp;
 
-	bzero(&sel, sizeof (sel));
-	sel.ips_protocol = srcext->sadb_address_proto;
-	dst = (struct sockaddr_in *)(dstext + 1);
-	if (dst->sin_family == AF_INET6) {
-		dst6 = (struct sockaddr_in6 *)dst;
-		src6 = (struct sockaddr_in6 *)(srcext + 1);
-		if (src6->sin6_family != AF_INET6) {
-			diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
+	/* Normalize addresses */
+	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0) ==
+	    KS_IN_ADDR_UNKNOWN) {
+		err = EINVAL;
+		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
+		goto bail;
+	}
+	src = (struct sockaddr_in6 *)(srcext + 1);
+	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0) ==
+	    KS_IN_ADDR_UNKNOWN) {
+		err = EINVAL;
+		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
+		goto bail;
+	}
+	dst = (struct sockaddr_in6 *)(dstext + 1);
+	if (src->sin6_family != dst->sin6_family) {
+		err = EINVAL;
+		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
+		goto bail;
+	}
+
+	/* Check for tunnel mode and act appropriately */
+	if (innsrcext != NULL) {
+		if (inndstext == NULL) {
 			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
 			goto bail;
 		}
-		sel.ips_remote_addr_v6 = dst6->sin6_addr;
-		sel.ips_local_addr_v6 = src6->sin6_addr;
-		if (sel.ips_protocol == IPPROTO_ICMPV6) {
-			sel.ips_is_icmp_inv_acq = 1;
-		} else {
-			sel.ips_remote_port = dst6->sin6_port;
-			sel.ips_local_port = src6->sin6_port;
+		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
+			(sadb_ext_t *)innsrcext, 0) == KS_IN_ADDR_UNKNOWN) {
+			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
+			goto bail;
 		}
-		sel.ips_isv4 = B_FALSE;
-	} else {
-		src = (struct sockaddr_in *)(srcext + 1);
-		if (src->sin_family != AF_INET) {
-			diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
+		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
+		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
+			(sadb_ext_t *)inndstext, 0) == KS_IN_ADDR_UNKNOWN) {
 			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
+			goto bail;
+		}
+		idst = (struct sockaddr_in6 *)(inndstext + 1);
+		if (isrc->sin6_family != idst->sin6_family) {
+			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
 			goto bail;
 		}
-		sel.ips_remote_addr_v4 = dst->sin_addr.s_addr;
-		sel.ips_local_addr_v4 = src->sin_addr.s_addr;
-		if (sel.ips_protocol == IPPROTO_ICMP) {
-			sel.ips_is_icmp_inv_acq = 1;
-		} else {
-			sel.ips_remote_port = dst->sin_port;
-			sel.ips_local_port = src->sin_port;
+		if (isrc->sin6_family != AF_INET &&
+		    isrc->sin6_family != AF_INET6) {
+			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
+			goto bail;
 		}
-		sel.ips_isv4 = B_TRUE;
+	} else if (inndstext != NULL) {
+			err = EINVAL;
+			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
+			goto bail;
+	}
+
+	/* Get selectors first, based on outer addresses */
+	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
+	if (err != 0)
+		goto bail;
+
+	/* Check for tunnel mode mismatches. */
+	if (innsrcext != NULL &&
+	    ((isrc->sin6_family == AF_INET &&
+		sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
+		(isrc->sin6_family == AF_INET6 &&
+		    sel.ips_protocol != IPPROTO_IPV6 &&
+		    sel.ips_protocol != 0))) {
+		err = EPROTOTYPE;
+		goto bail;
 	}
 
 	/*
 	 * Okay, we have the addresses and other selector information.
 	 * Let's first find a conn...
 	 */
-	pp = NULL; ap = NULL;
+	pp = NULL;
 	switch (sel.ips_protocol) {
 	case IPPROTO_TCP:
-		ipsec_tcp_pol(&sel, &pp, &ap);
+		ipsec_tcp_pol(&sel, &pp);
 		break;
 	case IPPROTO_UDP:
-		ipsec_udp_pol(&sel, &pp, &ap);
+		ipsec_udp_pol(&sel, &pp);
 		break;
 	case IPPROTO_SCTP:
-		ipsec_sctp_pol(&sel, &pp, &ap);
+		ipsec_sctp_pol(&sel, &pp);
+		break;
+	case IPPROTO_ENCAP:
+	case IPPROTO_IPV6:
+		rw_enter(&itp_get_byaddr_rw_lock, RW_READER);
+		/*
+		 * Assume sel.ips_remote_addr_* has the right address at
+		 * that exact position.
+		 */
+		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
+		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family);
+		rw_exit(&itp_get_byaddr_rw_lock);
+		if (innsrcext == NULL) {
+			/*
+			 * Transport-mode tunnel, make sure we fake out isel
+			 * to contain something based on the outer protocol.
+			 */
+			bzero(&isel, sizeof (isel));
+			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
+		} /* Else isel is initialized by ipsec_tun_pol(). */
+		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
+		    &diagnostic);
+		/*
+		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
+		 * may be.
+		 */
+		if (err != 0)
+			goto bail;
 		break;
 	default:
-		ipsec_oth_pol(&sel, &pp, &ap);
+		ipsec_oth_pol(&sel, &pp);
 		break;
 	}
 
 	/*
-	 * If we didn't find a matching conn_t, take a look in the global
-	 * policy.
+	 * If we didn't find a matching conn_t or other policy head, take a
+	 * look in the global policy.
 	 */
-	if ((pp == NULL) && (ap == NULL)) {
+	if (pp == NULL) {
 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel);
 		if (pp == NULL) {
 			/* There's no global policy. */
@@ -5341,24 +5998,22 @@
 	 * message based on that, fix fields where appropriate,
 	 * and return the message.
 	 */
-	retmp = sadb_extended_acquire(&sel, pp, ap, samsg->sadb_msg_seq,
-	    samsg->sadb_msg_pid);
+	retmp = sadb_extended_acquire(&sel, pp, NULL,
+	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
+	    samsg->sadb_msg_seq, samsg->sadb_msg_pid);
 	if (pp != NULL) {
 		IPPOL_REFRELE(pp);
 	}
-	if (ap != NULL) {
-		IPACT_REFRELE(ap);
-	}
 	if (retmp != NULL) {
 		return (retmp);
 	} else {
 		err = ENOMEM;
 		diagnostic = 0;
-	bail:
-		samsg->sadb_msg_errno = (uint8_t)err;
-		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
-		return (NULL);
-	}
+	}
+bail:
+	samsg->sadb_msg_errno = (uint8_t)err;
+	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
+	return (NULL);
 }
 
 /*
--- a/usr/src/uts/common/inet/ip/spd.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/spd.c	Fri Nov 03 07:10:24 2006 -0800
@@ -44,6 +44,7 @@
 #include <sys/systm.h>
 #include <sys/param.h>
 #include <sys/kmem.h>
+#include <sys/ddi.h>
 
 #include <sys/crypto/api.h>
 
@@ -66,31 +67,47 @@
 #include <inet/ipsecesp.h>
 #include <inet/ipdrop.h>
 #include <inet/ipclassifier.h>
+#include <inet/tun.h>
 
 static void ipsec_update_present_flags();
 static ipsec_act_t *ipsec_act_wildcard_expand(ipsec_act_t *, uint_t *);
 static void ipsec_out_free(void *);
 static void ipsec_in_free(void *);
-static boolean_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
-    ipha_t *, ip6_t *);
 static mblk_t *ipsec_attach_global_policy(mblk_t *, conn_t *,
     ipsec_selector_t *);
 static mblk_t *ipsec_apply_global_policy(mblk_t *, conn_t *,
     ipsec_selector_t *);
 static mblk_t *ipsec_check_ipsecin_policy(queue_t *, mblk_t *,
-    ipsec_policy_t *, ipha_t *, ip6_t *);
+    ipsec_policy_t *, ipha_t *, ip6_t *, uint64_t);
 static void ipsec_in_release_refs(ipsec_in_t *);
 static void ipsec_out_release_refs(ipsec_out_t *);
 static void ipsec_action_reclaim(void *);
 static void ipsid_init(void);
 static void ipsid_fini(void);
+
+/* sel_flags values for ipsec_init_inbound_sel(). */
+#define	SEL_NONE	0x0000
+#define	SEL_PORT_POLICY	0x0001
+#define	SEL_IS_ICMP	0x0002
+#define	SEL_TUNNEL_MODE	0x0004
+
+/* Return values for ipsec_init_inbound_sel(). */
+typedef enum { SELRET_NOMEM, SELRET_BADPKT, SELRET_SUCCESS, SELRET_TUNFRAG}
+    selret_t;
+
+static selret_t ipsec_init_inbound_sel(ipsec_selector_t *, mblk_t *,
+    ipha_t *, ip6_t *, uint8_t);
+
 static boolean_t ipsec_check_ipsecin_action(struct ipsec_in_s *, mblk_t *,
     struct ipsec_action_s *, ipha_t *ipha, ip6_t *ip6h, const char **,
     kstat_named_t **);
-static int32_t ipsec_act_ovhd(const ipsec_act_t *act);
 static void ipsec_unregister_prov_update(void);
 static boolean_t ipsec_compare_action(ipsec_policy_t *, ipsec_policy_t *);
-static uint32_t selector_hash(ipsec_selector_t *);
+static uint32_t selector_hash(ipsec_selector_t *, ipsec_policy_root_t *);
+static int tunnel_compare(const void *, const void *);
+static void ipsec_freemsg_chain(mblk_t *);
+static void ip_drop_packet_chain(mblk_t *, boolean_t, ill_t *, ire_t *,
+    struct kstat_named *, ipdropper_t *);
 
 /*
  * Policy rule index generator.  We assume this won't wrap in the
@@ -108,8 +125,15 @@
 static ipsec_policy_head_t system_policy;
 static ipsec_policy_head_t inactive_policy;
 
+/*
+ * Tunnel policies - AVL tree indexed by tunnel name.
+ */
+krwlock_t tunnel_policy_lock;
+uint64_t tunnel_policy_gen;	/* To keep track of updates w/o searches. */
+avl_tree_t tunnel_policies;
+
 /* Packet dropper for generic SPD drops. */
-static ipdropper_t spd_dropper;
+ipdropper_t spd_dropper;
 
 /*
  * For now, use a trivially sized hash table for actions.
@@ -126,6 +150,11 @@
 #define	IPSEC_SPDHASH_DEFAULT 251
 uint32_t ipsec_spd_hashsize = 0;
 
+/* SPD hash-size tunable per tunnel. */
+#define	TUN_SPDHASH_DEFAULT 5
+uint32_t tun_spd_hashsize;
+
+
 #define	IPSEC_SEL_NOHASH ((uint32_t)(~0))
 
 static HASH_HEAD(ipsec_action_s) ipsec_action_hash[IPSEC_ACTION_HASH_SIZE];
@@ -141,12 +170,22 @@
 boolean_t ipsec_inbound_v6_policy_present = B_FALSE;
 boolean_t ipsec_outbound_v6_policy_present = B_FALSE;
 
+/* Frag cache prototypes */
+static void ipsec_fragcache_clean(ipsec_fragcache_t *);
+static ipsec_fragcache_entry_t *fragcache_delentry(int,
+    ipsec_fragcache_entry_t *, ipsec_fragcache_t *);
+boolean_t ipsec_fragcache_init(ipsec_fragcache_t *);
+void ipsec_fragcache_uninit(ipsec_fragcache_t *);
+mblk_t *ipsec_fragcache_add(ipsec_fragcache_t *, mblk_t *, mblk_t *, int);
+
 /*
  * Because policy needs to know what algorithms are supported, keep the
  * lists of algorithms here.
  */
 
 kmutex_t alg_lock;
+krwlock_t itp_get_byaddr_rw_lock;
+ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
 uint8_t ipsec_nalgs[IPSEC_NALGTYPES];
 ipsec_alginfo_t *ipsec_alglists[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
 uint8_t ipsec_sortlist[IPSEC_NALGTYPES][IPSEC_MAX_ALGS];
@@ -168,10 +207,17 @@
 	(((sa1)->ipsa_src_cid == (sa2)->ipsa_src_cid) &&		\
 	    (((sa1)->ipsa_dst_cid == (sa2)->ipsa_dst_cid))))
 
-#define	IPPOL_UNCHAIN(php, ip) 						\
-	HASHLIST_UNCHAIN((ip), ipsp_hash);				\
-	avl_remove(&(php)->iph_rulebyid, (ip));				\
-	IPPOL_REFRELE(ip);
+/*
+ * IPv4 Fragments
+ */
+#define	IS_V4_FRAGMENT(ipha_fragment_offset_and_flags)			\
+	(((ntohs(ipha_fragment_offset_and_flags) & IPH_OFFSET) != 0) ||	\
+	((ntohs(ipha_fragment_offset_and_flags) & IPH_MF) != 0))
+
+/*
+ * IPv6 Fragments
+ */
+#define	IS_V6_FRAGMENT(ipp)	(ipp.ipp_fields & IPPF_FRAGHDR)
 
 /*
  * Policy failure messages.
@@ -227,6 +273,37 @@
  *	entries..
  */
 
+/* Convenience functions for freeing or dropping a b_next linked mblk chain */
+
+/* Free all messages in an mblk chain */
+static void
+ipsec_freemsg_chain(mblk_t *mp)
+{
+	mblk_t *mpnext;
+	while (mp != NULL) {
+		ASSERT(mp->b_prev == NULL);
+		mpnext = mp->b_next;
+		mp->b_next = NULL;
+		freemsg(mp);	/* Always works, even if NULL */
+		mp = mpnext;
+	}
+}
+
+/* ip_drop all messages in an mblk chain */
+static void
+ip_drop_packet_chain(mblk_t *mp, boolean_t inbound, ill_t *arriving,
+    ire_t *outbound_ire, struct kstat_named *counter, ipdropper_t *who_called)
+{
+	mblk_t *mpnext;
+	while (mp != NULL) {
+		ASSERT(mp->b_prev == NULL);
+		mpnext = mp->b_next;
+		mp->b_next = NULL;
+		ip_drop_packet(mp, inbound, arriving, outbound_ire, counter,
+		    who_called);
+		mp = mpnext;
+	}
+}
 
 /*
  * AVL tree comparison function.
@@ -281,12 +358,10 @@
 	return (0);
 }
 
-static void
+void
 ipsec_polhead_free_table(ipsec_policy_head_t *iph)
 {
-	int dir, nchains;
-
-	nchains = ipsec_spd_hashsize;
+	int dir;
 
 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
@@ -294,12 +369,12 @@
 		if (ipr->ipr_hash == NULL)
 			continue;
 
-		kmem_free(ipr->ipr_hash, nchains *
+		kmem_free(ipr->ipr_hash, ipr->ipr_nchains *
 		    sizeof (ipsec_policy_hash_t));
 	}
 }
 
-static void
+void
 ipsec_polhead_destroy(ipsec_policy_head_t *iph)
 {
 	int dir;
@@ -309,10 +384,9 @@
 
 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
-		int nchains = ipr->ipr_nchains;
 		int chain;
 
-		for (chain = 0; chain < nchains; chain++)
+		for (chain = 0; chain < ipr->ipr_nchains; chain++)
 			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
 
 	}
@@ -326,10 +400,27 @@
 ipsec_policy_destroy(void)
 {
 	int i;
+	void *cookie;
+	ipsec_tun_pol_t *node;
 
 	ip_drop_unregister(&spd_dropper);
 	ip_drop_destroy();
 
+	rw_enter(&tunnel_policy_lock, RW_WRITER);
+	/*
+	 * It's possible we can just ASSERT() the tree is empty.  After all,
+	 * we aren't called until IP is ready to unload (and presumably all
+	 * tunnels have been unplumbed).  But we'll play it safe for now, the
+	 * loop will just exit immediately if it's empty.
+	 */
+	cookie = NULL;
+	while ((node = (ipsec_tun_pol_t *)
+		    avl_destroy_nodes(&tunnel_policies, &cookie)) != NULL) {
+		ITP_REFRELE(node);
+	}
+	avl_destroy(&tunnel_policies);
+	rw_exit(&tunnel_policy_lock);
+	rw_destroy(&tunnel_policy_lock);
 	ipsec_polhead_destroy(&system_policy);
 	ipsec_polhead_destroy(&inactive_policy);
 
@@ -373,20 +464,21 @@
  * Attempt to allocate the tables in a single policy head.
  * Return nonzero on failure after cleaning up any work in progress.
  */
-static int
-ipsec_alloc_table(ipsec_policy_head_t *iph, int kmflag)
+int
+ipsec_alloc_table(ipsec_policy_head_t *iph, int nchains, int kmflag,
+    boolean_t global_cleanup)
 {
-	int dir, nchains;
-
-	nchains = ipsec_spd_hashsize;
+	int dir;
 
 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
 		ipsec_policy_root_t *ipr = &iph->iph_root[dir];
 
+		ipr->ipr_nchains = nchains;
 		ipr->ipr_hash = kmem_zalloc(nchains *
 		    sizeof (ipsec_policy_hash_t), kmflag);
 		if (ipr->ipr_hash == NULL)
-			return (ipsec_alloc_tables_failed());
+			return (global_cleanup ? ipsec_alloc_tables_failed() :
+			    ENOMEM);
 	}
 	return (0);
 }
@@ -400,11 +492,13 @@
 {
 	int error;
 
-	error = ipsec_alloc_table(&system_policy, kmflag);
+	error = ipsec_alloc_table(&system_policy, ipsec_spd_hashsize, kmflag,
+	    B_TRUE);
 	if (error != 0)
 		return (error);
 
-	error = ipsec_alloc_table(&inactive_policy, kmflag);
+	error = ipsec_alloc_table(&inactive_policy, ipsec_spd_hashsize, kmflag,
+	    B_TRUE);
 	if (error != 0)
 		return (error);
 
@@ -420,12 +514,10 @@
 /*
  * After table allocation, initialize a policy head.
  */
-static void
-ipsec_polhead_init(ipsec_policy_head_t *iph)
+void
+ipsec_polhead_init(ipsec_policy_head_t *iph, int nchains)
 {
-	int dir, chain, nchains;
-
-	nchains = ipsec_spd_hashsize;
+	int dir, chain;
 
 	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
 	avl_create(&iph->iph_rulebyid, ipsec_policy_cmpbyid,
@@ -468,9 +560,22 @@
 		(void) ipsec_alloc_tables(KM_SLEEP);
 	}
 
+	/* Just set a default for tunnels. */
+	if (tun_spd_hashsize == 0)
+		tun_spd_hashsize = TUN_SPDHASH_DEFAULT;
+
 	ipsid_init();
-	ipsec_polhead_init(&system_policy);
-	ipsec_polhead_init(&inactive_policy);
+	/*
+	 * Globals need ref == 1 to prevent IPPH_REFRELE() from attempting
+	 * to free them.
+	 */
+	system_policy.iph_refs = 1;
+	inactive_policy.iph_refs = 1;
+	ipsec_polhead_init(&system_policy, ipsec_spd_hashsize);
+	ipsec_polhead_init(&inactive_policy, ipsec_spd_hashsize);
+	rw_init(&tunnel_policy_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&tunnel_policies, tunnel_compare, sizeof (ipsec_tun_pol_t),
+	    0);
 
 	for (i = 0; i < IPSEC_ACTION_HASH_SIZE; i++)
 		mutex_init(&(ipsec_action_hash[i].hash_lock),
@@ -500,6 +605,12 @@
 
 	ip_drop_init();
 	ip_drop_register(&spd_dropper, "IPsec SPD");
+
+	/* Set function to dummy until tun is loaded */
+	rw_init(&itp_get_byaddr_rw_lock, NULL, RW_DEFAULT, NULL);
+	rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
+	itp_get_byaddr = itp_get_byaddr_dummy;
+	rw_exit(&itp_get_byaddr_rw_lock);
 }
 
 /*
@@ -628,52 +739,59 @@
  * pointers.
  */
 void
-ipsec_swap_policy(void)
+ipsec_swap_policy(ipsec_policy_head_t *active, ipsec_policy_head_t *inactive)
 {
 	int af, dir;
 	avl_tree_t r1, r2;
 
-	rw_enter(&inactive_policy.iph_lock, RW_WRITER);
-	rw_enter(&system_policy.iph_lock, RW_WRITER);
-
-	r1 = system_policy.iph_rulebyid;
-	r2 = inactive_policy.iph_rulebyid;
-	system_policy.iph_rulebyid = r2;
-	inactive_policy.iph_rulebyid = r1;
+	rw_enter(&inactive->iph_lock, RW_WRITER);
+	rw_enter(&active->iph_lock, RW_WRITER);
+
+	r1 = active->iph_rulebyid;
+	r2 = inactive->iph_rulebyid;
+	active->iph_rulebyid = r2;
+	inactive->iph_rulebyid = r1;
 
 	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
 		ipsec_policy_hash_t *h1, *h2;
 
-		h1 = system_policy.iph_root[dir].ipr_hash;
-		h2 = inactive_policy.iph_root[dir].ipr_hash;
-		system_policy.iph_root[dir].ipr_hash = h2;
-		inactive_policy.iph_root[dir].ipr_hash = h1;
+		h1 = active->iph_root[dir].ipr_hash;
+		h2 = inactive->iph_root[dir].ipr_hash;
+		active->iph_root[dir].ipr_hash = h2;
+		inactive->iph_root[dir].ipr_hash = h1;
 
 		for (af = 0; af < IPSEC_NAF; af++) {
 			ipsec_policy_t *t1, *t2;
 
-			t1 = system_policy.iph_root[dir].ipr_nonhash[af];
-			t2 = inactive_policy.iph_root[dir].ipr_nonhash[af];
-			system_policy.iph_root[dir].ipr_nonhash[af] = t2;
-			inactive_policy.iph_root[dir].ipr_nonhash[af] = t1;
+			t1 = active->iph_root[dir].ipr_nonhash[af];
+			t2 = inactive->iph_root[dir].ipr_nonhash[af];
+			active->iph_root[dir].ipr_nonhash[af] = t2;
+			inactive->iph_root[dir].ipr_nonhash[af] = t1;
 			if (t1 != NULL) {
 				t1->ipsp_hash.hash_pp =
-				    &(inactive_policy.iph_root[dir].
-				    ipr_nonhash[af]);
+				    &(inactive->iph_root[dir].ipr_nonhash[af]);
 			}
 			if (t2 != NULL) {
 				t2->ipsp_hash.hash_pp =
-				    &(system_policy.iph_root[dir].
-				    ipr_nonhash[af]);
+				    &(active->iph_root[dir].ipr_nonhash[af]);
 			}
 
 		}
 	}
-	system_policy.iph_gen++;
-	inactive_policy.iph_gen++;
+	active->iph_gen++;
+	inactive->iph_gen++;
 	ipsec_update_present_flags();
-	rw_exit(&system_policy.iph_lock);
-	rw_exit(&inactive_policy.iph_lock);
+	rw_exit(&active->iph_lock);
+	rw_exit(&inactive->iph_lock);
+}
+
+/*
+ * Swap global policy primary/secondary.
+ */
+void
+ipsec_swap_global_policy(void)
+{
+	ipsec_swap_policy(&system_policy, &inactive_policy);
 }
 
 /*
@@ -739,7 +857,7 @@
  * the source policy head. Note that we only need to read-lock the source
  * policy head as we are not changing it.
  */
-static int
+int
 ipsec_copy_polhead(ipsec_policy_head_t *sph, ipsec_policy_head_t *dph)
 {
 	int af, dir, chain, nchains;
@@ -793,6 +911,40 @@
 	return (ipsec_copy_polhead(&system_policy, &inactive_policy));
 }
 
+/*
+ * Generic "do we have IPvN policy" answer.
+ */
+boolean_t
+iph_ipvN(ipsec_policy_head_t *iph, boolean_t v6)
+{
+	int i, hval;
+	uint32_t valbit;
+	ipsec_policy_root_t *ipr;
+	ipsec_policy_t *ipp;
+
+	if (v6) {
+		valbit = IPSL_IPV6;
+		hval = IPSEC_AF_V6;
+	} else {
+		valbit = IPSL_IPV4;
+		hval = IPSEC_AF_V4;
+	}
+
+	ASSERT(RW_LOCK_HELD(&iph->iph_lock));
+	for (ipr = iph->iph_root; ipr < &(iph->iph_root[IPSEC_NTYPES]); ipr++) {
+		if (ipr->ipr_nonhash[hval] != NULL)
+			return (B_TRUE);
+		for (i = 0; i < ipr->ipr_nchains; i++) {
+			for (ipp = ipr->ipr_hash[i].hash_head; ipp != NULL;
+			    ipp = ipp->ipsp_hash.hash_next) {
+				if (ipp->ipsp_sel->ipsl_key.ipsl_valid & valbit)
+					return (B_TRUE);
+			}
+		}
+	}
+
+	return (B_FALSE);
+}
 
 /*
  * Extract the string from ipsec_policy_failure_msgs[type] and
@@ -893,12 +1045,14 @@
 			*minbits = algp->alg_default_bits;
 			ASSERT(*minbits >= algp->alg_minbits);
 		} else {
-			*minbits = MAX(*minbits, algp->alg_minbits);
+			*minbits = MAX(MIN(*minbits, algp->alg_maxbits),
+			    algp->alg_minbits);
 		}
 		if (*maxbits == 0)
 			*maxbits = algp->alg_maxbits;
 		else
-			*maxbits = MIN(*maxbits, algp->alg_maxbits);
+			*maxbits = MIN(MAX(*maxbits, algp->alg_minbits),
+			    algp->alg_maxbits);
 		ASSERT(*minbits <= *maxbits);
 	} else {
 		*minbits = 0;
@@ -1190,7 +1344,7 @@
  * Convert a new-style action back to an ipsec_req_t (more backwards compat).
  * We assume caller has already zero'ed *req for us.
  */
-static int
+int
 ipsec_req_from_head(ipsec_policy_head_t *ph, ipsec_req_t *req, int af)
 {
 	ipsec_policy_t *p;
@@ -1201,7 +1355,7 @@
 	for (p = ph->iph_root[IPSEC_INBOUND].ipr_nonhash[af];
 	    p != NULL;
 	    p = p->ipsp_hash.hash_next) {
-		if ((p->ipsp_sel->ipsl_key.ipsl_valid&IPSL_WILDCARD) == 0)
+		if ((p->ipsp_sel->ipsl_key.ipsl_valid & IPSL_WILDCARD) == 0)
 			return (ipsec_req_from_act(p->ipsp_act, req));
 	}
 	return (sizeof (*req));
@@ -1325,14 +1479,12 @@
  * expected by the SAs it traversed on the way in.
  */
 static boolean_t
-ipsec_check_ipsecin_unique(ipsec_in_t *ii, mblk_t *mp,
-    ipha_t *ipha, ip6_t *ip6h,
-    const char **reason, kstat_named_t **counter)
+ipsec_check_ipsecin_unique(ipsec_in_t *ii, const char **reason,
+    kstat_named_t **counter, uint64_t pkt_unique)
 {
-	uint64_t pkt_unique, ah_mask, esp_mask;
+	uint64_t ah_mask, esp_mask;
 	ipsa_t *ah_assoc;
 	ipsa_t *esp_assoc;
-	ipsec_selector_t sel;
 
 	ASSERT(ii->ipsec_in_secure);
 	ASSERT(!ii->ipsec_in_loopback);
@@ -1347,32 +1499,23 @@
 	if ((ah_mask == 0) && (esp_mask == 0))
 		return (B_TRUE);
 
-	if (!ipsec_init_inbound_sel(&sel, mp, ipha, ip6h)) {
-		/*
-		 * Technically not a policy mismatch, but it is
-		 * an internal failure.
-		 */
-		*reason = "ipsec_init_inbound_sel";
-		*counter = &ipdrops_spd_nomem;
+	/*
+	 * The pkt_unique check will also check for tunnel mode on the SA
+	 * vs. the tunneled_packet boolean.  "Be liberal in what you receive"
+	 * should not apply in this case.  ;)
+	 */
+
+	if (ah_mask != 0 &&
+	    ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
+		*reason = "AH inner header mismatch";
+		*counter = &ipdrops_spd_ah_innermismatch;
 		return (B_FALSE);
 	}
-
-	pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
-	    sel.ips_protocol);
-
-	if (ah_mask != 0) {
-		if (ah_assoc->ipsa_unique_id != (pkt_unique & ah_mask)) {
-			*reason = "AH inner header mismatch";
-			*counter = &ipdrops_spd_ah_innermismatch;
-			return (B_FALSE);
-		}
-	}
-	if (esp_mask != 0) {
-		if (esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
-			*reason = "ESP inner header mismatch";
-			*counter = &ipdrops_spd_esp_innermismatch;
-			return (B_FALSE);
-		}
+	if (esp_mask != 0 &&
+	    esp_assoc->ipsa_unique_id != (pkt_unique & esp_mask)) {
+		*reason = "ESP inner header mismatch";
+		*counter = &ipdrops_spd_esp_innermismatch;
+		return (B_FALSE);
 	}
 	return (B_TRUE);
 }
@@ -1555,12 +1698,59 @@
 }
 
 /*
+ * Takes a latched conn and an inbound packet and returns a unique_id suitable
+ * for SA comparisons.  Most of the time we will copy from the conn_t, but
+ * there are cases when the conn_t is latched but it has wildcard selectors,
+ * and then we need to fallback to scooping them out of the packet.
+ *
+ * Assume we'll never have 0 with a conn_t present, so use 0 as a failure.  We
+ * can get away with this because we only have non-zero ports/proto for
+ * latched conn_ts.
+ *
+ * Ideal candidate for an "inline" keyword, as we're JUST convoluted enough
+ * to not be a nice macro.
+ */
+static uint64_t
+conn_to_unique(conn_t *connp, mblk_t *data_mp, ipha_t *ipha, ip6_t *ip6h)
+{
+	ipsec_selector_t sel;
+	uint8_t ulp = connp->conn_ulp;
+
+	ASSERT(connp->conn_latch->ipl_in_policy != NULL);
+
+	if ((ulp == IPPROTO_TCP || ulp == IPPROTO_UDP || ulp == IPPROTO_SCTP) &&
+	    (connp->conn_fport == 0 || connp->conn_lport == 0)) {
+		/* Slow path - we gotta grab from the packet. */
+		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
+			SEL_NONE) != SELRET_SUCCESS) {
+			/* Failure -> have caller free packet with ENOMEM. */
+			return (0);
+		}
+		return (SA_UNIQUE_ID(sel.ips_remote_port, sel.ips_local_port,
+			    sel.ips_protocol, 0));
+	}
+
+#ifdef DEBUG_NOT_UNTIL_6478464
+	if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h, SEL_NONE) ==
+	    SELRET_SUCCESS) {
+		ASSERT(sel.ips_local_port == connp->conn_lport);
+		ASSERT(sel.ips_remote_port == connp->conn_fport);
+		ASSERT(sel.ips_protocol == connp->conn_ulp);
+	}
+	ASSERT(connp->conn_ulp != 0);
+#endif
+
+	return (SA_UNIQUE_ID(connp->conn_fport, connp->conn_lport, ulp, 0));
+}
+
+/*
  * Called to check policy on a latched connection, both from this file
  * and from tcp.c
  */
 boolean_t
 ipsec_check_ipsecin_latch(ipsec_in_t *ii, mblk_t *mp, ipsec_latch_t *ipl,
-    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter)
+    ipha_t *ipha, ip6_t *ip6h, const char **reason, kstat_named_t **counter,
+    conn_t *connp)
 {
 	ASSERT(ipl->ipl_ids_latched == B_TRUE);
 
@@ -1584,8 +1774,13 @@
 			return (B_FALSE);
 		}
 
-		if (!ipsec_check_ipsecin_unique(ii, mp, ipha, ip6h, reason,
-		    counter)) {
+		/*
+		 * Can fudge pkt_unique from connp because we're latched.
+		 * In DEBUG kernels (see conn_to_unique()'s implementation),
+		 * verify this even if it REALLY slows things down.
+		 */
+		if (!ipsec_check_ipsecin_unique(ii, reason, counter,
+			conn_to_unique(connp, mp, ipha, ip6h))) {
 			return (B_FALSE);
 		}
 	}
@@ -1604,7 +1799,7 @@
  */
 static mblk_t *
 ipsec_check_ipsecin_policy(queue_t *q, mblk_t *first_mp, ipsec_policy_t *ipsp,
-    ipha_t *ipha, ip6_t *ip6h)
+    ipha_t *ipha, ip6_t *ip6h, uint64_t pkt_unique)
 {
 	ipsec_in_t *ii;
 	ipsec_action_t *ap;
@@ -1643,8 +1838,7 @@
 		goto drop;
 	}
 
-	if (!ipsec_check_ipsecin_unique(ii, data_mp, ipha, ip6h,
-	    &reason, &counter))
+	if (!ipsec_check_ipsecin_unique(ii, &reason, &counter, pkt_unique))
 		goto drop;
 
 	/*
@@ -1678,7 +1872,7 @@
  * sleazy prefix-length-based compare.
  * another inlining candidate..
  */
-static boolean_t
+boolean_t
 ip_addr_match(uint8_t *addr1, int pfxlen, in6_addr_t *addr2p)
 {
 	int offset = pfxlen>>3;
@@ -1774,10 +1968,9 @@
  * is not the original "best", we need to release that reference
  * before returning.
  */
-static ipsec_policy_t *
-ipsec_find_policy_head(ipsec_policy_t *best,
-    ipsec_policy_head_t *head, int direction, ipsec_selector_t *sel,
-    int selhash)
+ipsec_policy_t *
+ipsec_find_policy_head(ipsec_policy_t *best, ipsec_policy_head_t *head,
+    int direction, ipsec_selector_t *sel)
 {
 	ipsec_policy_t *curbest;
 	ipsec_policy_root_t *root;
@@ -1807,7 +2000,8 @@
 
 	if (root->ipr_nchains > 0) {
 		curbest = ipsec_find_policy_chain(curbest,
-		    root->ipr_hash[selhash].hash_head, sel, is_icmp_inv_acq);
+		    root->ipr_hash[selector_hash(sel, root)].hash_head, sel,
+		    is_icmp_inv_acq);
 	}
 	curbest = ipsec_find_policy_chain(curbest, root->ipr_nonhash[af], sel,
 	    is_icmp_inv_acq);
@@ -1842,16 +2036,14 @@
     ipsec_selector_t *sel)
 {
 	ipsec_policy_t *p;
-	int selhash = selector_hash(sel);
-
-	p = ipsec_find_policy_head(NULL, &system_policy, direction, sel,
-	    selhash);
+
+	p = ipsec_find_policy_head(NULL, &system_policy, direction, sel);
 	if ((connp != NULL) && (connp->conn_policy != NULL)) {
 		p = ipsec_find_policy_head(p, connp->conn_policy,
-		    direction, sel, selhash);
+		    direction, sel);
 	} else if ((io != NULL) && (io->ipsec_out_polhead != NULL)) {
 		p = ipsec_find_policy_head(p, io->ipsec_out_polhead,
-		    direction, sel, selhash);
+		    direction, sel);
 	}
 
 	return (p);
@@ -1881,6 +2073,7 @@
 	boolean_t policy_present;
 	kstat_named_t *counter;
 	ipsec_in_t *ii = NULL;
+	uint64_t pkt_unique;
 
 	data_mp = mctl_present ? first_mp->b_cont : first_mp;
 	ipsec_mp = mctl_present ? first_mp : NULL;
@@ -1921,9 +2114,14 @@
 		if (p != NULL) {
 			IPPOL_REFHOLD(p);
 		}
+		/*
+		 * Fudge pkt_unique from the conn_t for the check below.
+		 */
+		pkt_unique = conn_to_unique(connp, data_mp, ipha, ip6h);
 	} else {
 		/* Initialize the ports in the selector */
-		if (!ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h)) {
+		if (ipsec_init_inbound_sel(&sel, data_mp, ipha, ip6h,
+			SEL_NONE) == SELRET_NOMEM) {
 			/*
 			 * Technically not a policy mismatch, but it is
 			 * an internal failure.
@@ -1946,6 +2144,8 @@
 		 */
 
 		p = ipsec_find_policy(IPSEC_TYPE_INBOUND, connp, NULL, &sel);
+		pkt_unique = SA_UNIQUE_ID(sel.ips_remote_port,
+		    sel.ips_local_port, sel.ips_protocol, 0);
 	}
 
 	if (p == NULL) {
@@ -1964,7 +2164,8 @@
 		}
 	}
 	if ((ii != NULL) && (ii->ipsec_in_secure))
-		return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h));
+		return (ipsec_check_ipsecin_policy(q, ipsec_mp, p, ipha, ip6h,
+			    pkt_unique));
 	if (p->ipsp_act->ipa_allow_clear) {
 		BUMP_MIB(&ip_mib, ipsecInSucceeded);
 		IPPOL_REFRELE(p);
@@ -2054,8 +2255,13 @@
 		/*
 		 * If it is not ICMP, fail this request.
 		 */
-		if (ipha->ipha_protocol != IPPROTO_ICMP)
+		if (ipha->ipha_protocol != IPPROTO_ICMP) {
+#ifdef FRAGCACHE_DEBUG
+			cmn_err(CE_WARN, "Dropping - ipha_proto = %d\n",
+			    ipha->ipha_protocol);
+#endif
 			return (B_FALSE);
+		}
 		iph_hdr_length = IPH_HDR_LENGTH(ipha);
 		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
 		/*
@@ -2099,6 +2305,9 @@
 				 * Be in sync with icmp_inbound, where we have
 				 * already set ire_max_frag.
 				 */
+#ifdef FRAGCACHE_DEBUG
+			cmn_err(CE_WARN, "ICMP frag needed\n");
+#endif
 				return (B_TRUE);
 			case ICMP_HOST_UNREACHABLE:
 			case ICMP_NET_UNREACHABLE:
@@ -2196,6 +2405,7 @@
 	mblk_t *mp = mctl_present ? first_mp->b_cont : first_mp;
 	mblk_t *ipsec_mp = mctl_present ? first_mp : NULL;
 	ipsec_latch_t *ipl;
+	uint64_t unique_id;
 
 	ASSERT(connp != NULL);
 	ipl = connp->conn_latch;
@@ -2273,8 +2483,7 @@
 	 * mp->b_cont could be either a M_CTL message
 	 * for icmp errors being sent up or a M_DATA message.
 	 */
-	ASSERT(mp->b_datap->db_type == M_CTL ||
-	    mp->b_datap->db_type == M_DATA);
+	ASSERT(mp->b_datap->db_type == M_CTL || mp->b_datap->db_type == M_DATA);
 
 	ASSERT(ii->ipsec_in_type == IPSEC_IN);
 
@@ -2294,7 +2503,7 @@
 		const char *reason;
 		kstat_named_t *counter;
 		if (ipsec_check_ipsecin_latch(ii, mp, ipl,
-		    ipha, ip6h, &reason, &counter)) {
+		    ipha, ip6h, &reason, &counter, connp)) {
 			BUMP_MIB(&ip_mib, ipsecInSucceeded);
 			return (first_mp);
 		}
@@ -2314,9 +2523,10 @@
 		return (first_mp);
 	}
 
+	unique_id = conn_to_unique(connp, mp, ipha, ip6h);
 	IPPOL_REFHOLD(ipl->ipl_in_policy);
 	first_mp = ipsec_check_ipsecin_policy(CONNP_TO_WQ(connp), first_mp,
-	    ipl->ipl_in_policy, ipha, ip6h);
+	    ipl->ipl_in_policy, ipha, ip6h, unique_id);
 	/*
 	 * NOTE: ipsecIn{Failed,Succeeeded} bumped by
 	 * ipsec_check_ipsecin_policy().
@@ -2326,43 +2536,70 @@
 	return (first_mp);
 }
 
-boolean_t
-ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp,
-    ipha_t *ipha, ip6_t *ip6h)
+/*
+ * Returns:
+ *
+ * SELRET_NOMEM --> msgpullup() needed to gather things failed.
+ * SELRET_BADPKT --> ip_hdr_length_nexthdr_v6() could not parse the IPv6
+ *		     header chain, e.g. when an initial tunnel-mode fragment
+ *		     is too short to hold useful data.  No sel_flags bit
+ *		     gates this return; the caller frees the packet.
+ * SELRET_SUCCESS --> "sel" now has initialized IPsec selector data.
+ * SELRET_TUNFRAG --> This is a fragment in a tunnel-mode packet.  Caller
+ *		      should put this packet in a fragment-gathering queue.
+ *		      Only returned if SEL_PORT_POLICY is set and SEL_IS_ICMP
+ *		      is clear; SEL_TUNNEL_MODE is not itself checked here.
+ */
+static selret_t
+ipsec_init_inbound_sel(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
+    ip6_t *ip6h, uint8_t sel_flags)
 {
 	uint16_t *ports;
 	ushort_t hdr_len;
+	int outer_hdr_len = 0;	/* For ICMP tunnel-mode cases... */
 	mblk_t *spare_mp = NULL;
 	uint8_t *nexthdrp;
 	uint8_t nexthdr;
 	uint8_t *typecode;
 	uint8_t check_proto;
+	ip6_pkt_t ipp;
+	boolean_t port_policy_present = (sel_flags & SEL_PORT_POLICY);
+	boolean_t is_icmp = (sel_flags & SEL_IS_ICMP);
+	boolean_t tunnel_mode = (sel_flags & SEL_TUNNEL_MODE);
 
 	ASSERT((ipha == NULL && ip6h != NULL) ||
 	    (ipha != NULL && ip6h == NULL));
 
 	if (ip6h != NULL) {
+		if (is_icmp)
+			outer_hdr_len = ((uint8_t *)ip6h) - mp->b_rptr;
+
 		check_proto = IPPROTO_ICMPV6;
 		sel->ips_isv4 = B_FALSE;
 		sel->ips_local_addr_v6 = ip6h->ip6_dst;
 		sel->ips_remote_addr_v6 = ip6h->ip6_src;
 
+		bzero(&ipp, sizeof (ipp));
+		(void) ip_find_hdr_v6(mp, ip6h, &ipp, NULL);
+
 		nexthdr = ip6h->ip6_nxt;
 		switch (nexthdr) {
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS:
+		case IPPROTO_FRAGMENT:
 			/*
 			 * Use ip_hdr_length_nexthdr_v6().  And have a spare
 			 * mblk that's contiguous to feed it
 			 */
 			if ((spare_mp = msgpullup(mp, -1)) == NULL)
-				return (B_FALSE);
+				return (SELRET_NOMEM);
 			if (!ip_hdr_length_nexthdr_v6(spare_mp,
-			    (ip6_t *)spare_mp->b_rptr, &hdr_len, &nexthdrp)) {
-				/* Malformed packet - XXX ip_drop_packet()? */
-				freemsg(spare_mp);
-				return (B_FALSE);
+			    (ip6_t *)(spare_mp->b_rptr + outer_hdr_len),
+				&hdr_len, &nexthdrp)) {
+				/* Malformed packet - caller frees. */
+				ipsec_freemsg_chain(spare_mp);
+				return (SELRET_BADPKT);
 			}
 			nexthdr = *nexthdrp;
 			/* We can just extract based on hdr_len now. */
@@ -2371,21 +2608,39 @@
 			hdr_len = IPV6_HDR_LEN;
 			break;
 		}
+
+		if (port_policy_present && IS_V6_FRAGMENT(ipp) && !is_icmp) {
+			/* IPv6 Fragment */
+			ipsec_freemsg_chain(spare_mp);
+			return (SELRET_TUNFRAG);
+		}
 	} else {
+		if (is_icmp)
+			outer_hdr_len = ((uint8_t *)ipha) - mp->b_rptr;
 		check_proto = IPPROTO_ICMP;
 		sel->ips_isv4 = B_TRUE;
 		sel->ips_local_addr_v4 = ipha->ipha_dst;
 		sel->ips_remote_addr_v4 = ipha->ipha_src;
 		nexthdr = ipha->ipha_protocol;
 		hdr_len = IPH_HDR_LENGTH(ipha);
+
+		if (port_policy_present &&
+		    IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags) &&
+		    !is_icmp) {
+			/* IPv4 Fragment */
+			ipsec_freemsg_chain(spare_mp);
+			return (SELRET_TUNFRAG);
+		}
+
 	}
 	sel->ips_protocol = nexthdr;
 
-	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
-	    nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
+	if ((nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
+		nexthdr != IPPROTO_SCTP && nexthdr != check_proto) ||
+	    (!port_policy_present && tunnel_mode)) {
 		sel->ips_remote_port = sel->ips_local_port = 0;
-		freemsg(spare_mp);	/* Always works, even if NULL. */
-		return (B_TRUE);
+		ipsec_freemsg_chain(spare_mp);
+		return (SELRET_SUCCESS);
 	}
 
 	if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
@@ -2398,11 +2653,11 @@
 		ipsec_hdr_pullup_needed++;
 		if (spare_mp == NULL &&
 		    (spare_mp = msgpullup(mp, -1)) == NULL) {
-			return (B_FALSE);
+			return (SELRET_NOMEM);
 		}
-		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
+		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len];
 	} else {
-		ports = (uint16_t *)&mp->b_rptr[hdr_len];
+		ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len];
 	}
 
 	if (nexthdr == check_proto) {
@@ -2410,19 +2665,17 @@
 		sel->ips_icmp_type = *typecode++;
 		sel->ips_icmp_code = *typecode;
 		sel->ips_remote_port = sel->ips_local_port = 0;
-		freemsg(spare_mp);	/* Always works, even if NULL */
-		return (B_TRUE);
-	}
-
-	sel->ips_remote_port = *ports++;
-	sel->ips_local_port = *ports;
-	freemsg(spare_mp);	/* Always works, even if NULL */
-	return (B_TRUE);
+	} else {
+		sel->ips_remote_port = *ports++;
+		sel->ips_local_port = *ports;
+	}
+	ipsec_freemsg_chain(spare_mp);
+	return (SELRET_SUCCESS);
 }
 
 static boolean_t
 ipsec_init_outbound_ports(ipsec_selector_t *sel, mblk_t *mp, ipha_t *ipha,
-    ip6_t *ip6h)
+    ip6_t *ip6h, int outer_hdr_len)
 {
 	/*
 	 * XXX cut&paste shared with ipsec_init_inbound_sel
@@ -2445,6 +2698,7 @@
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS:
+		case IPPROTO_FRAGMENT:
 			/*
 			 * Use ip_hdr_length_nexthdr_v6().  And have a spare
 			 * mblk that's contiguous to feed it
@@ -2452,11 +2706,12 @@
 			spare_mp = msgpullup(mp, -1);
 			if (spare_mp == NULL ||
 			    !ip_hdr_length_nexthdr_v6(spare_mp,
-				(ip6_t *)spare_mp->b_rptr, &hdr_len,
-				&nexthdrp)) {
+				(ip6_t *)(spare_mp->b_rptr + outer_hdr_len),
+				&hdr_len, &nexthdrp)) {
 				/* Always works, even if NULL. */
-				freemsg(spare_mp);
-				freemsg(mp);
+				ipsec_freemsg_chain(spare_mp);
+				ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
 				return (B_FALSE);
 			} else {
 				nexthdr = *nexthdrp;
@@ -2477,11 +2732,11 @@
 	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP &&
 	    nexthdr != IPPROTO_SCTP && nexthdr != check_proto) {
 		sel->ips_local_port = sel->ips_remote_port = 0;
-		freemsg(spare_mp);  /* Always works, even if NULL. */
+		ipsec_freemsg_chain(spare_mp); /* Always works, even if NULL */
 		return (B_TRUE);
 	}
 
-	if (&mp->b_rptr[hdr_len] + 4 > mp->b_wptr) {
+	if (&mp->b_rptr[hdr_len] + 4 + outer_hdr_len > mp->b_wptr) {
 		/* If we didn't pullup a copy already, do so now. */
 		/*
 		 * XXX performance, will upper-layers frequently split TCP/UDP
@@ -2492,12 +2747,13 @@
 		 */
 		if (spare_mp == NULL &&
 		    (spare_mp = msgpullup(mp, -1)) == NULL) {
-			freemsg(mp);
+			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+			    &ipdrops_spd_nomem, &spd_dropper);
 			return (B_FALSE);
 		}
-		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len];
+		ports = (uint16_t *)&spare_mp->b_rptr[hdr_len + outer_hdr_len];
 	} else {
-		ports = (uint16_t *)&mp->b_rptr[hdr_len];
+		ports = (uint16_t *)&mp->b_rptr[hdr_len + outer_hdr_len];
 	}
 
 	if (nexthdr == check_proto) {
@@ -2505,13 +2761,11 @@
 		sel->ips_icmp_type = *typecode++;
 		sel->ips_icmp_code = *typecode;
 		sel->ips_remote_port = sel->ips_local_port = 0;
-		freemsg(spare_mp);	/* Always works, even if NULL */
-		return (B_TRUE);
-	}
-
-	sel->ips_local_port = *ports++;
-	sel->ips_remote_port = *ports;
-	freemsg(spare_mp);	/* Always works, even if NULL */
+	} else {
+		sel->ips_local_port = *ports++;
+		sel->ips_remote_port = *ports;
+	}
+	ipsec_freemsg_chain(spare_mp);	/* Always works, even if NULL */
 	return (B_TRUE);
 }
 
@@ -2618,7 +2872,7 @@
  * effective MTU, yielding the inner payload size which reflects a
  * packet with *minimum* ESP padding..
  */
-static int32_t
+int32_t
 ipsec_act_ovhd(const ipsec_act_t *act)
 {
 	int32_t overhead = 0;
@@ -2662,8 +2916,8 @@
  * into trouble from lots of collisions on ::1 addresses and the like
  * (seems unlikely).
  */
-#define	IPSEC_IPV4_HASH(a) ((a) % ipsec_spd_hashsize)
-#define	IPSEC_IPV6_HASH(a) ((a.s6_addr32[3]) % ipsec_spd_hashsize)
+#define	IPSEC_IPV4_HASH(a, n) ((a) % (n))
+#define	IPSEC_IPV6_HASH(a, n) (((a).s6_addr32[3]) % (n))
 
 /*
  * These two hash functions should produce coordinated values
@@ -2679,22 +2933,25 @@
 
 	if (valid & IPSL_IPV4) {
 		if (selkey->ipsl_remote_pfxlen == 32)
-			return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4));
+			return (IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
+				    ipsec_spd_hashsize));
 	}
 	if (valid & IPSL_IPV6) {
 		if (selkey->ipsl_remote_pfxlen == 128)
-			return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6));
+			return (IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
+				    ipsec_spd_hashsize));
 	}
 	return (IPSEC_SEL_NOHASH);
 }
 
 static uint32_t
-selector_hash(ipsec_selector_t *sel)
+selector_hash(ipsec_selector_t *sel, ipsec_policy_root_t *root)
 {
 	if (sel->ips_isv4) {
-		return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4));
-	}
-	return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6));
+		return (IPSEC_IPV4_HASH(sel->ips_remote_addr_v4,
+			    root->ipr_nchains));
+	}
+	return (IPSEC_IPV6_HASH(sel->ips_remote_addr_v6, root->ipr_nchains));
 }
 
 /*
@@ -2864,7 +3121,8 @@
 	    !(selkey->ipsl_valid & IPSL_IPV6));
 
 	hval = selkey_hash(selkey);
-	selkey->ipsl_hval = hval;
+	/* Record the selector hash; pol_hval is set on polhead insertion. */
+	selkey->ipsl_sel_hval = hval;
 
 	bucket = (hval == IPSEC_SEL_NOHASH) ? 0 : hval;
 
@@ -2872,7 +3130,8 @@
 	HASH_LOCK(ipsec_sel_hash, bucket);
 
 	for (HASH_ITERATE(sp, ipsl_hash, ipsec_sel_hash, bucket)) {
-		if (bcmp(&sp->ipsl_key, selkey, sizeof (*selkey)) == 0)
+		if (bcmp(&sp->ipsl_key, selkey,
+		    offsetof(ipsec_selkey_t, ipsl_pol_hval)) == 0)
 			break;
 	}
 	if (sp != NULL) {
@@ -2891,6 +3150,11 @@
 	HASH_INSERT(sp, ipsl_hash, ipsec_sel_hash, bucket);
 	sp->ipsl_refs = 2;	/* one for hash table, one for caller */
 	sp->ipsl_key = *selkey;
+	/* Set to uninitialized and have insertion into polhead fix things. */
+	if (selkey->ipsl_sel_hval != IPSEC_SEL_NOHASH)
+		sp->ipsl_key.ipsl_pol_hval = 0;
+	else
+		sp->ipsl_key.ipsl_pol_hval = IPSEC_SEL_NOHASH;
 
 	HASH_UNLOCK(ipsec_sel_hash, bucket);
 
@@ -2901,7 +3165,7 @@
 ipsec_sel_rel(ipsec_sel_t **spp)
 {
 	ipsec_sel_t *sp = *spp;
-	int hval = sp->ipsl_key.ipsl_hval;
+	int hval = sp->ipsl_key.ipsl_sel_hval;
 	*spp = NULL;
 
 	if (hval == IPSEC_SEL_NOHASH)
@@ -2942,12 +3206,15 @@
  */
 ipsec_policy_t *
 ipsec_policy_create(ipsec_selkey_t *keys, const ipsec_act_t *a,
-    int nacts, int prio)
+    int nacts, int prio, uint64_t *index_ptr)
 {
 	ipsec_action_t *ap;
 	ipsec_sel_t *sp;
 	ipsec_policy_t *ipp;
 
+	if (index_ptr == NULL)
+		index_ptr = &ipsec_next_policy_index;
+
 	ipp = kmem_cache_alloc(ipsec_pol_cache, KM_NOSLEEP);
 	ap = ipsec_act_find(a, nacts);
 	sp = ipsec_find_sel(keys);
@@ -2969,7 +3236,8 @@
 	ipp->ipsp_sel = sp;
 	ipp->ipsp_act = ap;
 	ipp->ipsp_prio = prio;	/* rule priority */
-	ipp->ipsp_index = ipsec_next_policy_index++;
+	ipp->ipsp_index = *index_ptr;
+	(*index_ptr)++;
 
 	return (ipp);
 }
@@ -3018,10 +3286,10 @@
 
 	rw_enter(&php->iph_lock, RW_WRITER);
 
-	if (keys->ipsl_hval == IPSEC_SEL_NOHASH) {
+	if (sp->ipsl_key.ipsl_pol_hval == IPSEC_SEL_NOHASH) {
 		head = pr->ipr_nonhash[af];
 	} else {
-		head = pr->ipr_hash[keys->ipsl_hval].hash_head;
+		head = pr->ipr_hash[sp->ipsl_key.ipsl_pol_hval].hash_head;
 	}
 
 	for (ip = head; ip != NULL; ip = nip) {
@@ -3096,7 +3364,8 @@
 
 /*
  * Given a constructed ipsec_policy_t policy rule, see if it can be entered
- * into the correct policy ruleset.
+ * into the correct policy ruleset.  As a side-effect, it sets the hash
+ * entries on "ipp"'s ipsp_pol_hval.
  *
  * Returns B_TRUE if it can be entered, B_FALSE if it can't be (because a
  * duplicate policy exists with exactly the same selectors), or an icmp
@@ -3129,10 +3398,17 @@
 	 * Because selectors are interned below, we need only compare pointers
 	 * for equality.
 	 */
-	if (selkey->ipsl_hval == IPSEC_SEL_NOHASH) {
+	if (selkey->ipsl_sel_hval == IPSEC_SEL_NOHASH) {
 		head = pr->ipr_nonhash[af];
 	} else {
-		head = pr->ipr_hash[selkey->ipsl_hval].hash_head;
+		selkey->ipsl_pol_hval =
+		    (selkey->ipsl_valid & IPSL_IPV4) ?
+		    IPSEC_IPV4_HASH(selkey->ipsl_remote.ipsad_v4,
+			pr->ipr_nchains) :
+		    IPSEC_IPV6_HASH(selkey->ipsl_remote.ipsad_v6,
+			pr->ipr_nchains);
+
+		head = pr->ipr_hash[selkey->ipsl_pol_hval].hash_head;
 	}
 
 	for (p2 = head; p2 != NULL; p2 = p2->ipsp_hash.hash_next) {
@@ -3275,7 +3551,7 @@
 	ipsec_policy_root_t *pr = &php->iph_root[direction];
 	ipsec_selkey_t *selkey = &ipp->ipsp_sel->ipsl_key;
 	uint32_t valid = selkey->ipsl_valid;
-	uint32_t hval = selkey->ipsl_hval;
+	uint32_t hval = selkey->ipsl_pol_hval;
 	int af = -1;
 
 	ASSERT(RW_WRITE_HELD(&php->iph_lock));
@@ -3329,7 +3605,6 @@
 	}
 }
 
-
 void
 ipsec_polhead_flush(ipsec_policy_head_t *php)
 {
@@ -3346,11 +3621,22 @@
 void
 ipsec_polhead_free(ipsec_policy_head_t *php)
 {
+	int dir;
+
 	ASSERT(php->iph_refs == 0);
 	rw_enter(&php->iph_lock, RW_WRITER);
 	ipsec_polhead_flush(php);
 	rw_exit(&php->iph_lock);
 	rw_destroy(&php->iph_lock);
+	for (dir = 0; dir < IPSEC_NTYPES; dir++) {
+		ipsec_policy_root_t *ipr = &php->iph_root[dir];
+		int chain;
+
+		for (chain = 0; chain < ipr->ipr_nchains; chain++)
+			mutex_destroy(&(ipr->ipr_hash[chain].hash_lock));
+
+	}
+	ipsec_polhead_free_table(php);
 	kmem_free(php, sizeof (*php));
 }
 
@@ -3367,7 +3653,7 @@
 	}
 }
 
-extern ipsec_policy_head_t *
+ipsec_policy_head_t *
 ipsec_polhead_create(void)
 {
 	ipsec_policy_head_t *php;
@@ -3394,7 +3680,7 @@
  * old one and return the only reference to the new one.
  * If the old one had a refcount of 1, just return it.
  */
-extern ipsec_policy_head_t *
+ipsec_policy_head_t *
 ipsec_polhead_split(ipsec_policy_head_t *php)
 {
 	ipsec_policy_head_t *nphp;
@@ -3494,7 +3780,7 @@
 	io->ipsec_out_frtn.free_arg = (char *)io;
 	io->ipsec_out_act = reflect_action;
 
-	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h))
+	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0))
 		return (B_FALSE);
 
 	io->ipsec_out_src_port = sel.ips_local_port;
@@ -3570,7 +3856,8 @@
 
 	nmp = ipsec_alloc_ipsec_out();
 	if (nmp == NULL) {
-		freemsg(cont);	/* XXX ip_drop_packet() ? */
+		ip_drop_packet_chain(cont, B_FALSE, NULL, NULL,
+		    &ipdrops_spd_nomem, &spd_dropper);
 		return (NULL);
 	}
 	ASSERT(nmp->b_datap->db_type == M_CTL);
@@ -3829,8 +4116,8 @@
 		 * it from the packet.
 		 */
 
-		if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
-			/* XXX any cleanup required here?? */
+		if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
+			/* Callee did ip_drop_packet(). */
 			return (NULL);
 		}
 		io->ipsec_out_src_port = sel.ips_local_port;
@@ -3854,7 +4141,16 @@
 			IPPH_REFHOLD(connp->conn_policy);
 			io->ipsec_out_polhead = connp->conn_policy;
 		}
-	}
+	} else {
+		/* Handle explicit drop action. */
+		if (p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_DISCARD ||
+		    p->ipsp_act->ipa_act.ipa_type == IPSEC_ACT_REJECT) {
+			ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
+			    &ipdrops_spd_explicit, &spd_dropper);
+			ipsec_mp = NULL;
+		}
+	}
+
 	return (ipsec_mp);
 }
 
@@ -4013,6 +4309,7 @@
 				ipsec_mp = mp;
 				io = NULL;
 			}
+			ASSERT(io == NULL || !io->ipsec_out_tunnel);
 		}
 		if (((io == NULL) || (io->ipsec_out_polhead == NULL)) &&
 		    ((connp == NULL) || (connp->conn_policy == NULL)))
@@ -4045,6 +4342,7 @@
 			ipsec_mp = mp;
 			io = NULL;
 		}
+		ASSERT(io == NULL || !io->ipsec_out_tunnel);
 	}
 
 	if (ipha != NULL) {
@@ -4104,15 +4402,14 @@
 		}
 	}
 
-	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h)) {
+	if (!ipsec_init_outbound_ports(&sel, mp, ipha, ip6h, 0)) {
 		if (ipha != NULL) {
 			BUMP_MIB(&ip_mib, ipOutDiscards);
 		} else {
 			BUMP_MIB(&ip6_mib, ipv6OutDiscards);
 		}
 
-		ip_drop_packet(ipsec_mp, B_FALSE, NULL, NULL,
-		    &ipdrops_spd_nomem, &spd_dropper);
+		/* Callee dropped the packet. */
 		return (NULL);
 	}
 
@@ -4832,3 +5129,1541 @@
 	if (prov_update_handle != NULL)
 		crypto_unnotify_events(prov_update_handle);
 }
+
+/*
+ * Tunnel-mode support routines.
+ */
+
+/*
+ * Apply per-tunnel IPsec policy to an outbound packet.
+ *
+ * Returns:
+ *  - "mp" untouched if the tunnel has no active policy, so that global
+ *    policy can take a crack at it in IP.
+ *  - A b_next-linked chain of IPSEC_OUT M_CTLs (one per packet/fragment),
+ *    suitable for putnext(), if a policy entry matches.
+ *  - NULL if the packet was consumed: it is a fragment that was cached
+ *    pending the rest of its chain, no policy entry matched (the packet
+ *    is dropped, unlike a global-policy miss), or memory ran out.
+ *
+ * Remember -> we can be forwarding packets.  Keep that in mind w.r.t.
+ * inner-packet contents.
+ */
+mblk_t *
+ipsec_tun_outbound(mblk_t *mp, tun_t *atp, ipha_t *inner_ipv4,
+    ip6_t *inner_ipv6, ipha_t *outer_ipv4, ip6_t *outer_ipv6, int outer_hdr_len)
+{
+	ipsec_tun_pol_t *itp = atp->tun_itp;
+	ipsec_policy_head_t *polhead;
+	ipsec_selector_t sel;
+	mblk_t *ipsec_mp, *ipsec_mp_head, *nmp;
+	mblk_t *spare_mp = NULL;
+	ipsec_out_t *io;
+	boolean_t is_fragment;
+	ipsec_policy_t *pol;
+
+	ASSERT(outer_ipv6 != NULL && outer_ipv4 == NULL ||
+	    outer_ipv4 != NULL && outer_ipv6 == NULL);
+	/* We take care of inners in a bit. */
+
+	/* No policy on this tunnel - let global policy have at it. */
+	if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE))
+		return (mp);
+	polhead = itp->itp_policy;
+
+	bzero(&sel, sizeof (sel));
+	if (inner_ipv4 != NULL) {
+		ASSERT(inner_ipv6 == NULL);
+		sel.ips_isv4 = B_TRUE;
+		sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
+		sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
+		sel.ips_protocol = (uint8_t)inner_ipv4->ipha_protocol;
+		is_fragment =
+		    IS_V4_FRAGMENT(inner_ipv4->ipha_fragment_offset_and_flags);
+	} else {
+		ASSERT(inner_ipv6 != NULL);
+		sel.ips_isv4 = B_FALSE;
+		sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
+		/* Use ip_get_dst_v6() just for the fragment bit. */
+		sel.ips_remote_addr_v6 = ip_get_dst_v6(inner_ipv6,
+		    &is_fragment);
+		/*
+		 * Reset, because we don't care about routing-header dests
+		 * in the forwarding/tunnel path.
+		 */
+		sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
+	}
+
+	if (itp->itp_flags & ITPF_P_PER_PORT_SECURITY) {
+		if (is_fragment) {
+			ipha_t *oiph;
+			ipha_t *iph = NULL;
+			ip6_t *ip6h = NULL;
+			int hdr_len;
+			uint16_t ip6_hdr_length;
+			uint8_t v6_proto;
+			uint8_t *v6_proto_p;
+
+			/*
+			 * We have a fragment we need to track!
+			 */
+			mp = ipsec_fragcache_add(&itp->itp_fragcache, NULL, mp,
+			    outer_hdr_len);
+			if (mp == NULL)
+				return (NULL);
+
+			/*
+			 * If we get here, we have a full
+			 * fragment chain
+			 */
+
+			oiph = (ipha_t *)mp->b_rptr;
+			if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
+				hdr_len = ((outer_hdr_len != 0) ?
+				    IPH_HDR_LENGTH(oiph) : 0);
+				iph = (ipha_t *)(mp->b_rptr + hdr_len);
+			} else {
+				ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
+				if ((spare_mp = msgpullup(mp, -1)) == NULL) {
+					ip_drop_packet_chain(mp, B_FALSE,
+					    NULL, NULL, &ipdrops_spd_nomem,
+					    &spd_dropper);
+					/*
+					 * mp is gone and there is no copy to
+					 * parse; we must not fall through.
+					 */
+					return (NULL);
+				}
+				ip6h = (ip6_t *)spare_mp->b_rptr;
+				/*
+				 * NOTE(review): the return value is ignored;
+				 * confirm ip6_hdr_length is always set.
+				 */
+				(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+				    &ip6_hdr_length, &v6_proto_p);
+				hdr_len = ip6_hdr_length;
+			}
+			outer_hdr_len = hdr_len;
+
+			if (sel.ips_isv4) {
+				if (iph == NULL) {
+					/* Was v6 outer */
+					iph = (ipha_t *)(mp->b_rptr + hdr_len);
+				}
+				inner_ipv4 = iph;
+				sel.ips_local_addr_v4 = inner_ipv4->ipha_src;
+				sel.ips_remote_addr_v4 = inner_ipv4->ipha_dst;
+				sel.ips_protocol =
+				    (uint8_t)inner_ipv4->ipha_protocol;
+			} else {
+				if ((spare_mp == NULL) &&
+				    ((spare_mp = msgpullup(mp, -1)) == NULL)) {
+					ip_drop_packet_chain(mp, B_FALSE,
+					    NULL, NULL, &ipdrops_spd_nomem,
+					    &spd_dropper);
+					/* Same as above: nothing left to use. */
+					return (NULL);
+				}
+				inner_ipv6 = (ip6_t *)(spare_mp->b_rptr +
+				    hdr_len);
+				sel.ips_local_addr_v6 = inner_ipv6->ip6_src;
+				sel.ips_remote_addr_v6 = inner_ipv6->ip6_dst;
+				/*
+				 * NOTE(review): the return value is ignored;
+				 * confirm v6_proto_p is always set before
+				 * it is dereferenced below.
+				 */
+				(void) ip_hdr_length_nexthdr_v6(spare_mp,
+				    inner_ipv6, &ip6_hdr_length,
+				    &v6_proto_p);
+				v6_proto = *v6_proto_p;
+				sel.ips_protocol = v6_proto;
+#ifdef FRAGCACHE_DEBUG
+				cmn_err(CE_WARN, "v6_sel.ips_protocol = %d\n",
+				    sel.ips_protocol);
+#endif
+			}
+			/* Ports are extracted below */
+		}
+
+		/* Get ports... */
+		if (spare_mp != NULL) {
+			if (!ipsec_init_outbound_ports(&sel, spare_mp,
+			    inner_ipv4, inner_ipv6, outer_hdr_len)) {
+				/*
+				 * callee did ip_drop_packet_chain() on
+				 * spare_mp
+				 */
+				ipsec_freemsg_chain(mp);
+				return (NULL);
+			}
+		} else {
+			if (!ipsec_init_outbound_ports(&sel, mp,
+			    inner_ipv4, inner_ipv6, outer_hdr_len)) {
+				/* callee did ip_drop_packet_chain() on mp. */
+				return (NULL);
+			}
+		}
+#ifdef FRAGCACHE_DEBUG
+		if (inner_ipv4 != NULL)
+			cmn_err(CE_WARN,
+			    "(v4) sel.ips_protocol = %d, "
+			    "sel.ips_local_port = %d, "
+			    "sel.ips_remote_port = %d\n",
+			    sel.ips_protocol, ntohs(sel.ips_local_port),
+			    ntohs(sel.ips_remote_port));
+		if (inner_ipv6 != NULL)
+			cmn_err(CE_WARN,
+			    "(v6) sel.ips_protocol = %d, "
+			    "sel.ips_local_port = %d, "
+			    "sel.ips_remote_port = %d\n",
+			    sel.ips_protocol, ntohs(sel.ips_local_port),
+			    ntohs(sel.ips_remote_port));
+#endif
+		/* Success so far - done with spare_mp */
+		ipsec_freemsg_chain(spare_mp);
+	}
+	rw_enter(&polhead->iph_lock, RW_READER);
+	pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_OUTBOUND, &sel);
+	rw_exit(&polhead->iph_lock);
+	if (pol == NULL) {
+		/*
+		 * No matching policy on this tunnel, drop the packet.
+		 *
+		 * NOTE:  Tunnel-mode tunnels are different from the
+		 * IP global transport mode policy head.  For a tunnel-mode
+		 * tunnel, we drop the packet in lieu of passing it
+		 * along accepted the way a global-policy miss would.
+		 *
+		 * NOTE2:  "negotiate transport" tunnels should match ALL
+		 * inbound packets, but we do not uncomment the ASSERT()
+		 * below because if/when we open PF_POLICY, a user can
+		 * shoot him/her-self in the foot with a 0 priority.
+		 */
+
+		/* ASSERT(itp->itp_flags & ITPF_P_TUNNEL); */
+#ifdef FRAGCACHE_DEBUG
+		cmn_err(CE_WARN, "ipsec_tun_outbound(): No matching tunnel "
+		    "per-port policy\n");
+#endif
+		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+		    &ipdrops_spd_explicit, &spd_dropper);
+		return (NULL);
+	}
+
+#ifdef FRAGCACHE_DEBUG
+	cmn_err(CE_WARN, "Having matching tunnel per-port policy\n");
+#endif
+
+	/* Construct an IPSEC_OUT message. */
+	ipsec_mp = ipsec_mp_head = ipsec_alloc_ipsec_out();
+	if (ipsec_mp == NULL) {
+		IPPOL_REFRELE(pol);
+		/* mp may be a b_next chain of fragments; drop all of it. */
+		ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+		    &ipdrops_spd_nomem, &spd_dropper);
+		return (NULL);
+	}
+	ipsec_mp->b_cont = mp;
+	io = (ipsec_out_t *)ipsec_mp->b_rptr;
+	IPPH_REFHOLD(polhead);
+	/*
+	 * NOTE: free() function of ipsec_out mblk will release polhead and
+	 * pol references.
+	 */
+	io->ipsec_out_polhead = polhead;
+	io->ipsec_out_policy = pol;
+	io->ipsec_out_zoneid = atp->tun_zoneid;
+	io->ipsec_out_v4 = (outer_ipv4 != NULL);
+	io->ipsec_out_secure = B_TRUE;
+
+	if (!(itp->itp_flags & ITPF_P_TUNNEL)) {
+		/* Set up transport mode for tunnelled packets. */
+		io->ipsec_out_proto = (inner_ipv4 != NULL) ? IPPROTO_ENCAP :
+		    IPPROTO_IPV6;
+		return (ipsec_mp);
+	}
+
+	/* Fill in tunnel-mode goodies here. */
+	io->ipsec_out_tunnel = B_TRUE;
+	/* XXX Do I need to fill in all of the goodies here? */
+	if (inner_ipv4) {
+		io->ipsec_out_inaf = AF_INET;
+		io->ipsec_out_insrc[0] =
+		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v4;
+		io->ipsec_out_indst[0] =
+		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v4;
+	} else {
+		io->ipsec_out_inaf = AF_INET6;
+		io->ipsec_out_insrc[0] =
+		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[0];
+		io->ipsec_out_insrc[1] =
+		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[1];
+		io->ipsec_out_insrc[2] =
+		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[2];
+		io->ipsec_out_insrc[3] =
+		    pol->ipsp_sel->ipsl_key.ipsl_local.ipsad_v6.s6_addr32[3];
+		io->ipsec_out_indst[0] =
+		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[0];
+		io->ipsec_out_indst[1] =
+		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[1];
+		io->ipsec_out_indst[2] =
+		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[2];
+		io->ipsec_out_indst[3] =
+		    pol->ipsp_sel->ipsl_key.ipsl_remote.ipsad_v6.s6_addr32[3];
+	}
+	io->ipsec_out_insrcpfx = pol->ipsp_sel->ipsl_key.ipsl_local_pfxlen;
+	io->ipsec_out_indstpfx = pol->ipsp_sel->ipsl_key.ipsl_remote_pfxlen;
+	/* NOTE:  These are used for transport mode too. */
+	io->ipsec_out_src_port = pol->ipsp_sel->ipsl_key.ipsl_lport;
+	io->ipsec_out_dst_port = pol->ipsp_sel->ipsl_key.ipsl_rport;
+	io->ipsec_out_proto = pol->ipsp_sel->ipsl_key.ipsl_proto;
+
+	/*
+	 * The mp pointer is still valid.
+	 * Add ipsec_out to each fragment.
+	 * The fragment head already has one.
+	 */
+	nmp = mp->b_next;
+	mp->b_next = NULL;
+	mp = nmp;
+	ASSERT(ipsec_mp != NULL);
+	while (mp != NULL) {
+		nmp = mp->b_next;
+		ipsec_mp->b_next = ipsec_out_tag(ipsec_mp_head, mp);
+		if (ipsec_mp->b_next == NULL) {
+			ip_drop_packet_chain(ipsec_mp_head, B_FALSE, NULL, NULL,
+			    &ipdrops_spd_nomem, &spd_dropper);
+			/*
+			 * NOTE(review): confirm ipsec_out_tag() does not
+			 * itself consume mp on failure; if it does, this
+			 * second drop is a double free.
+			 */
+			ip_drop_packet_chain(mp, B_FALSE, NULL, NULL,
+			    &ipdrops_spd_nomem, &spd_dropper);
+			return (NULL);
+		}
+		ipsec_mp = ipsec_mp->b_next;
+		mp->b_next = NULL;
+		mp = nmp;
+	}
+	return (ipsec_mp_head);
+}
+
+/*
+ * Run the inbound policy check over every IPSEC_IN M_CTL in a b_next chain
+ * and hand back the bare data mblks, re-linked through b_next.
+ *
+ * NOTE: This consumes the caller's reference on pol.  On a NULL return
+ * everything (checked data, unchecked remainder) has been freed.
+ */
+mblk_t *
+ipsec_check_ipsecin_policy_reasm(mblk_t *ipsec_mp, ipsec_policy_t *pol,
+    ipha_t *inner_ipv4, ip6_t *inner_ipv6, uint64_t pkt_unique)
+{
+	/* Assume ipsec_mp is a chain of b_next-linked IPSEC_IN M_CTLs. */
+	mblk_t *head = NULL, *tail = NULL;
+	mblk_t *rest;
+
+	for (; ipsec_mp != NULL; ipsec_mp = rest) {
+		/* Detach this M_CTL so no b_next asserts trip below. */
+		rest = ipsec_mp->b_next;
+		ipsec_mp->b_next = NULL;
+
+		/*
+		 * ipsec_check_ipsecin_policy() does a refrele on every
+		 * call, so take an extra hold for each one.
+		 */
+		IPPOL_REFHOLD(pol);
+
+		if (ipsec_check_ipsecin_policy(NULL, ipsec_mp, pol,
+		    inner_ipv4, inner_ipv6, pkt_unique) == NULL) {
+			/*
+			 * The callee already freed ipsec_mp.  Drop our own
+			 * pol reference, then whatever we have collected
+			 * and whatever is still unchecked.
+			 */
+			IPPOL_REFRELE(pol);
+			ipsec_freemsg_chain(head);
+			ipsec_freemsg_chain(rest);	/* ipdrop stats? */
+			return (NULL);
+		}
+
+		/* Passed: append the data and discard the M_CTL wrapper. */
+		if (tail == NULL)
+			head = ipsec_mp->b_cont;
+		else
+			tail->b_next = ipsec_mp->b_cont;
+		tail = ipsec_mp->b_cont;
+		freeb(ipsec_mp);
+	}
+
+	/*
+	 * Final release: balances either the caller's original hold or,
+	 * for an empty chain, the fact that no check ever released it.
+	 */
+	IPPOL_REFRELE(pol);
+
+	/* The data chain is ready for return to the tun module. */
+	return (head);
+}
+
+
+/*
+ * Returns B_TRUE if the inbound packet passed an IPsec policy check.  Returns
+ * B_FALSE if it failed or if it is a fragment needing its friends before a
+ * policy check can be performed.
+ *
+ * Expects a non-NULL *data_mp, an optional ipsec_mp, and a non-NULL polhead.
+ * data_mp may be reassigned with a b_next chain of packets if fragments
+ * needed to be collected for a proper policy check.
+ *
+ * Always frees ipsec_mp, but only frees data_mp if returns B_FALSE.  This
+ * function calls ip_drop_packet() on data_mp if need be.
+ *
+ * NOTE:  outer_hdr_len is signed.  If it's a negative value, the caller
+ * is inspecting an ICMP packet.
+ */
+boolean_t
+ipsec_tun_inbound(mblk_t *ipsec_mp, mblk_t **data_mp, ipsec_tun_pol_t *itp,
+    ipha_t *inner_ipv4, ip6_t *inner_ipv6, ipha_t *outer_ipv4,
+    ip6_t *outer_ipv6, int outer_hdr_len)
+{
+	ipsec_policy_head_t *polhead;
+	ipsec_selector_t sel;
+	/* "message" is the head of what we own: the M_CTL if any, else data. */
+	mblk_t *message = (ipsec_mp == NULL) ? *data_mp : ipsec_mp;
+	ipsec_policy_t *pol;
+	uint16_t tmpport;
+	selret_t rc;
+	boolean_t retval, port_policy_present, is_icmp;
+	in6_addr_t tmpaddr;
+	uint8_t flags;
+
+	/*
+	 * NOTE(review): only this one field of sel is set here; the rest is
+	 * left to ipsec_init_inbound_sel() - confirm that holds on every
+	 * return code we continue from (e.g. SELRET_BADPKT).
+	 */
+	sel.ips_is_icmp_inv_acq = 0;
+
+	ASSERT(outer_ipv4 != NULL && outer_ipv6 == NULL ||
+	    outer_ipv4 == NULL && outer_ipv6 != NULL);
+	ASSERT(inner_ipv4 != NULL && inner_ipv6 == NULL ||
+	    inner_ipv4 == NULL && inner_ipv6 != NULL);
+	ASSERT(message == *data_mp || message->b_cont == *data_mp);
+
+	/* A negative length is the ICMP marker; normalize and remember it. */
+	if (outer_hdr_len < 0) {
+		outer_hdr_len = (-outer_hdr_len);
+		is_icmp = B_TRUE;
+	} else {
+		is_icmp = B_FALSE;
+	}
+
+	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
+		polhead = itp->itp_policy;
+		/*
+		 * We need to perform full Tunnel-Mode enforcement,
+		 * and we need to have inner-header data for such enforcement.
+		 *
+		 * See ipsec_init_inbound_sel() for the 0x80000000 on inbound
+		 * and on return.
+		 */
+
+		port_policy_present = ((itp->itp_flags &
+		    ITPF_P_PER_PORT_SECURITY) ? B_TRUE : B_FALSE);
+		flags = ((port_policy_present ? SEL_PORT_POLICY : SEL_NONE) |
+		    (is_icmp ? SEL_IS_ICMP : SEL_NONE) | SEL_TUNNEL_MODE);
+
+		rc = ipsec_init_inbound_sel(&sel, *data_mp, inner_ipv4,
+		    inner_ipv6, flags);
+
+		switch (rc) {
+		case SELRET_NOMEM:
+			ip_drop_packet(message, B_TRUE, NULL, NULL,
+			    &ipdrops_spd_nomem, &spd_dropper);
+			return (B_FALSE);
+		case SELRET_TUNFRAG:
+			/*
+			 * At this point, if we're cleartext, we don't want
+			 * to go there.
+			 */
+			if (ipsec_mp == NULL) {
+				ip_drop_packet(*data_mp, B_TRUE, NULL, NULL,
+				    &ipdrops_spd_got_clear, &spd_dropper);
+				*data_mp = NULL;
+				return (B_FALSE);
+			}
+			ASSERT(((ipsec_in_t *)ipsec_mp->b_rptr)->
+			    ipsec_in_secure);
+			message = ipsec_fragcache_add(&itp->itp_fragcache,
+			    ipsec_mp, *data_mp, outer_hdr_len);
+
+			if (message == NULL) {
+				/*
+				 * Data is cached, fragment chain is not
+				 * complete.  I consume ipsec_mp and data_mp
+				 */
+				return (B_FALSE);
+			}
+
+			/*
+			 * If we get here, we have a full fragment chain.
+			 * Reacquire headers and selectors from first fragment.
+			 */
+			if (inner_ipv4 != NULL) {
+				inner_ipv4 = (ipha_t *)message->b_cont->b_rptr;
+				ASSERT(message->b_cont->b_wptr -
+				    message->b_cont->b_rptr > sizeof (ipha_t));
+			} else {
+				inner_ipv6 = (ip6_t *)message->b_cont->b_rptr;
+				ASSERT(message->b_cont->b_wptr -
+				    message->b_cont->b_rptr > sizeof (ip6_t));
+			}
+			/* Use SEL_NONE so we always get ports! */
+			rc = ipsec_init_inbound_sel(&sel, message->b_cont,
+			    inner_ipv4, inner_ipv6, SEL_NONE);
+			switch (rc) {
+			case SELRET_SUCCESS:
+				/*
+				 * Get to same place as first caller's
+				 * SELRET_SUCCESS case.
+				 */
+				break;
+			case SELRET_NOMEM:
+				ip_drop_packet_chain(message, B_TRUE, NULL,
+				    NULL, &ipdrops_spd_nomem, &spd_dropper);
+				return (B_FALSE);
+			case SELRET_BADPKT:
+				ip_drop_packet_chain(message, B_TRUE, NULL,
+				    NULL, &ipdrops_spd_malformed_frag,
+				    &spd_dropper);
+				return (B_FALSE);
+			case SELRET_TUNFRAG:
+				cmn_err(CE_WARN, "(TUNFRAG on 2nd call...)");
+				/* FALLTHRU */
+			default:
+				cmn_err(CE_WARN, "ipsec_init_inbound_sel(mark2)"
+				    " returns bizarro 0x%x", rc);
+				/* Guaranteed panic! */
+				ASSERT(rc == SELRET_NOMEM);
+				return (B_FALSE);
+			}
+			/*
+			 * Only the inner SELRET_SUCCESS break lands here; it
+			 * joins the outer SELRET_SUCCESS case below.
+			 */
+			/* FALLTHRU */
+		case SELRET_SUCCESS:
+			/*
+			 * Common case:
+			 * No per-port policy or a non-fragment.  Keep going.
+			 */
+			break;
+		case SELRET_BADPKT:
+			/*
+			 * We may receive ICMP (with IPv6 inner) packets that
+			 * trigger this return value.  Send 'em in for
+			 * enforcement checking.
+			 */
+			cmn_err(CE_NOTE, "ipsec_tun_inbound(): "
+			    "sending 'bad packet' in for enforcement");
+			break;
+		default:
+			cmn_err(CE_WARN,
+			    "ipsec_init_inbound_sel() returns bizarro 0x%x",
+			    rc);
+			ASSERT(rc == SELRET_NOMEM);	/* Guaranteed panic! */
+			return (B_FALSE);
+		}
+
+		if (is_icmp) {
+			/*
+			 * Swap local/remote because this is an ICMP packet.
+			 */
+			tmpaddr = sel.ips_local_addr_v6;
+			sel.ips_local_addr_v6 = sel.ips_remote_addr_v6;
+			sel.ips_remote_addr_v6 = tmpaddr;
+			tmpport = sel.ips_local_port;
+			sel.ips_local_port = sel.ips_remote_port;
+			sel.ips_remote_port = tmpport;
+		}
+
+		/* find_policy_head() */
+		rw_enter(&polhead->iph_lock, RW_READER);
+		pol = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND,
+		    &sel);
+		rw_exit(&polhead->iph_lock);
+		if (pol != NULL) {
+			/*
+			 * Cleartext arrival (no M_CTL, or one not marked
+			 * secure): the verdict is simply whether the matched
+			 * action allows clear traffic.
+			 */
+			if (ipsec_mp == NULL ||
+			    !((ipsec_in_t *)ipsec_mp->b_rptr)->
+				ipsec_in_secure) {
+				retval = pol->ipsp_act->ipa_allow_clear;
+				if (!retval) {
+					/*
+					 * XXX should never get here with
+					 * tunnel reassembled fragments?
+					 */
+					ASSERT(message->b_next == NULL);
+					ip_drop_packet(message, B_TRUE, NULL,
+					    NULL, &ipdrops_spd_got_clear,
+					    &spd_dropper);
+				} else if (ipsec_mp != NULL) {
+					freeb(ipsec_mp);
+				}
+
+				IPPOL_REFRELE(pol);
+				return (retval);
+			}
+			/*
+			 * NOTE: The following releases pol's reference and
+			 * calls ip_drop_packet() for me on NULL returns.
+			 *
+			 * "sel" is still good here, so let's use it!
+			 */
+			*data_mp = ipsec_check_ipsecin_policy_reasm(message,
+			    pol, inner_ipv4, inner_ipv6, SA_UNIQUE_ID(
+				sel.ips_remote_port, sel.ips_local_port,
+				(inner_ipv4 == NULL) ? IPPROTO_IPV6 :
+				IPPROTO_ENCAP, sel.ips_protocol));
+			return (*data_mp != NULL);
+		}
+
+		/*
+		 * Else fallthru and check the global policy on the outer
+		 * header(s) if this tunnel is an old-style transport-mode
+		 * one.  Drop the packet explicitly (no policy entry) for
+		 * a new-style tunnel-mode tunnel.
+		 */
+		if ((itp->itp_flags & ITPF_P_TUNNEL) && !is_icmp) {
+			ip_drop_packet_chain(message, B_TRUE, NULL,
+			    NULL, &ipdrops_spd_explicit, &spd_dropper);
+			return (B_FALSE);
+		}
+	}
+
+	/*
+	 * NOTE:  If we reach here, we will not have packet chains from
+	 * fragcache_add(), because the only way I get chains is on a
+	 * tunnel-mode tunnel, which either returns with a pass, or gets
+	 * hit by the ip_drop_packet_chain() call right above here.
+	 */
+
+	/* If no per-tunnel security, check global policy now. */
+	if (ipsec_mp != NULL &&
+	    (((outer_ipv4 != NULL) && !ipsec_inbound_v4_policy_present) ||
+		((outer_ipv6 != NULL) && !ipsec_inbound_v6_policy_present))) {
+		if (((ipsec_in_t *)(ipsec_mp->b_rptr))->
+		    ipsec_in_icmp_loopback) {
+			/*
+			 * This is an ICMP message with an ipsec_mp
+			 * attached.  We should accept it.
+			 */
+			/* ipsec_mp is known non-NULL here; check is belt-and-braces. */
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
+			return (B_TRUE);
+		}
+
+		ip_drop_packet(ipsec_mp, B_TRUE, NULL, NULL,
+		    &ipdrops_spd_got_secure, &spd_dropper);
+		return (B_FALSE);
+	}
+
+	/* NOTE:  Frees message if it returns NULL. */
+	if (ipsec_check_global_policy(message, NULL, outer_ipv4, outer_ipv6,
+		(ipsec_mp != NULL)) == NULL) {
+		return (B_FALSE);
+	}
+
+	if (ipsec_mp != NULL)
+		freeb(ipsec_mp);
+
+	/*
+	 * At this point, we pretend it's a cleartext accepted
+	 * packet.
+	 */
+	return (B_TRUE);
+}
+
+/*
+ * AVL ordering function for the tree of tunnel policy nodes: orders two
+ * ipsec_tun_pol_t's by itp_name (at most LIFNAMSIZ bytes) and normalizes
+ * the result to -1, 0 or 1.
+ */
+static int
+tunnel_compare(const void *arg1, const void *arg2)
+{
+	ipsec_tun_pol_t *a = (ipsec_tun_pol_t *)arg1;
+	ipsec_tun_pol_t *b = (ipsec_tun_pol_t *)arg2;
+	int diff = strncmp(a->itp_name, b->itp_name, LIFNAMSIZ);
+
+	if (diff > 0)
+		return (1);
+	if (diff < 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Free a tunnel policy node.
+ *
+ * Drops the node's references on both its active (itp_policy) and inactive
+ * (itp_inactive) policy heads, destroys itp_lock, and frees the node itself.
+ * Both policy-head pointers are handed to IPPH_REFRELE() unconditionally and
+ * itp_lock is destroyed unconditionally, so only call this on a node whose
+ * policy heads and lock have been set up.
+ */
+void
+itp_free(ipsec_tun_pol_t *node)
+{
+	IPPH_REFRELE(node->itp_policy);
+	IPPH_REFRELE(node->itp_inactive);
+	mutex_destroy(&node->itp_lock);
+	kmem_free(node, sizeof (*node));
+}
+
+/*
+ * Take a tunnel policy node out of the tunnel_policies AVL tree.
+ *
+ * Under the tunnel_policy_lock writer lock this bumps tunnel_policy_gen,
+ * tears down the node's fragment cache and removes the node from the tree.
+ * One reference is dropped afterwards - presumably the one the tree held.
+ */
+void
+itp_unlink(ipsec_tun_pol_t *node)
+{
+	rw_enter(&tunnel_policy_lock, RW_WRITER);
+	tunnel_policy_gen++;
+	ipsec_fragcache_uninit(&node->itp_fragcache);
+	avl_remove(&tunnel_policies, node);
+	rw_exit(&tunnel_policy_lock);
+	ITP_REFRELE(node);
+}
+
+/*
+ * Look up a tunnel security policy node by tunnel name (used mostly by
+ * spdsock).  Returns NULL if there is none; otherwise the node comes back
+ * with its reference count bumped.
+ */
+ipsec_tun_pol_t *
+get_tunnel_policy(char *name)
+{
+	ipsec_tun_pol_t key;
+	ipsec_tun_pol_t *found;
+
+	/* Only the name field of the on-stack key is filled in. */
+	(void) strncpy(key.itp_name, name, LIFNAMSIZ);
+
+	rw_enter(&tunnel_policy_lock, RW_READER);
+	found = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, &key, NULL);
+	if (found != NULL) {
+		/* Take the hold before the tree lock is dropped. */
+		ITP_REFHOLD(found);
+	}
+	rw_exit(&tunnel_policy_lock);
+
+	return (found);
+}
+
+/*
+ * Public interface to walk all tunnel security policies.  Useful for spdsock
+ * DUMP operations.  iterator() is called once per node, in tree order, with
+ * tunnel_policy_lock held as reader; it will not consume a reference.
+ */
+void
+itp_walk(void (*iterator)(ipsec_tun_pol_t *, void *), void *arg)
+{
+	ipsec_tun_pol_t *cur;
+
+	rw_enter(&tunnel_policy_lock, RW_READER);
+	cur = avl_first(&tunnel_policies);
+	while (cur != NULL) {
+		iterator(cur, arg);
+		cur = AVL_NEXT(&tunnel_policies, cur);
+	}
+	rw_exit(&tunnel_policy_lock);
+}
+
+/*
+ * Initialize a (zeroed) policy head for use by a tunnel: set up its lock,
+ * give it a single reference, and allocate and initialize its hash tables
+ * with tun_spd_hashsize chains.
+ *
+ * Returns B_TRUE on success.  This can only fail if there's a memory
+ * problem; in that case B_FALSE is returned with the table freed and the
+ * lock destroyed, so the caller may simply free "iph".
+ */
+static boolean_t
+tunnel_polhead_init(ipsec_policy_head_t *iph)
+{
+	rw_init(&iph->iph_lock, NULL, RW_DEFAULT, NULL);
+	iph->iph_refs = 1;
+	iph->iph_gen = 0;
+	if (ipsec_alloc_table(iph, tun_spd_hashsize, KM_SLEEP, B_FALSE) != 0) {
+		ipsec_polhead_free_table(iph);
+		/* Undo rw_init() before the caller frees the memory. */
+		rw_destroy(&iph->iph_lock);
+		return (B_FALSE);
+	}
+	ipsec_polhead_init(iph, tun_spd_hashsize);
+	return (B_TRUE);
+}
+
+/*
+ * Create a tunnel policy node with "name" and link it into the
+ * tunnel_policies tree.
+ *
+ * On success the new node is returned holding two references (one for the
+ * caller, one for the tree) and *gen receives the new tunnel_policy_gen.
+ * On failure NULL is returned and *errno is set to ENOMEM if there's a
+ * memory problem, or EEXIST if there's an existing node with that name.
+ *
+ * The node is only inserted into the tree once it is fully constructed, so
+ * a failure never leaves a half-built (or freed) node visible to lookups,
+ * and every failure path releases tunnel_policy_lock.
+ */
+ipsec_tun_pol_t *
+create_tunnel_policy(char *name, int *errno, uint64_t *gen)
+{
+	ipsec_tun_pol_t *newbie, *existing;
+	avl_index_t where;
+	int err = ENOMEM;
+
+	newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
+	if (newbie == NULL) {
+		*errno = ENOMEM;
+		return (NULL);
+	}
+	if (!ipsec_fragcache_init(&newbie->itp_fragcache)) {
+		kmem_free(newbie, sizeof (*newbie));
+		*errno = ENOMEM;
+		return (NULL);
+	}
+
+	(void) strncpy(newbie->itp_name, name, LIFNAMSIZ);
+
+	/*
+	 * Hold the writer lock from the lookup through the insert so that
+	 * "where" stays valid and nobody can slip in a duplicate.
+	 */
+	rw_enter(&tunnel_policy_lock, RW_WRITER);
+	existing = (ipsec_tun_pol_t *)avl_find(&tunnel_policies, newbie,
+	    &where);
+	if (existing != NULL) {
+		/*
+		 * Don't use itp_free() here: the policy heads are still
+		 * NULL and itp_lock has not been initialized yet.
+		 */
+		err = EEXIST;
+		goto fail;
+	}
+
+	newbie->itp_policy = kmem_zalloc(sizeof (ipsec_policy_head_t),
+	    KM_NOSLEEP);
+	if (newbie->itp_policy == NULL)
+		goto fail;
+	newbie->itp_inactive = kmem_zalloc(sizeof (ipsec_policy_head_t),
+	    KM_NOSLEEP);
+	if (newbie->itp_inactive == NULL) {
+		/* Only the active head exists at this point. */
+		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
+		goto fail;
+	}
+
+	if (!tunnel_polhead_init(newbie->itp_policy)) {
+		kmem_free(newbie->itp_policy, sizeof (ipsec_policy_head_t));
+		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
+		goto fail;
+	} else if (!tunnel_polhead_init(newbie->itp_inactive)) {
+		/* The active head is live; let its refcount tear it down. */
+		IPPH_REFRELE(newbie->itp_policy);
+		kmem_free(newbie->itp_inactive, sizeof (ipsec_policy_head_t));
+		goto fail;
+	}
+
+	/* Fully constructed - now it is safe to publish the node. */
+	mutex_init(&newbie->itp_lock, NULL, MUTEX_DEFAULT, NULL);
+	newbie->itp_refcnt = 2;	/* One for the caller, one for the tree. */
+	newbie->itp_next_policy_index = 1;
+	tunnel_policy_gen++;
+	*gen = tunnel_policy_gen;
+	avl_insert(&tunnel_policies, newbie, where);
+	rw_exit(&tunnel_policy_lock);
+
+	return (newbie);
+
+fail:
+	rw_exit(&tunnel_policy_lock);
+	*errno = err;
+	/* Release what ipsec_fragcache_init() set up, then the node. */
+	ipsec_fragcache_uninit(&newbie->itp_fragcache);
+	kmem_free(newbie, sizeof (*newbie));
+	return (NULL);
+}
+
+/*
+ * We can't call the tun_t lookup function until tun is
+ * loaded, so create a dummy function to avoid symbol
+ * lookup errors on boot.
+ *
+ * All three arguments are ignored and the result is always NULL, i.e.
+ * "no tunnel policy found".
+ */
+/* ARGSUSED */
+ipsec_tun_pol_t *
+itp_get_byaddr_dummy(uint32_t *laddr, uint32_t *faddr, int af)
+{
+	return (NULL);  /* Always return NULL. */
+}
+
+/*
+ * Frag cache code, based on SunScreen 3.2 source
+ *	screen/kernel/common/screen_fragcache.c
+ */
+
+#define	IPSEC_FRAG_TTL_MAX	5
+/*
+ * Note that the following parameters create 256 hash buckets
+ * with 1024 free entries to be distributed.  Things are cleaned
+ * periodically and are attempted to be cleaned when there is no
+ * free space, but this system errs on the side of dropping packets
+ * over creating memory exhaustion.  We may decide to make hash
+ * factor a tunable if this proves to be a bad decision.
+ */
+#define	IPSEC_FRAG_HASH_SLOTS	(1<<8)
+#define	IPSEC_FRAG_HASH_FACTOR	4
+#define	IPSEC_FRAG_HASH_SIZE	(IPSEC_FRAG_HASH_SLOTS * IPSEC_FRAG_HASH_FACTOR)
+
+/* Fold the low two bytes of a fragment id into a bucket index. */
+#define	IPSEC_FRAG_HASH_MASK		(IPSEC_FRAG_HASH_SLOTS - 1)
+#define	IPSEC_FRAG_HASH_FUNC(id)	(((id) & IPSEC_FRAG_HASH_MASK) ^ \
+					    (((id) / \
+					    (ushort_t)IPSEC_FRAG_HASH_SLOTS) & \
+					    IPSEC_FRAG_HASH_MASK))
+
+/* Maximum fragments per packet.  48 bytes payload x 1366 packets > 64KB */
+#define	IPSEC_MAX_FRAGS		1366
+
+/*
+ * IPv4 fragment helpers.  "ipha" is parenthesized so that any pointer
+ * expression (e.g. a cast or pointer arithmetic) may be passed safely.
+ * V4_FRAG_OFFSET() yields the fragment offset in bytes; V4_MORE_FRAGS()
+ * is non-zero if the more-fragments flag is set.
+ */
+#define	V4_FRAG_OFFSET(ipha)	\
+	((ntohs((ipha)->ipha_fragment_offset_and_flags) & IPH_OFFSET) << 3)
+#define	V4_MORE_FRAGS(ipha)	\
+	(ntohs((ipha)->ipha_fragment_offset_and_flags) & IPH_MF)
+
+/*
+ * Initialize an ipsec fragcache instance.
+ *
+ * Initializes frag->itpf_lock, allocates the zeroed bucket array
+ * (IPSEC_FRAG_HASH_SLOTS pointers) and one contiguous array of
+ * IPSEC_FRAG_HASH_SIZE entries, and threads every entry onto
+ * frag->itpf_freelist.  Both allocations are KM_NOSLEEP.
+ *
+ * Returns B_FALSE if memory allocation fails.  In that case itpf_ptr and
+ * itpf_freelist are left NULL, so ipsec_fragcache_uninit() (which keys off
+ * itpf_ptr) frees nothing a second time.  The lock is left initialized on
+ * failure, exactly as before.
+ */
+boolean_t
+ipsec_fragcache_init(ipsec_fragcache_t *frag)
+{
+	ipsec_fragcache_entry_t *ftemp;
+	int i;
+
+	mutex_init(&frag->itpf_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/* Start from a known-empty state so failure paths are well defined. */
+	frag->itpf_freelist = NULL;
+	frag->itpf_expire_hint = 0;
+
+	frag->itpf_ptr = (ipsec_fragcache_entry_t **)
+	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t *) *
+	    IPSEC_FRAG_HASH_SLOTS, KM_NOSLEEP);
+	if (frag->itpf_ptr == NULL)
+		return (B_FALSE);
+
+	ftemp = (ipsec_fragcache_entry_t *)
+	    kmem_zalloc(sizeof (ipsec_fragcache_entry_t) *
+	    IPSEC_FRAG_HASH_SIZE, KM_NOSLEEP);
+	if (ftemp == NULL) {
+		kmem_free(frag->itpf_ptr,
+		    sizeof (ipsec_fragcache_entry_t *) *
+		    IPSEC_FRAG_HASH_SLOTS);
+		/* Don't leave a dangling pointer for uninit to trip over. */
+		frag->itpf_ptr = NULL;
+		return (B_FALSE);
+	}
+
+	/* Push each array element onto the head of the free list. */
+	for (i = 0; i < IPSEC_FRAG_HASH_SIZE; i++) {
+		ftemp->itpfe_next = frag->itpf_freelist;
+		frag->itpf_freelist = ftemp;
+		ftemp++;
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Tear down a fragcache instance: free every cached fragment chain, the
+ * entry array and the bucket array, then destroy itpf_lock.  Nothing is
+ * freed when itpf_ptr is NULL.
+ */
+void
+ipsec_fragcache_uninit(ipsec_fragcache_t *frag)
+{
+	ipsec_fragcache_entry_t *fep;
+	ipsec_fragcache_entry_t *base;
+	int i;
+
+	mutex_enter(&frag->itpf_lock);
+	if (frag->itpf_ptr) {
+		/* Delete any existing fragcache entry chains */
+		for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
+			fep = (frag->itpf_ptr)[i];
+			while (fep != NULL) {
+				/* Returned fep is next in chain or NULL */
+				fep = fragcache_delentry(i, fep, frag);
+			}
+		}
+		/*
+		 * Every entry is now back on the free list.  They all live
+		 * in the single array allocated by ipsec_fragcache_init(),
+		 * so the start of that allocation is the lowest-addressed
+		 * entry.  The tail of the list is NOT a reliable marker:
+		 * once the free list has been drained and refilled, the
+		 * tail can be any element of the array.
+		 */
+		base = frag->itpf_freelist;
+		for (fep = base; fep != NULL; fep = fep->itpfe_next) {
+			if (fep < base)
+				base = fep;
+		}
+		/*
+		 * XXX - If we ever dynamically grow the freelist
+		 * then we'll have to free entries individually
+		 * or determine how many entries or chunks we have
+		 * grown since the initial allocation.
+		 */
+		if (base != NULL) {
+			kmem_free(base,
+			    sizeof (ipsec_fragcache_entry_t) *
+			    IPSEC_FRAG_HASH_SIZE);
+		}
+		frag->itpf_freelist = NULL;
+		/* Free the fragcache structure */
+		kmem_free(frag->itpf_ptr,
+		    sizeof (ipsec_fragcache_entry_t *) *
+		    IPSEC_FRAG_HASH_SLOTS);
+		frag->itpf_ptr = NULL;
+	}
+	mutex_exit(&frag->itpf_lock);
+	mutex_destroy(&frag->itpf_lock);
+}
+
+/*
+ * Detach the cached fragment chain from fep and return the (now empty)
+ * entry to the free list.  Returns the detached chain; the caller owns it
+ * and must drop or hand it on.  Caller must hold frag->itpf_lock.
+ */
+static mblk_t *
+fragcache_detach_chain(int slot, ipsec_fragcache_entry_t *fep,
+    ipsec_fragcache_t *frag)
+{
+	mblk_t *chain = fep->itpfe_fraglist;
+
+	ASSERT(MUTEX_HELD(&frag->itpf_lock));
+
+	fep->itpfe_fraglist = NULL;
+	(void) fragcache_delentry(slot, fep, frag);
+	return (chain);
+}
+
+/*
+ * Add a fragment to the fragment cache.   Consumes mp if NULL is returned.
+ * Returns mp if a whole fragment has been assembled, NULL otherwise
+ *
+ *	frag		cache instance; itpf_lock is taken here and released
+ *			before every return.
+ *	ipsec_mp	non-NULL marks the packet as inbound.  It is then the
+ *			message that is cached or dropped, and cached chain
+ *			members carry their data in b_cont.
+ *	mp		data message; the inner IP header is expected at
+ *			mp->b_rptr + outer_hdr_len.
+ *	outer_hdr_len	offset of the inner header; zero is treated as "no
+ *			outer header" when walking cached fragments.
+ *
+ * On success the return value is the head of the cached chain, linked
+ * through b_next in order of fragment offset.
+ *
+ * Whenever a cached fragment cannot be inspected (allocation failure or
+ * malformed header) the whole cache entry is discarded, so the cache never
+ * keeps pointers to freed messages.
+ */
+
+mblk_t *
+ipsec_fragcache_add(ipsec_fragcache_t *frag, mblk_t *ipsec_mp, mblk_t *mp,
+    int outer_hdr_len)
+{
+	boolean_t is_v4;
+	time_t itpf_time;
+	ipha_t *iph;
+	ipha_t *oiph;
+	ip6_t *ip6h = NULL;
+	uint8_t v6_proto;
+	uint8_t *v6_proto_p;
+	uint16_t ip6_hdr_length;
+	ip6_pkt_t ipp;
+	ip6_frag_t *fraghdr;
+	ipsec_fragcache_entry_t *fep;
+	int i;
+	mblk_t *nmp, *prevmp, *spare_mp = NULL;
+	int firstbyte, lastbyte;
+	int offset;
+	int last;
+	boolean_t inbound = (ipsec_mp != NULL);
+	mblk_t *first_mp = inbound ? ipsec_mp : mp;
+
+	mutex_enter(&frag->itpf_lock);
+
+	oiph  = (ipha_t *)mp->b_rptr;
+	iph  = (ipha_t *)(mp->b_rptr + outer_hdr_len);
+	if (IPH_HDR_VERSION(iph) == IPV4_VERSION) {
+		is_v4 = B_TRUE;
+	} else {
+		ASSERT(IPH_HDR_VERSION(iph) == IPV6_VERSION);
+		/*
+		 * Work on a pulled-up copy; ip6h and fraghdr point into
+		 * spare_mp, so it must stay allocated until the insertion
+		 * loop below is finished with them.
+		 */
+		if ((spare_mp = msgpullup(mp, -1)) == NULL) {
+			mutex_exit(&frag->itpf_lock);
+			ip_drop_packet(first_mp, inbound, NULL, NULL,
+			    &ipdrops_spd_nomem, &spd_dropper);
+			return (NULL);
+		}
+		ip6h = (ip6_t *)(spare_mp->b_rptr + outer_hdr_len);
+
+		if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h, &ip6_hdr_length,
+		    &v6_proto_p)) {
+			/*
+			 * Find upper layer protocol.
+			 * If it fails we have a malformed packet
+			 */
+			mutex_exit(&frag->itpf_lock);
+			ip_drop_packet(first_mp, inbound, NULL, NULL,
+			    &ipdrops_spd_malformed_packet, &spd_dropper);
+			freemsg(spare_mp);
+			return (NULL);
+		} else {
+			v6_proto = *v6_proto_p;
+		}
+
+
+		bzero(&ipp, sizeof (ipp));
+		(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
+		if (!(ipp.ipp_fields & IPPF_FRAGHDR)) {
+			/*
+			 * We think this is a fragment, but didn't find
+			 * a fragment header.  Something is wrong.
+			 */
+			mutex_exit(&frag->itpf_lock);
+			ip_drop_packet(first_mp, inbound, NULL, NULL,
+			    &ipdrops_spd_malformed_frag, &spd_dropper);
+			freemsg(spare_mp);
+			return (NULL);
+		}
+		fraghdr = ipp.ipp_fraghdr;
+		is_v4 = B_FALSE;
+	}
+
+	/* Anything to cleanup? */
+
+	/*
+	 * This cleanup call could be put in a timer loop
+	 * but it may actually be just as reasonable a decision to
+	 * leave it here.  The disadvantage is this only gets called when
+	 * frags are added.  The advantage is that it is not
+	 * susceptible to race conditions like a time-based cleanup
+	 * may be.
+	 */
+	itpf_time = gethrestime_sec();
+	if (itpf_time >= frag->itpf_expire_hint)
+		ipsec_fragcache_clean(frag);
+
+	/* Lookup to see if there is an existing entry */
+
+	if (is_v4)
+		i = IPSEC_FRAG_HASH_FUNC(iph->ipha_ident);
+	else
+		i = IPSEC_FRAG_HASH_FUNC(fraghdr->ip6f_ident);
+
+	for (fep = (frag->itpf_ptr)[i]; fep; fep = fep->itpfe_next) {
+		if (is_v4) {
+			ASSERT(iph != NULL);
+			if ((fep->itpfe_id == iph->ipha_ident) &&
+			    (fep->itpfe_src == iph->ipha_src) &&
+			    (fep->itpfe_dst == iph->ipha_dst) &&
+			    (fep->itpfe_proto == iph->ipha_protocol))
+				break;
+		} else {
+			ASSERT(fraghdr != NULL);
+			ASSERT(fep != NULL);
+			if ((fep->itpfe_id == fraghdr->ip6f_ident) &&
+			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_src6,
+			    &ip6h->ip6_src) &&
+			    IN6_ARE_ADDR_EQUAL(&fep->itpfe_dst6,
+			    &ip6h->ip6_dst) && (fep->itpfe_proto == v6_proto))
+				break;
+		}
+	}
+
+	/*
+	 * [firstbyte, lastbyte) is the byte range of the reassembled
+	 * payload that this fragment covers.
+	 */
+	if (is_v4) {
+		firstbyte = V4_FRAG_OFFSET(iph);
+		lastbyte  = firstbyte + ntohs(iph->ipha_length) -
+		    IPH_HDR_LENGTH(iph);
+		last = (V4_MORE_FRAGS(iph) == 0);
+#ifdef FRAGCACHE_DEBUG
+		cmn_err(CE_WARN, "V4 fragcache: firstbyte = %d, lastbyte = %d, "
+		    "last = %d, id = %d\n", firstbyte, lastbyte, last,
+		    iph->ipha_ident);
+#endif
+	} else {
+		firstbyte = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+		lastbyte  = firstbyte + ntohs(ip6h->ip6_plen) +
+		    sizeof (ip6_t) - ip6_hdr_length;
+		last = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG) == 0;
+#ifdef FRAGCACHE_DEBUG
+		cmn_err(CE_WARN, "V6 fragcache: firstbyte = %d, lastbyte = %d, "
+		    "last = %d, id = %d, fraghdr = %p, spare_mp = %p\n",
+		    firstbyte, lastbyte, last, fraghdr->ip6f_ident,
+		    fraghdr, spare_mp);
+#endif
+	}
+
+	/* check for bogus fragments and delete the entry */
+	if (firstbyte > 0 && firstbyte <= 8) {
+		if (fep != NULL)
+			(void) fragcache_delentry(i, fep, frag);
+		mutex_exit(&frag->itpf_lock);
+		ip_drop_packet(first_mp, inbound, NULL, NULL,
+		    &ipdrops_spd_malformed_frag, &spd_dropper);
+		freemsg(spare_mp);
+		return (NULL);
+	}
+
+	/* Not found, allocate a new entry */
+	if (fep == NULL) {
+		if (frag->itpf_freelist == NULL) {
+			/* see if there is some space */
+			ipsec_fragcache_clean(frag);
+			if (frag->itpf_freelist == NULL) {
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				freemsg(spare_mp);
+				return (NULL);
+			}
+		}
+
+		fep = frag->itpf_freelist;
+		frag->itpf_freelist = fep->itpfe_next;
+
+		if (is_v4) {
+			bcopy((caddr_t)&iph->ipha_src, (caddr_t)&fep->itpfe_src,
+			    sizeof (struct in_addr));
+			bcopy((caddr_t)&iph->ipha_dst, (caddr_t)&fep->itpfe_dst,
+			    sizeof (struct in_addr));
+			fep->itpfe_id = iph->ipha_ident;
+			fep->itpfe_proto = iph->ipha_protocol;
+			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
+		} else {
+			bcopy((in6_addr_t *)&ip6h->ip6_src,
+			    (in6_addr_t *)&fep->itpfe_src6,
+			    sizeof (struct in6_addr));
+			bcopy((in6_addr_t *)&ip6h->ip6_dst,
+			    (in6_addr_t *)&fep->itpfe_dst6,
+			    sizeof (struct in6_addr));
+			fep->itpfe_id = fraghdr->ip6f_ident;
+			fep->itpfe_proto = v6_proto;
+			i = IPSEC_FRAG_HASH_FUNC(fep->itpfe_id);
+		}
+		itpf_time = gethrestime_sec();
+		fep->itpfe_exp = itpf_time + IPSEC_FRAG_TTL_MAX + 1;
+		fep->itpfe_last = 0;
+		fep->itpfe_fraglist = NULL;
+		fep->itpfe_depth = 0;
+		fep->itpfe_next = (frag->itpf_ptr)[i];
+		(frag->itpf_ptr)[i] = fep;
+
+		if (frag->itpf_expire_hint > fep->itpfe_exp)
+			frag->itpf_expire_hint = fep->itpfe_exp;
+
+	}
+	/*
+	 * spare_mp is deliberately NOT freed yet: for an IPv6 inner packet
+	 * the overlap checks below still read through ip6h.  It is freed
+	 * on every exit from the insertion loop.
+	 */
+
+	/* Insert it in the frag list */
+	/* List is in order by starting offset of fragments */
+
+	prevmp = NULL;
+	for (nmp = fep->itpfe_fraglist; nmp; nmp = nmp->b_next) {
+		ipha_t *niph;
+		ipha_t *oniph;
+		ip6_t *nip6h;
+		ip6_pkt_t nipp;
+		ip6_frag_t *nfraghdr;
+		uint16_t nip6_hdr_length;
+		uint8_t *nv6_proto_p;
+		int nfirstbyte, nlastbyte;
+		char *data, *ndata;
+		mblk_t *nspare_mp = NULL;
+		mblk_t *ndata_mp = (inbound ? nmp->b_cont : nmp);
+		int hdr_len;
+
+		/*
+		 * NOTE(review): the outer header version is read from the
+		 * new packet (mp), not from the cached one (ndata_mp);
+		 * presumably both always share the same outer family --
+		 * confirm against the callers.
+		 */
+		oniph  = (ipha_t *)mp->b_rptr;
+		nip6h = NULL;
+		niph = NULL;
+
+		/*
+		 * Determine outer header type and length and set
+		 * pointers appropriately
+		 */
+
+		if (IPH_HDR_VERSION(oniph) == IPV4_VERSION) {
+			hdr_len = ((outer_hdr_len != 0) ?
+			    IPH_HDR_LENGTH(oiph) : 0);
+			niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
+		} else {
+			ASSERT(IPH_HDR_VERSION(oniph) == IPV6_VERSION);
+			if ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL) {
+				/* Give up on the whole entry. */
+				nmp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				freemsg(spare_mp);
+				return (NULL);
+			}
+			nip6h = (ip6_t *)nspare_mp->b_rptr;
+			(void) ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
+			    &nip6_hdr_length, &v6_proto_p);
+			hdr_len = ((outer_hdr_len != 0) ? nip6_hdr_length : 0);
+		}
+
+		/*
+		 * Determine inner header type and length and set
+		 * pointers appropriately
+		 */
+
+		if (is_v4) {
+			if (niph == NULL) {
+				/* Was v6 outer */
+				niph = (ipha_t *)(ndata_mp->b_rptr + hdr_len);
+			}
+			nfirstbyte = V4_FRAG_OFFSET(niph);
+			nlastbyte = nfirstbyte + ntohs(niph->ipha_length) -
+			    IPH_HDR_LENGTH(niph);
+		} else {
+			if ((nspare_mp == NULL) &&
+			    ((nspare_mp = msgpullup(ndata_mp, -1)) == NULL)) {
+				/* Give up on the whole entry. */
+				nmp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				freemsg(spare_mp);
+				return (NULL);
+			}
+			nip6h = (ip6_t *)(nspare_mp->b_rptr + hdr_len);
+			if (!ip_hdr_length_nexthdr_v6(nspare_mp, nip6h,
+			    &nip6_hdr_length, &nv6_proto_p)) {
+				/* Give up on the whole entry. */
+				nmp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(nmp, inbound, NULL, NULL,
+				    &ipdrops_spd_malformed_frag, &spd_dropper);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_malformed_frag, &spd_dropper);
+				ipsec_freemsg_chain(nspare_mp);
+				freemsg(spare_mp);
+				return (NULL);
+			}
+			bzero(&nipp, sizeof (nipp));
+			(void) ip_find_hdr_v6(nspare_mp, nip6h, &nipp, NULL);
+			nfraghdr = nipp.ipp_fraghdr;
+			nfirstbyte = ntohs(nfraghdr->ip6f_offlg &
+			    IP6F_OFF_MASK);
+			nlastbyte  = nfirstbyte + ntohs(nip6h->ip6_plen) +
+			    sizeof (ip6_t) - nip6_hdr_length;
+		}
+		/*
+		 * nspare_mp stays allocated until the end of this iteration
+		 * because nip6h points into it and is read by the overlap
+		 * checks below.
+		 */
+
+		/*
+		 * NOTE(review): for IPv4 the comparisons below read straight
+		 * out of the original mblks and so assume the compared bytes
+		 * are contiguous in the first data block -- confirm.
+		 */
+
+		/* Check for overlapping fragments */
+		if (firstbyte >= nfirstbyte && firstbyte < nlastbyte) {
+			/*
+			 * Overlap Check:
+			 *  ~~~~---------		# Check if the newly
+			 * ~	ndata_mp|		# received fragment
+			 *  ~~~~---------		# overlaps with the
+			 *	 ---------~~~~~~	# current fragment.
+			 *	|    mp		~
+			 *	 ---------~~~~~~
+			 *
+			 * The shared bytes start at offset 0 of the new
+			 * fragment's payload and at (firstbyte - nfirstbyte)
+			 * into the cached fragment's payload.
+			 */
+			if (is_v4) {
+				data  = (char *)iph + IPH_HDR_LENGTH(iph);
+				ndata = (char *)niph + IPH_HDR_LENGTH(niph) +
+				    firstbyte - nfirstbyte;
+			} else {
+				data  = (char *)ip6h + ip6_hdr_length;
+				ndata = (char *)nip6h + nip6_hdr_length +
+				    firstbyte - nfirstbyte;
+			}
+			if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
+			    - firstbyte)) {
+				/* Overlapping data does not match */
+				ipsec_freemsg_chain(nspare_mp);
+				freemsg(spare_mp);
+				(void) fragcache_delentry(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_overlap_frag, &spd_dropper);
+				return (NULL);
+			}
+			/* Part of defense for jolt2.c fragmentation attack */
+			if (firstbyte >= nfirstbyte && lastbyte <= nlastbyte) {
+				/*
+				 * Check for identical or subset fragments:
+				 *  ----------	    ~~~~--------~~~~~
+				 * |    nmp   | or  ~	   nmp	    ~
+				 *  ----------	    ~~~~--------~~~~~
+				 *  ----------		  ------
+				 * |	mp    |		 |  mp  |
+				 *  ----------		  ------
+				 */
+				ipsec_freemsg_chain(nspare_mp);
+				freemsg(spare_mp);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet(first_mp, inbound, NULL, NULL,
+				    &ipdrops_spd_evil_frag, &spd_dropper);
+				return (NULL);
+			}
+
+		}
+
+		/* Correct location for this fragment? */
+		if (firstbyte <= nfirstbyte) {
+			/*
+			 * Check if the tail end of the new fragment overlaps
+			 * with the head of the current fragment.
+			 *	  --------~~~~~~~
+			 *	 |    nmp	~
+			 *	  --------~~~~~~~
+			 *  ~~~~~--------
+			 *  ~	mp	 |
+			 *  ~~~~~--------
+			 */
+			if (lastbyte > nfirstbyte) {
+				/*
+				 * Fragments overlap.  The shared bytes start
+				 * (nfirstbyte - firstbyte) into the new
+				 * fragment's payload and at offset 0 of the
+				 * cached fragment's payload.
+				 */
+				if (is_v4) {
+					data  = (char *)iph +
+					    IPH_HDR_LENGTH(iph) + nfirstbyte -
+					    firstbyte;
+					ndata = (char *)niph +
+					    IPH_HDR_LENGTH(niph);
+				} else {
+					data  = (char *)ip6h +
+					    ip6_hdr_length + nfirstbyte -
+					    firstbyte;
+					ndata = (char *)nip6h + nip6_hdr_length;
+				}
+				if (bcmp(data, ndata, MIN(lastbyte, nlastbyte)
+				    - nfirstbyte)) {
+					/* Overlap mismatch */
+					ipsec_freemsg_chain(nspare_mp);
+					freemsg(spare_mp);
+					(void) fragcache_delentry(i, fep, frag);
+					mutex_exit(&frag->itpf_lock);
+					ip_drop_packet(first_mp, inbound, NULL,
+					    NULL, &ipdrops_spd_overlap_frag,
+					    &spd_dropper);
+					return (NULL);
+				}
+			}
+
+			/*
+			 * Fragment does not illegally overlap and can now
+			 * be inserted into the chain
+			 */
+			ipsec_freemsg_chain(nspare_mp);
+			break;
+		}
+
+		ipsec_freemsg_chain(nspare_mp);
+		prevmp = nmp;
+	}
+
+	/* Done with the pulled-up copy of the new fragment. */
+	freemsg(spare_mp);
+	spare_mp = NULL;
+
+	/*
+	 * Part of defense for jolt2.c fragmentation attack.  This is checked
+	 * before first_mp is linked into the entry: fragcache_delentry()
+	 * frees the entry's whole chain, and first_mp must be freed exactly
+	 * once, by ip_drop_packet().
+	 */
+	if (++(fep->itpfe_depth) > IPSEC_MAX_FRAGS) {
+		(void) fragcache_delentry(i, fep, frag);
+		mutex_exit(&frag->itpf_lock);
+		ip_drop_packet(first_mp, inbound, NULL, NULL,
+		    &ipdrops_spd_max_frags, &spd_dropper);
+		return (NULL);
+	}
+
+	/* Link the new fragment in between prevmp and nmp. */
+	first_mp->b_next = nmp;
+
+	if (prevmp == NULL) {
+		fep->itpfe_fraglist = first_mp;
+	} else {
+		prevmp->b_next = first_mp;
+	}
+	if (last)
+		fep->itpfe_last = 1;
+
+	/* Check for complete packet */
+
+	if (!fep->itpfe_last) {
+		mutex_exit(&frag->itpf_lock);
+#ifdef FRAGCACHE_DEBUG
+		cmn_err(CE_WARN, "Fragment cached, not last.\n");
+#endif
+		return (NULL);
+	}
+
+#ifdef FRAGCACHE_DEBUG
+	cmn_err(CE_WARN, "Last fragment cached.\n");
+	cmn_err(CE_WARN, "mp = %p, first_mp = %p.\n", mp, first_mp);
+#endif
+
+	/*
+	 * Walk the ordered chain; "offset" is the highest payload byte
+	 * covered so far without a gap.
+	 */
+	offset = 0;
+	for (mp = fep->itpfe_fraglist; mp; mp = mp->b_next) {
+		mblk_t *data_mp = (inbound ? mp->b_cont : mp);
+		int hdr_len;
+
+		oiph  = (ipha_t *)data_mp->b_rptr;
+		ip6h = NULL;
+		iph = NULL;
+
+		spare_mp = NULL;
+		if (IPH_HDR_VERSION(oiph) == IPV4_VERSION) {
+			hdr_len = ((outer_hdr_len != 0) ?
+			    IPH_HDR_LENGTH(oiph) : 0);
+			iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
+		} else {
+			ASSERT(IPH_HDR_VERSION(oiph) == IPV6_VERSION);
+			if ((spare_mp = msgpullup(data_mp, -1)) == NULL) {
+				/* Discard the whole entry, not just a tail. */
+				mp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(mp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				return (NULL);
+			}
+			ip6h = (ip6_t *)spare_mp->b_rptr;
+			(void) ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+			    &ip6_hdr_length, &v6_proto_p);
+			hdr_len = ((outer_hdr_len != 0) ? ip6_hdr_length : 0);
+		}
+
+		/* Calculate current fragment start/end */
+		if (is_v4) {
+			if (iph == NULL) {
+				/* Was v6 outer */
+				iph = (ipha_t *)(data_mp->b_rptr + hdr_len);
+			}
+			firstbyte = V4_FRAG_OFFSET(iph);
+			lastbyte = firstbyte + ntohs(iph->ipha_length) -
+			    IPH_HDR_LENGTH(iph);
+		} else {
+			if ((spare_mp == NULL) &&
+			    ((spare_mp = msgpullup(data_mp, -1)) == NULL)) {
+				/* Discard the whole entry, not just a tail. */
+				mp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(mp, inbound, NULL, NULL,
+				    &ipdrops_spd_nomem, &spd_dropper);
+				return (NULL);
+			}
+			ip6h = (ip6_t *)(spare_mp->b_rptr + hdr_len);
+			if (!ip_hdr_length_nexthdr_v6(spare_mp, ip6h,
+			    &ip6_hdr_length, &v6_proto_p)) {
+				/* Discard the whole entry, not just a tail. */
+				mp = fragcache_detach_chain(i, fep, frag);
+				mutex_exit(&frag->itpf_lock);
+				ip_drop_packet_chain(mp, inbound, NULL, NULL,
+				    &ipdrops_spd_malformed_frag, &spd_dropper);
+				ipsec_freemsg_chain(spare_mp);
+				return (NULL);
+			}
+			v6_proto = *v6_proto_p;
+			bzero(&ipp, sizeof (ipp));
+			(void) ip_find_hdr_v6(spare_mp, ip6h, &ipp, NULL);
+			fraghdr = ipp.ipp_fraghdr;
+			firstbyte = ntohs(fraghdr->ip6f_offlg &
+			    IP6F_OFF_MASK);
+			lastbyte  = firstbyte + ntohs(ip6h->ip6_plen) +
+			    sizeof (ip6_t) - ip6_hdr_length;
+		}
+
+		/*
+		 * If this fragment is greater than current offset,
+		 * we have a missing fragment so return NULL
+		 */
+		if (firstbyte > offset) {
+			mutex_exit(&frag->itpf_lock);
+#ifdef FRAGCACHE_DEBUG
+			/*
+			 * Note, this can happen when the last frag
+			 * gets sent through because it is smaller
+			 * than the MTU.  It is not necessarily an
+			 * error condition.
+			 */
+			cmn_err(CE_WARN, "Frag greater than offset! : "
+			    "missing fragment: firstbyte = %d, offset = %d, "
+			    "mp = %p\n", firstbyte, offset, mp);
+#endif
+			ipsec_freemsg_chain(spare_mp);
+			return (NULL);
+		}
+
+		/*
+		 * If we are at the last fragment, we have the complete
+		 * packet, so rechain things and return it to caller
+		 * for processing
+		 */
+
+		if ((is_v4 && !V4_MORE_FRAGS(iph)) ||
+		    (!is_v4 && !(fraghdr->ip6f_offlg & IP6F_MORE_FRAG))) {
+			mp = fragcache_detach_chain(i, fep, frag);
+			mutex_exit(&frag->itpf_lock);
+
+			if ((is_v4 && (firstbyte + ntohs(iph->ipha_length) >
+			    65535)) || (!is_v4 && (firstbyte +
+			    ntohs(ip6h->ip6_plen) > 65535))) {
+				/* It is an invalid "ping-o-death" packet */
+				/* Discard it */
+				ip_drop_packet_chain(mp, inbound, NULL, NULL,
+				    &ipdrops_spd_evil_frag, &spd_dropper);
+				ipsec_freemsg_chain(spare_mp);
+				return (NULL);
+			}
+#ifdef FRAGCACHE_DEBUG
+			cmn_err(CE_WARN, "Fragcache returning mp = %p, "
+				"mp->b_next = %p", mp, mp->b_next);
+#endif
+			ipsec_freemsg_chain(spare_mp);
+			/*
+			 * For inbound case, mp has ipsec_in b_next'd chain
+			 * For outbound case, it is just data mp chain
+			 */
+			return (mp);
+		}
+		ipsec_freemsg_chain(spare_mp);
+
+		/*
+		 * Update new ending offset if this
+		 * fragment extends the packet
+		 */
+		if (offset < lastbyte)
+			offset = lastbyte;
+	}
+
+	mutex_exit(&frag->itpf_lock);
+
+	/* Didn't find last fragment, so return NULL */
+	return (NULL);
+}
+
+/*
+ * Expire stale fragcache entries.  Caller must hold frag->itpf_lock.
+ *
+ * Every entry whose itpfe_exp is earlier than the current time is deleted.
+ * itpf_expire_hint is then set to the earliest expiry among the surviving
+ * entries, capped at "now + 10000".  If the free list is still empty after
+ * that, the surviving entry with the earliest expiry is evicted so the
+ * caller can allocate.
+ */
+static void
+ipsec_fragcache_clean(ipsec_fragcache_t *frag)
+{
+	ipsec_fragcache_entry_t *fep;
+	int i;
+	ipsec_fragcache_entry_t *earlyfep = NULL;
+	time_t itpf_time;
+	time_t earlyexp;	/* time_t, so expiry times are not truncated */
+	int earlyi = 0;
+
+	ASSERT(MUTEX_HELD(&frag->itpf_lock));
+
+	itpf_time = gethrestime_sec();
+	earlyexp = itpf_time + 10000;
+
+	for (i = 0; i < IPSEC_FRAG_HASH_SLOTS; i++) {
+		fep = (frag->itpf_ptr)[i];
+		while (fep) {
+			if (fep->itpfe_exp < itpf_time) {
+				/* found */
+				fep = fragcache_delentry(i, fep, frag);
+				continue;
+			}
+			/*
+			 * Track the eviction candidate independently of the
+			 * capped hint, so one is always found when any entry
+			 * survives (even if the clock was stepped back).
+			 */
+			if (earlyfep == NULL ||
+			    fep->itpfe_exp < earlyfep->itpfe_exp) {
+				earlyfep = fep;
+				earlyi = i;
+			}
+			if (fep->itpfe_exp < earlyexp)
+				earlyexp = fep->itpfe_exp;
+			fep = fep->itpfe_next;
+		}
+	}
+
+	frag->itpf_expire_hint = earlyexp;
+
+	/* if (!found) */
+	if (frag->itpf_freelist == NULL && earlyfep != NULL)
+		(void) fragcache_delentry(earlyi, earlyfep, frag);
+}
+
+/*
+ * Remove fep from hash bucket "slot", free whatever fragment chain it still
+ * holds, and push it onto the free list.  Caller must hold itpf_lock.
+ *
+ * Returns the entry that followed fep in the bucket (possibly NULL), so a
+ * caller walking the bucket can continue from the return value.
+ */
+static ipsec_fragcache_entry_t *
+fragcache_delentry(int slot, ipsec_fragcache_entry_t *fep,
+    ipsec_fragcache_t *frag)
+{
+	ipsec_fragcache_entry_t **linkp;
+	ipsec_fragcache_entry_t *successor = fep->itpfe_next;
+
+	ASSERT(MUTEX_HELD(&frag->itpf_lock));
+
+	/* Free up any fragment list still in cache entry */
+	ipsec_freemsg_chain(fep->itpfe_fraglist);
+
+	linkp = &(frag->itpf_ptr)[slot];
+	ASSERT(*linkp != NULL);
+
+	/*
+	 * Walk the links rather than the entries, so the bucket head and an
+	 * interior entry are unlinked by the same code.
+	 */
+	for (; *linkp != NULL; linkp = &(*linkp)->itpfe_next) {
+		if (*linkp != fep)
+			continue;
+
+		/* unlink from hash chain */
+		*linkp = successor;
+		/* link into free list */
+		fep->itpfe_next = frag->itpf_freelist;
+		frag->itpf_freelist = fep;
+		return (successor);
+	}
+
+	/* fep was not on this bucket's chain */
+	ASSERT(*linkp != NULL);
+	/* NOTREACHED */
+	return (NULL);
+}
--- a/usr/src/uts/common/inet/ip/spdsock.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/spdsock.c	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -42,7 +41,6 @@
 #include <sys/debug.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
-#include <sys/proc.h>
 #include <sys/suntpi.h>
 #include <sys/policy.h>
 
@@ -57,6 +55,8 @@
 #include <inet/ip6.h>
 #include <inet/mi.h>
 #include <inet/nd.h>
+#include <inet/ip_if.h>
+#include <inet/tun.h>
 #include <inet/optcom.h>
 #include <inet/ipsec_info.h>
 #include <inet/ipsec_impl.h>
@@ -158,6 +158,8 @@
 static void spdsock_rsrv(queue_t *);
 static void spdsock_loadcheck(void *);
 static void spdsock_merge_algs(void);
+static void spdsock_flush_one(ipsec_policy_head_t *);
+static mblk_t *spdsock_dump_next_record(spdsock_t *);
 
 static struct module_info info = {
 	5138, "spdsock", 1, INFPSZ, 512, 128
@@ -192,6 +194,9 @@
 
 #define	NEXECMODES	(sizeof (execmodes) / sizeof (execmodes[0]))
 
+#define	ALL_ACTIVE_POLHEADS ((ipsec_policy_head_t *)-1)
+#define	ALL_INACTIVE_POLHEADS ((ipsec_policy_head_t *)-2)
+
 /* ARGSUSED */
 static int
 spdsock_param_get(q, mp, cp, cr)
@@ -301,7 +306,33 @@
 static boolean_t
 ext_check(spd_ext_t *ext)
 {
-
+	spd_if_t *tunname = (spd_if_t *)ext;
+	int i;
+	char *idstr;
+
+	if (ext->spd_ext_type == SPD_EXT_TUN_NAME) {
+		/* (NOTE:  Modified from SADB_EXT_IDENTITY..) */
+
+		/*
+		 * Make sure the tunnel name is null-terminated: force a NUL
+		 * into the last byte if none appears sooner.  Test the
+		 * remaining length before each read to stay inside the ext.
+		 */
+		i = SPD_64TO8(tunname->spd_if_len) - sizeof (spd_if_t);
+		idstr = (char *)(tunname + 1);
+		while (i > 0 && *idstr != '\0') {
+			i--;
+			idstr++;
+		}
+		if (i == 0) {
+			/*
+			 * I.e., if the bozo user didn't NULL-terminate the
+			 * string...
+			 */
+			idstr--;
+			*idstr = '\0';
+		}
+	}
 	return (B_TRUE);	/* For now... */
 }
 
@@ -454,14 +485,55 @@
 	qreply(q, mp);
 }
 
-/* ARGSUSED */
+/*
+ * Do NOT consume a reference to itp.
+ */
 static void
-spdsock_flush(queue_t *q, ipsec_policy_head_t *iph,
-    mblk_t *mp, spd_ext_t **extv)
+spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie)
+{
+	boolean_t active = (boolean_t)cookie;
+	ipsec_policy_head_t *iph;
+
+	iph = active ? itp->itp_policy : itp->itp_inactive;
+	IPPH_REFHOLD(iph);
+	mutex_enter(&itp->itp_lock);
+	spdsock_flush_one(iph);
+	if (active)
+		itp->itp_flags &= ~ITPF_PFLAGS;
+	else
+		itp->itp_flags &= ~ITPF_IFLAGS;
+	mutex_exit(&itp->itp_lock);
+}
+
+/*
+ * Clear out one polhead.
+ */
+static void
+spdsock_flush_one(ipsec_policy_head_t *iph)
 {
 	rw_enter(&iph->iph_lock, RW_WRITER);
 	ipsec_polhead_flush(iph);
 	rw_exit(&iph->iph_lock);
+	IPPH_REFRELE(iph);
+}
+
+static void
+spdsock_flush(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp)
+{
+	boolean_t active;
+
+	if (iph != ALL_ACTIVE_POLHEADS && iph != ALL_INACTIVE_POLHEADS) {
+		spdsock_flush_one(iph);
+	} else {
+		active = (iph == ALL_ACTIVE_POLHEADS);
+
+		/* First flush the global policy. */
+		spdsock_flush_one(active ? ipsec_system_policy() :
+		    ipsec_inactive_policy());
+
+		/* Then flush every tunnel's appropriate one. */
+		itp_walk(spdsock_flush_node, (void *)active);
+	}
 
 	spd_echo(q, mp);
 }
@@ -592,8 +664,12 @@
  * Sanity check action against reality, and shrink-wrap key sizes..
  */
 static boolean_t
-spdsock_check_action(ipsec_act_t *act, int *diag)
+spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag)
 {
+	if (tunnel_polhead && act->ipa_apply.ipp_use_unique) {
+		*diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS;
+		return (B_FALSE);
+	}
 	if ((act->ipa_type != IPSEC_ACT_APPLY) &&
 	    (act->ipa_apply.ipp_use_ah ||
 		act->ipa_apply.ipp_use_esp ||
@@ -625,6 +701,11 @@
 	struct spd_attribute *attrp, *endattrp;
 	uint64_t *endp;
 	int nact;
+	boolean_t tunnel_polhead;
+
+	tunnel_polhead = (extv[SPD_EXT_TUN_NAME] != NULL &&
+	    (((struct spd_rule *)extv[SPD_EXT_RULE])->spd_rule_flags &
+		SPD_RULE_FLAG_TUNNEL));
 
 	*actpp = NULL;
 	*nactp = 0;
@@ -673,9 +754,10 @@
 				*diag = SPD_DIAGNOSTIC_ADD_WRONG_ACT_COUNT;
 				goto fail;
 			}
-			if (!spdsock_check_action(&act, diag))
+			if (!spdsock_check_action(&act, tunnel_polhead, diag))
 				goto fail;
 			*actp++ = act;
+			spdsock_reset_act(&act);
 			break;
 
 		case SPD_ATTR_TYPE:
@@ -686,6 +768,13 @@
 			break;
 
 		case SPD_ATTR_FLAGS:
+			if (!tunnel_polhead && extv[SPD_EXT_TUN_NAME] != NULL) {
+				/*
+				 * Set "sa unique" for transport-mode
+				 * tunnels whether we want to or not.
+				 */
+				attrp->spd_attr_value |= SPD_APPLY_UNIQUE;
+			}
 			if (!spd_convert_flags(attrp->spd_attr_value, &act)) {
 				*diag = SPD_DIAGNOSTIC_ADD_BAD_FLAGS;
 				goto fail;
@@ -693,14 +782,26 @@
 			break;
 
 		case SPD_ATTR_AH_AUTH:
+			if (attrp->spd_attr_value == 0) {
+				*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
+				goto fail;
+			}
 			act.ipa_apply.ipp_auth_alg = attrp->spd_attr_value;
 			break;
 
 		case SPD_ATTR_ESP_ENCR:
+			if (attrp->spd_attr_value == 0) {
+				*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
+				goto fail;
+			}
 			act.ipa_apply.ipp_encr_alg = attrp->spd_attr_value;
 			break;
 
 		case SPD_ATTR_ESP_AUTH:
+			if (attrp->spd_attr_value == 0) {
+				*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
+				goto fail;
+			}
 			act.ipa_apply.ipp_esp_auth_alg = attrp->spd_attr_value;
 			break;
 
@@ -768,14 +869,15 @@
 static int
 mkrule(ipsec_policy_head_t *iph, struct spd_rule *rule,
     ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t af,
-    tmprule_t **rp)
+    tmprule_t **rp, uint64_t *index)
 {
 	ipsec_policy_t *pol;
 
 	sel->ipsl_valid &= ~(IPSL_IPV6|IPSL_IPV4);
 	sel->ipsl_valid |= af;
 
-	pol = ipsec_policy_create(sel, actp, nact, rule->spd_rule_priority);
+	pol = ipsec_policy_create(sel, actp, nact, rule->spd_rule_priority,
+	    index);
 	if (pol == NULL)
 		return (ENOMEM);
 
@@ -793,17 +895,19 @@
 static int
 mkrulepair(ipsec_policy_head_t *iph, struct spd_rule *rule,
     ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t afs,
-    tmprule_t **rp)
+    tmprule_t **rp, uint64_t *index)
 {
 	int error;
 
 	if (afs & IPSL_IPV4) {
-		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV4, rp);
+		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV4, rp,
+		    index);
 		if (error != 0)
 			return (error);
 	}
 	if (afs & IPSL_IPV6) {
-		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV6, rp);
+		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV6, rp,
+		    index);
 		if (error != 0)
 			return (error);
 	}
@@ -812,34 +916,85 @@
 
 
 static void
-spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph,
-    mblk_t *mp, spd_ext_t **extv)
+spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
+    spd_ext_t **extv, ipsec_tun_pol_t *itp)
 {
 	ipsec_selkey_t sel;
 	ipsec_act_t *actp;
 	uint_t nact;
-	int diag, error, afs;
+	int diag = 0, error, afs;
 	struct spd_rule *rule = (struct spd_rule *)extv[SPD_EXT_RULE];
 	tmprule_t rules[4], *rulep = &rules[0];
+	boolean_t tunnel_mode, empty_itp, active;
+	uint64_t *index = (itp == NULL) ? NULL : &itp->itp_next_policy_index;
 
 	if (rule == NULL) {
 		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT);
 		return;
 	}
 
+	tunnel_mode = (rule->spd_rule_flags & SPD_RULE_FLAG_TUNNEL);
+
+	if (itp != NULL) {
+		mutex_enter(&itp->itp_lock);
+		ASSERT(itp->itp_policy == iph || itp->itp_inactive == iph);
+		active = (itp->itp_policy == iph);
+		if (ITP_P_ISACTIVE(itp, iph)) {
+			/* Check for mix-and-match of tunnel/transport. */
+			if ((tunnel_mode && !ITP_P_ISTUNNEL(itp, iph)) ||
+			    (!tunnel_mode && ITP_P_ISTUNNEL(itp, iph))) {
+				mutex_exit(&itp->itp_lock);
+				spdsock_error(q, mp, EBUSY, 0);
+				return;
+			}
+			empty_itp = B_FALSE;
+		} else {
+			empty_itp = B_TRUE;
+			itp->itp_flags = active ? ITPF_P_ACTIVE : ITPF_I_ACTIVE;
+			if (tunnel_mode)
+				itp->itp_flags |= active ? ITPF_P_TUNNEL :
+				    ITPF_I_TUNNEL;
+		}
+	} else {
+		empty_itp = B_FALSE;
+	}
+
 	if (rule->spd_rule_index != 0) {
-		spdsock_diag(q, mp, SPD_DIAGNOSTIC_INVALID_RULE_INDEX);
-		return;
+		diag = SPD_DIAGNOSTIC_INVALID_RULE_INDEX;
+		error = EINVAL;
+		goto fail2;
 	}
 
 	if (!spdsock_ext_to_sel(extv, &sel, &diag)) {
-		spdsock_diag(q, mp, diag);
-		return;
+		error = EINVAL;
+		goto fail2;
+	}
+
+	if (itp != NULL) {
+		if (tunnel_mode) {
+			if (sel.ipsl_valid &
+			    (IPSL_REMOTE_PORT | IPSL_LOCAL_PORT)) {
+				itp->itp_flags |= active ?
+				    ITPF_P_PER_PORT_SECURITY :
+				    ITPF_I_PER_PORT_SECURITY;
+			}
+		} else {
+			/*
+			 * For now, we don't allow transport-mode on a tunnel
+			 * with ANY specific selectors.  Bail if we have such
+			 * a request.
+			 */
+			if (sel.ipsl_valid & IPSL_WILDCARD) {
+				diag = SPD_DIAGNOSTIC_NO_TUNNEL_SELECTORS;
+				error = EINVAL;
+				goto fail2;
+			}
+		}
 	}
 
 	if (!spdsock_ext_to_actvec(extv, &actp, &nact, &diag)) {
-		spdsock_diag(q, mp, diag);
-		return;
+		error = EINVAL;
+		goto fail2;
 	}
 	/*
 	 * If no addresses were specified, add both.
@@ -852,14 +1007,14 @@
 
 	if (rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) {
 		error = mkrulepair(iph, rule, &sel, actp, nact,
-		    IPSEC_TYPE_OUTBOUND, afs, &rulep);
+		    IPSEC_TYPE_OUTBOUND, afs, &rulep, index);
 		if (error != 0)
 			goto fail;
 	}
 
 	if (rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) {
 		error = mkrulepair(iph, rule, &sel, actp, nact,
-		    IPSEC_TYPE_INBOUND, afs, &rulep);
+		    IPSEC_TYPE_INBOUND, afs, &rulep, index);
 		if (error != 0)
 			goto fail;
 	}
@@ -868,6 +1023,8 @@
 		ipsec_enter_policy(iph, rulep->pol, rulep->dir);
 
 	rw_exit(&iph->iph_lock);
+	if (itp != NULL)
+		mutex_exit(&itp->itp_lock);
 
 	ipsec_actvec_free(actp, nact);
 	spd_echo(q, mp);
@@ -879,55 +1036,115 @@
 		IPPOL_REFRELE(rulep->pol);
 	}
 	ipsec_actvec_free(actp, nact);
-	spdsock_error(q, mp, error, 0);
+fail2:
+	if (itp != NULL) {
+		if (empty_itp)
+			itp->itp_flags = 0;
+		mutex_exit(&itp->itp_lock);
+	}
+	spdsock_error(q, mp, error, diag);
 }
 
 void
-spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph,
-    mblk_t *mp, spd_ext_t **extv)
+spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
+    spd_ext_t **extv, ipsec_tun_pol_t *itp)
 {
 	ipsec_selkey_t sel;
 	struct spd_rule *rule = (struct spd_rule *)extv[SPD_EXT_RULE];
-	int diag;
+	int err, diag = 0;
 
 	if (rule == NULL) {
 		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT);
 		return;
 	}
 
+	/*
+	 * Must enter itp_lock first to avoid deadlock.  See tun.c's
+	 * set_sec_simple() for the other case of itp_lock and iph_lock.
+	 */
+	if (itp != NULL)
+		mutex_enter(&itp->itp_lock);
+
 	if (rule->spd_rule_index != 0) {
 		if (ipsec_policy_delete_index(iph, rule->spd_rule_index) != 0) {
-			spdsock_error(q, mp, ESRCH, 0);
-			return;
+			err = ESRCH;
+			goto fail;
 		}
 	} else {
 		if (!spdsock_ext_to_sel(extv, &sel, &diag)) {
-			spdsock_diag(q, mp, diag);
-			return;
+			err = EINVAL;	/* diag already set... */
+			goto fail;
+		}
+
+		if ((rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) &&
+		    !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_INBOUND)) {
+			err = ESRCH;
+			goto fail;
+		}
+
+		if ((rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) &&
+		    !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_OUTBOUND)) {
+			err = ESRCH;
+			goto fail;
 		}
-
-		if (rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) {
-			if (!ipsec_policy_delete(iph, &sel,
-			    IPSEC_TYPE_INBOUND))
-				goto fail;
+	}
+
+	if (itp != NULL) {
+		ASSERT(iph == itp->itp_policy || iph == itp->itp_inactive);
+		rw_enter(&iph->iph_lock, RW_READER);
+		if (avl_numnodes(&iph->iph_rulebyid) == 0) {
+			if (iph == itp->itp_policy)
+				itp->itp_flags &= ~ITPF_PFLAGS;
+			else
+				itp->itp_flags &= ~ITPF_IFLAGS;
 		}
-
-		if (rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) {
-			if (!ipsec_policy_delete(iph, &sel,
-			    IPSEC_TYPE_OUTBOUND))
-				goto fail;
-		}
+		/* Can exit locks in any order. */
+		rw_exit(&iph->iph_lock);
+		mutex_exit(&itp->itp_lock);
 	}
 	spd_echo(q, mp);
 	return;
 fail:
-	spdsock_error(q, mp, ESRCH, 0);
+	if (itp != NULL)
+		mutex_exit(&itp->itp_lock);
+	spdsock_error(q, mp, err, diag);
+}
+
+/* Flip one tunnel's flags and policy heads.  Does NOT consume itp's ref. */
+/* ARGSUSED */
+static void
+spdsock_flip_node(ipsec_tun_pol_t *itp, void *ignoreme)
+{
+	mutex_enter(&itp->itp_lock);
+	ITPF_SWAP(itp->itp_flags);
+	ipsec_swap_policy(itp->itp_policy, itp->itp_inactive);
+	mutex_exit(&itp->itp_lock);
+}
 
 void
-spdsock_flip(queue_t *q, mblk_t *mp)
+spdsock_flip(queue_t *q, mblk_t *mp, spd_if_t *tunname)
 {
-	ipsec_swap_policy();	/* can't fail */
+	char *tname;
+	ipsec_tun_pol_t *itp;
+
+	if (tunname != NULL) {
+		tname = (char *)tunname->spd_if_name;
+		if (*tname == '\0') {
+			ipsec_swap_global_policy();	/* can't fail */
+			itp_walk(spdsock_flip_node, NULL);
+		} else {
+			itp = get_tunnel_policy(tname);
+			if (itp == NULL) {
+				/* Better idea for "tunnel not found"? */
+				spdsock_error(q, mp, ESRCH, 0);
+				return;
+			}
+			spdsock_flip_node(itp, NULL);
+			ITP_REFRELE(itp);
+		}
+	} else {
+		ipsec_swap_global_policy();	/* can't fail */
+	}
 	spd_echo(q, mp);
 }
 
@@ -936,8 +1153,8 @@
  */
 /* ARGSUSED */
 static void
-spdsock_lookup(queue_t *q, ipsec_policy_head_t *iph,
-    mblk_t *mp, spd_ext_t **extv)
+spdsock_lookup(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
+    spd_ext_t **extv, ipsec_tun_pol_t *itp)
 {
 	spdsock_error(q, mp, EINVAL, 0);
 }
@@ -979,11 +1196,69 @@
 	mblk_t *m;
 	ipsec_policy_head_t *iph = ss->spdsock_dump_head;
 	mblk_t *req = ss->spdsock_dump_req;
+	ipsec_tun_pol_t *itp, dummy;
+
+	ss->spdsock_dump_remaining_polheads--;
+	if (error == 0 && ss->spdsock_dump_remaining_polheads != 0) {
+		/* Attempt a respin with a new policy head. */
+		rw_enter(&tunnel_policy_lock, RW_READER);
+		/* NOTE:  No need for ITP_REF*() macros here. */
+		if (tunnel_policy_gen > ss->spdsock_dump_tun_gen) {
+			/* Bail with EAGAIN. */
+			error = EAGAIN;
+		} else if (ss->spdsock_dump_name[0] == '\0') {
+			/* Just finished global, find first node. */
+			itp = (ipsec_tun_pol_t *)avl_first(&tunnel_policies);
+		} else {
+			/*
+			 * We just finished current-named polhead, find
+			 * the next one.
+			 */
+			(void) strncpy(dummy.itp_name, ss->spdsock_dump_name,
+			    LIFNAMSIZ);
+			itp = (ipsec_tun_pol_t *)avl_find(&tunnel_policies,
+			    &dummy, NULL);
+			ASSERT(itp != NULL);
+			itp = (ipsec_tun_pol_t *)AVL_NEXT(&tunnel_policies,
+			    itp);
+			/* remaining_polheads should maintain this assertion. */
+			ASSERT(itp != NULL);
+		}
+		if (error == 0) {
+			(void) strncpy(ss->spdsock_dump_name, itp->itp_name,
+			    LIFNAMSIZ);
+			/* Reset other spdsock_dump thingies. */
+			IPPH_REFRELE(ss->spdsock_dump_head);
+			if (ss->spdsock_dump_active) {
+				ss->spdsock_dump_tunnel =
+				    itp->itp_flags & ITPF_P_TUNNEL;
+				iph = itp->itp_policy;
+			} else {
+				ss->spdsock_dump_tunnel =
+				    itp->itp_flags & ITPF_I_TUNNEL;
+				iph = itp->itp_inactive;
+			}
+			IPPH_REFHOLD(iph);
+			rw_enter(&iph->iph_lock, RW_READER);
+			ss->spdsock_dump_head = iph;
+			ss->spdsock_dump_gen = iph->iph_gen;
+			ss->spdsock_dump_cur_type = 0;
+			ss->spdsock_dump_cur_af = IPSEC_AF_V4;
+			ss->spdsock_dump_cur_rule = NULL;
+			ss->spdsock_dump_count = 0;
+			ss->spdsock_dump_cur_chain = 0;
+			rw_exit(&iph->iph_lock);
+			rw_exit(&tunnel_policy_lock);
+			/* And start again. */
+			return (spdsock_dump_next_record(ss));
+		}
+		rw_exit(&tunnel_policy_lock);
+	}
 
 	rw_enter(&iph->iph_lock, RW_READER);
 	m = spdsock_dump_ruleset(req, iph, ss->spdsock_dump_count, error);
 	rw_exit(&iph->iph_lock);
-
+	IPPH_REFRELE(iph);
 	ss->spdsock_dump_req = NULL;
 	freemsg(req);
 
@@ -1284,11 +1559,13 @@
 
 
 static uint_t
-spdsock_encode_rule_head(uint8_t *base, uint_t offset,
-    spd_msg_t *req, const ipsec_policy_t *rule, uint_t dir, uint_t af)
+spdsock_encode_rule_head(uint8_t *base, uint_t offset, spd_msg_t *req,
+    const ipsec_policy_t *rule, uint_t dir, uint_t af, char *name,
+    boolean_t tunnel)
 {
 	struct spd_msg *spmsg;
 	struct spd_rule *spr;
+	spd_if_t *sid;
 
 	uint_t start = offset;
 
@@ -1311,11 +1588,34 @@
 		spr->spd_rule_type = SPD_EXT_RULE;
 		spr->spd_rule_priority = rule->ipsp_prio;
 		spr->spd_rule_flags = spdsock_rule_flags(dir, af);
+		if (tunnel)
+			spr->spd_rule_flags |= SPD_RULE_FLAG_TUNNEL;
 		spr->spd_rule_unused = 0;
 		spr->spd_rule_len = SPD_8TO64(sizeof (*spr));
 		spr->spd_rule_index = rule->ipsp_index;
 	}
 	offset += sizeof (struct spd_rule);
+
+	/*
+	 * If we have an interface name (i.e. if this policy head came from
+	 * a tunnel), add the SPD_EXT_TUN_NAME extension.
+	 */
+	if (name[0] != '\0') {
+
+		ASSERT(ALIGNED64(offset));
+
+		if (base != NULL) {
+			sid = (spd_if_t *)(base + offset);
+			sid->spd_if_exttype = SPD_EXT_TUN_NAME;
+			sid->spd_if_len = SPD_8TO64(sizeof (spd_if_t) +
+			    roundup((strlen(name) - 4), 8));
+			(void) strlcpy((char *)sid->spd_if_name, name,
+			    LIFNAMSIZ);
+		}
+
+		offset += sizeof (spd_if_t) + roundup((strlen(name) - 4), 8);
+	}
+
 	offset = spdsock_encode_sel(base, offset, rule->ipsp_sel);
 	offset = spdsock_encode_action_list(base, offset, rule->ipsp_act);
 
@@ -1330,7 +1630,7 @@
 /* ARGSUSED */
 static mblk_t *
 spdsock_encode_rule(mblk_t *req, const ipsec_policy_t *rule,
-    uint_t dir, uint_t af)
+    uint_t dir, uint_t af, char *name, boolean_t tunnel)
 {
 	mblk_t *m;
 	uint_t len;
@@ -1339,7 +1639,8 @@
 	/*
 	 * Figure out how much space we'll need.
 	 */
-	len = spdsock_encode_rule_head(NULL, 0, mreq, rule, dir, af);
+	len = spdsock_encode_rule_head(NULL, 0, mreq, rule, dir, af, name,
+	    tunnel);
 
 	/*
 	 * Allocate mblk.
@@ -1353,7 +1654,8 @@
 	 */
 	m->b_wptr = m->b_rptr + len;
 	bzero(m->b_rptr, len);
-	(void) spdsock_encode_rule_head(m->b_rptr, 0, mreq, rule, dir, af);
+	(void) spdsock_encode_rule_head(m->b_rptr, 0, mreq, rule, dir, af,
+	    name, tunnel);
 	return (m);
 }
 
@@ -1447,7 +1749,8 @@
 	}
 
 	m = spdsock_encode_rule(req, rule, ss->spdsock_dump_cur_type,
-	    ss->spdsock_dump_cur_af);
+	    ss->spdsock_dump_cur_af, ss->spdsock_dump_name,
+	    ss->spdsock_dump_tunnel);
 	rw_exit(&iph->iph_lock);
 
 	if (m == NULL)
@@ -1488,12 +1791,30 @@
  */
 /* ARGSUSED */
 static void
-spdsock_dump(queue_t *q, ipsec_policy_head_t *iph,
-    mblk_t *mp, spd_ext_t **extv)
+spdsock_dump(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp)
 {
 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
 	mblk_t *mr;
 
+	/* spdsock_parse() already NULL-terminated spdsock_dump_name. */
+	if (iph == ALL_ACTIVE_POLHEADS || iph == ALL_INACTIVE_POLHEADS) {
+		rw_enter(&tunnel_policy_lock, RW_READER);
+		ss->spdsock_dump_remaining_polheads = 1 +
+		    avl_numnodes(&tunnel_policies);
+		ss->spdsock_dump_tun_gen = tunnel_policy_gen;
+		rw_exit(&tunnel_policy_lock);
+		if (iph == ALL_ACTIVE_POLHEADS) {
+			iph = ipsec_system_policy();
+			ss->spdsock_dump_active = B_TRUE;
+		} else {
+			iph = ipsec_inactive_policy();
+			ss->spdsock_dump_active = B_FALSE;
+		}
+		ASSERT(ss->spdsock_dump_name[0] == '\0');
+	} else {
+		ss->spdsock_dump_remaining_polheads = 1;
+	}
+
 	rw_enter(&iph->iph_lock, RW_READER);
 
 	mr = spdsock_dump_ruleset(mp, iph, 0, 0);
@@ -1518,10 +1839,46 @@
 	qenable(OTHERQ(q));
 }
 
+/* Per-tunnel clone step.  Does NOT consume a reference to itp. */
 void
-spdsock_clone(queue_t *q, mblk_t *mp)
+spdsock_clone_node(ipsec_tun_pol_t *itp, void *ep)
+{
+	int *errptr = (int *)ep;	/* in/out error from the caller */
+
+	if (*errptr != 0)
+		return;	/* We've failed already for some reason. */
+	mutex_enter(&itp->itp_lock);
+	ITPF_CLONE(itp->itp_flags);
+	*errptr = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive);
+	mutex_exit(&itp->itp_lock);
+}
+
+void
+spdsock_clone(queue_t *q, mblk_t *mp, spd_if_t *tunname)
 {
-	int error = ipsec_clone_system_policy();
+	int error = 0;	/* spdsock_clone_node() reads this on entry. */
+	char *tname;
+	ipsec_tun_pol_t *itp;
+
+	if (tunname != NULL) {
+		tname = (char *)tunname->spd_if_name;
+		if (*tname == '\0') {	/* "": global, then itp_walk() */
+			error = ipsec_clone_system_policy();
+			if (error == 0)
+				itp_walk(spdsock_clone_node, &error);
+		} else {
+			itp = get_tunnel_policy(tname);
+			if (itp == NULL) {
+				spdsock_error(q, mp, ENOENT, 0);
+				return;
+			}
+			spdsock_clone_node(itp, &error);
+			ITP_REFRELE(itp);
+		}
+	} else {
+		error = ipsec_clone_system_policy();
+	}
+
 	if (error != 0)
 		spdsock_error(q, mp, error, 0);
 	else
@@ -2068,6 +2425,160 @@
 	}
 }
 
+/*
+ * With a reference-held ill, dig down and find an instance of "tun", and
+ * point its tun_itp at itp (taking an extra hold on itp for it).  Also,
+ * release ill's reference when finished.
+ *
+ * We'll be messing with q_next, so be VERY careful.
+ */
+static void
+find_tun_and_set_itp(ill_t *ill, ipsec_tun_pol_t *itp)
+{
+	queue_t *q;
+	tun_t *tun;
+
+	/* Don't bother if this ill is going away. */
+	if (ill->ill_flags & ILL_CONDEMNED) {
+		ill_refrele(ill);
+		return;
+	}
+
+
+	q = ill->ill_wq;
+	claimstr(q);	/* Lighter-weight than freezestr(). */
+
+	do {
+		/* Use strcmp() because "tun" is bounded. */
+		if (strcmp(q->q_qinfo->qi_minfo->mi_idname, "tun") == 0) {
+			/* Aha!  Got it. */
+			tun = (tun_t *)q->q_ptr;
+			if (tun != NULL) {
+				mutex_enter(&tun->tun_lock);
+				if (tun->tun_itp != itp) {
+					ASSERT(tun->tun_itp == NULL);
+					ITP_REFHOLD(itp);
+					tun->tun_itp = itp;
+				}
+				mutex_exit(&tun->tun_lock);
+				goto release_and_return;
+			}
+			/*
+			 * Else assume this is some other module named "tun"
+			 * and move on, hoping we find one that actually has
+			 * something in q_ptr.
+			 */
+		}
+		q = q->q_next;
+	} while (q != NULL);
+
+release_and_return:
+	releasestr(ill->ill_wq);	/* pairs with claimstr() above */
+	ill_refrele(ill);
+}
+
+/*
+ * Sort through the mess of polhead options to retrieve an appropriate one.
+ * Returns NULL if we send an spdsock error.  Returns a valid pointer if we
+ * found a valid polhead.  Returns ALL_ACTIVE_POLHEADS (aka. -1) or
+ * ALL_INACTIVE_POLHEADS (aka. -2) if the operation calls for the operation to
+ * act on ALL policy heads.
+ */
+static ipsec_policy_head_t *
+get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid,
+    int msgtype, ipsec_tun_pol_t **itpp)
+{
+	ipsec_tun_pol_t *itp;
+	ipsec_policy_head_t *iph;
+	int errno;
+	char *tname;
+	boolean_t active;
+	spdsock_t *ss = (spdsock_t *)q->q_ptr;
+	uint64_t gen;	/* Placeholder */
+	ill_t *v4, *v6;
+
+	active = (spdid == SPD_ACTIVE);
+	*itpp = NULL;
+	if (!active && spdid != SPD_STANDBY) {
+		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_SPDID);
+		return (NULL);
+	}
+
+	if (tunname != NULL) {
+		/* Acting on a tunnel's SPD. */
+		tname = (char *)tunname->spd_if_name;
+		if (*tname == '\0') {
+			/* Handle all-polhead cases here. */
+			if (msgtype != SPD_FLUSH && msgtype != SPD_DUMP) {
+				spdsock_diag(q, mp,
+				    SPD_DIAGNOSTIC_NOT_GLOBAL_OP);
+				return (NULL);
+			}
+			return (active ? ALL_ACTIVE_POLHEADS :
+			    ALL_INACTIVE_POLHEADS);
+		}
+
+		itp = get_tunnel_policy(tname);
+		if (itp == NULL) {
+			if (msgtype != SPD_ADDRULE) {
+				/* "Tunnel not found" */
+				spdsock_error(q, mp, ENOENT, 0);
+				return (NULL);
+			}
+
+			errno = 0;
+			itp = create_tunnel_policy(tname, &errno, &gen);
+			if (itp == NULL) {
+				/*
+				 * Something very bad happened, most likely
+				 * ENOMEM.  Return an indicator.
+				 */
+				spdsock_error(q, mp, errno, 0);
+				return (NULL);
+			}
+		}
+		/*
+		 * Troll the plumbed tunnels and see if we have a
+		 * match.  We need to do this always in case we add
+		 * policy AFTER plumbing a tunnel.
+		 */
+		v4 = ill_lookup_on_name(tname, B_FALSE, B_FALSE, NULL,
+		    NULL, NULL, &errno, NULL);
+		if (v4 != NULL)
+			find_tun_and_set_itp(v4, itp);
+		v6 = ill_lookup_on_name(tname, B_FALSE, B_TRUE, NULL,
+		    NULL, NULL, &errno, NULL);
+		if (v6 != NULL)
+			find_tun_and_set_itp(v6, itp);
+		ASSERT(itp != NULL);
+		*itpp = itp;
+		/* For spdsock dump state, set the polhead's name. */
+		if (msgtype == SPD_DUMP) {
+			(void) strncpy(ss->spdsock_dump_name, tname, LIFNAMSIZ);
+			ss->spdsock_dump_tunnel = itp->itp_flags &
+			    (active ? ITPF_P_TUNNEL : ITPF_I_TUNNEL);
+		}
+	} else {
+		itp = NULL;
+		/* For spdsock dump state, indicate it's global policy. */
+		if (msgtype == SPD_DUMP)
+			ss->spdsock_dump_name[0] = '\0';
+	}
+
+	if (active)
+		iph = (itp == NULL) ? ipsec_system_policy() : itp->itp_policy;
+	else
+		iph = (itp == NULL) ? ipsec_inactive_policy() :
+		    itp->itp_inactive;
+
+	ASSERT(iph != NULL);
+	if (itp != NULL) {
+		IPPH_REFHOLD(iph);
+	}
+
+	return (iph);
+}
+
 static void
 spdsock_parse(queue_t *q, mblk_t *mp)
 {
@@ -2075,6 +2586,8 @@
 	spd_ext_t *extv[SPD_EXT_MAX + 1];
 	uint_t msgsize;
 	ipsec_policy_head_t *iph;
+	ipsec_tun_pol_t *itp;
+	spd_if_t *tunname;
 
 	/* Make sure nothing's below me. */
 	ASSERT(WR(q)->q_next == NULL);
@@ -2099,7 +2612,6 @@
 	}
 
 	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
-
 		/* Get all message into one mblk. */
 		if (pullupmsg(mp, -1) == 0) {
 			/*
@@ -2143,24 +2655,6 @@
 	}
 
 	/*
-	 * Which rule set are we operating on today?
-	 */
-
-	switch (spmsg->spd_msg_spdid) {
-	case SPD_ACTIVE:
-		iph = ipsec_system_policy();
-		break;
-
-	case SPD_STANDBY:
-		iph = ipsec_inactive_policy();
-		break;
-
-	default:
-		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_SPDID);
-		return;
-	}
-
-	/*
 	 * Special-case SPD_UPDATEALGS so as not to load IPsec.
 	 */
 	if (!ipsec_loaded() && spmsg->spd_msg_type != SPD_UPDATEALGS) {
@@ -2174,50 +2668,87 @@
 		return;
 	}
 
+	/* First check for messages that need no polheads at all. */
 	switch (spmsg->spd_msg_type) {
 	case SPD_UPDATEALGS:
 		spdsock_updatealg(q, mp, extv);
 		return;
-	case SPD_FLUSH:
-		spdsock_flush(q, iph, mp, extv);
-		return;
-
-	case SPD_ADDRULE:
-		spdsock_addrule(q, iph, mp, extv);
-		return;
-
-	case SPD_DELETERULE:
-		spdsock_deleterule(q, iph, mp, extv);
-		return;
-
-	case SPD_FLIP:
-		spdsock_flip(q, mp);
-		return;
-
-	case SPD_LOOKUP:
-		spdsock_lookup(q, iph, mp, extv);
-		return;
-
-	case SPD_DUMP:
-		spdsock_dump(q, iph, mp, extv);
-		return;
-
-	case SPD_CLONE:
-		spdsock_clone(q, mp);
-		return;
-
 	case SPD_ALGLIST:
 		spdsock_alglist(q, mp);
 		return;
-
 	case SPD_DUMPALGS:
 		spdsock_dumpalgs(q, mp);
 		return;
-
+	}
+
+	/*
+	 * Then check for ones that need both primary/secondary polheads,
+	 * finding the appropriate tunnel policy if need be.
+	 */
+	tunname = (spd_if_t *)extv[SPD_EXT_TUN_NAME];
+	switch (spmsg->spd_msg_type) {
+	case SPD_FLIP:
+		spdsock_flip(q, mp, tunname);
+		return;
+	case SPD_CLONE:
+		spdsock_clone(q, mp, tunname);
+		return;
+	}
+
+	/*
+	 * Finally, find ones that operate on exactly one polhead, or
+	 * "all polheads" of a given type (active/inactive).
+	 */
+	iph = get_appropriate_polhead(q, mp, tunname, spmsg->spd_msg_spdid,
+	    spmsg->spd_msg_type, &itp);
+	if (iph == NULL)
+		return;
+
+	/* All-polheads-ready operations. */
+	switch (spmsg->spd_msg_type) {
+	case SPD_FLUSH:
+		if (itp != NULL) {
+			mutex_enter(&itp->itp_lock);
+			if (spmsg->spd_msg_spdid == SPD_ACTIVE)
+				itp->itp_flags &= ~ITPF_PFLAGS;
+			else
+				itp->itp_flags &= ~ITPF_IFLAGS;
+			mutex_exit(&itp->itp_lock);
+			ITP_REFRELE(itp);
+		}
+		spdsock_flush(q, iph, mp);
+		return;
+	case SPD_DUMP:
+		if (itp != NULL)
+			ITP_REFRELE(itp);
+		spdsock_dump(q, iph, mp);
+		return;
+	}
+
+	if (iph == ALL_ACTIVE_POLHEADS || iph == ALL_INACTIVE_POLHEADS) {
+		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NOT_GLOBAL_OP);
+		return;
+	}
+
+	/* Single-polhead-only operations. */
+	switch (spmsg->spd_msg_type) {
+	case SPD_ADDRULE:
+		spdsock_addrule(q, iph, mp, extv, itp);
+		break;
+	case SPD_DELETERULE:
+		spdsock_deleterule(q, iph, mp, extv, itp);
+		break;
+	case SPD_LOOKUP:
+		spdsock_lookup(q, iph, mp, extv, itp);
+		break;
 	default:
 		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_MSG_TYPE);
-		return;
+		break;
 	}
+
+	IPPH_REFRELE(iph);
+	if (itp != NULL)
+		ITP_REFRELE(itp);
 }
 
 /*
--- a/usr/src/uts/common/inet/ip/tun.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ip/tun.c	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -75,7 +74,9 @@
 #include <net/if_dl.h>
 #include <inet/ip_if.h>
 #include <sys/strsun.h>
+#include <inet/ipdrop.h>
 #include <inet/tun.h>
+#include <inet/ipsec_impl.h>
 
 
 #include <sys/conf.h>
@@ -103,21 +104,23 @@
 static int	tun_rproc(queue_t *, mblk_t *);
 static int	tun_wproc_mdata(queue_t *, mblk_t *);
 static int	tun_wproc(queue_t *, mblk_t  *);
-static int	tun_rdata_v4(queue_t *, mblk_t *);
-static int	tun_rdata_v6(queue_t *, mblk_t *);
-static int	tun_send_sec_req(queue_t *);
+static int	tun_rdata(queue_t *, mblk_t *, mblk_t *, tun_t *, uint_t);
+static int	tun_rdata_v4(queue_t *, mblk_t *, mblk_t *, tun_t *);
+static int	tun_rdata_v6(queue_t *, mblk_t *, mblk_t *, tun_t *);
+static int	tun_set_sec_simple(tun_t *, ipsec_req_t *);
 static void	tun_send_ire_req(queue_t *);
 static uint32_t	tun_update_link_mtu(queue_t *, uint32_t, boolean_t);
 static mblk_t	*tun_realloc_mblk(queue_t *, mblk_t *, size_t, mblk_t *,
     boolean_t);
 static void	tun_recover(queue_t *, mblk_t *, size_t);
-static void	tun_rem_list(tun_t *);
-static void	tun_rput_icmp_err_v4(queue_t *, mblk_t *);
-static void	icmp_ricmp_err_v4_v4(queue_t *, mblk_t *);
-static void	icmp_ricmp_err_v6_v4(queue_t *, mblk_t *);
-static void	icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, icmp6_t *);
-static void	icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, icmp6_t *);
-static void	tun_rput_icmp_err_v6(queue_t *, mblk_t *);
+static void	tun_rem_ppa_list(tun_t *);
+static void	tun_rem_tun_byaddr_list(tun_t *);
+static void	tun_rput_icmp_err_v4(queue_t *, mblk_t *, mblk_t *);
+static void	icmp_ricmp_err_v4_v4(queue_t *, mblk_t *, mblk_t *);
+static void	icmp_ricmp_err_v6_v4(queue_t *, mblk_t *, mblk_t *);
+static void	icmp_ricmp_err_v4_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
+static void	icmp_ricmp_err_v6_v6(queue_t *, mblk_t *, mblk_t *, icmp6_t *);
+static void	tun_rput_icmp_err_v6(queue_t *, mblk_t *, mblk_t *);
 static int	tun_rput_tpi(queue_t *, mblk_t *);
 static int	tun_send_bind_req(queue_t *);
 static void	tun_statinit(tun_stats_t *, char *);
@@ -129,6 +132,7 @@
 static int	tun_wputnext_v6(queue_t *, mblk_t *);
 static int	tun_wputnext_v4(queue_t *, mblk_t *);
 static boolean_t tun_limit_value_v6(queue_t *, mblk_t *, ip6_t *, int *);
+static void	tun_freemsg_chain(mblk_t *, uint64_t *);
 
 /* module's defined constants, globals and data structures */
 
@@ -175,7 +179,6 @@
 
 #define	TUN_RECOVER_WAIT		(1*hz)
 
-
 /* canned DL_INFO_ACK  - adjusted based on tunnel type */
 dl_info_ack_t infoack = {
 	DL_INFO_ACK,	/* dl_primitive */
@@ -250,6 +253,14 @@
 static kmutex_t		tun_global_lock;
 static tun_stats_t	*tun_ppa_list[TUN_PPA_SZ];
 static tun_stats_t	*tun_add_stat(queue_t *);
+
+#define	TUN_T_SZ	251
+#define	TUN_BYADDR_LIST_HASH(a) (((a).s6_addr32[3]) % (TUN_T_SZ))
+
+tun_t *tun_byaddr_list[TUN_T_SZ];
+static void tun_add_byaddr(tun_t *);
+static ipsec_tun_pol_t *itp_get_byaddr_fn(uint32_t *, uint32_t *, int);
+
 static boolean_t 	tun_do_fastpath = B_TRUE;
 static ipaddr_t		relay_rtr_addr_v4 = INADDR_ANY;
 
@@ -319,6 +330,9 @@
 	if (rc == 0) {
 		mutex_init(&tun_global_lock, NULL, MUTEX_DEFAULT, NULL);
+		rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
+		itp_get_byaddr = itp_get_byaddr_fn;	/* only on success */
+		rw_exit(&itp_get_byaddr_rw_lock);
 	}
 	return (rc);
 }
 
@@ -330,6 +344,9 @@
 	rc = mod_remove(&modlinkage);
 	if (rc == 0) {
 		mutex_destroy(&tun_global_lock);
+		rw_enter(&itp_get_byaddr_rw_lock, RW_WRITER);
+		itp_get_byaddr = itp_get_byaddr_dummy;
+		rw_exit(&itp_get_byaddr_rw_lock);
 	}
 	return (rc);
 }
@@ -350,6 +367,8 @@
 tun_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 {
 	tun_t	*atp;
+	mblk_t *hello;
+	ipsec_info_t *ii;
 
 	if (q->q_ptr != NULL) {
 		/* re-open of an already open instance */
@@ -362,11 +381,16 @@
 
 	tun1dbg(("tun_open\n"));
 
+	hello = allocb(sizeof (ipsec_info_t), BPRI_HI);
+	if (hello == NULL)
+		return (ENOMEM);
+
 	/* allocate per-instance structure */
 	atp = kmem_zalloc(sizeof (tun_t), KM_SLEEP);
 
 	atp->tun_state = DL_UNATTACHED;
 	atp->tun_dev = *devp;
+	atp->tun_zoneid = crgetzoneid(credp);
 
 	/*
 	 * Based on the lower version of IP, initialize stuff that
@@ -437,7 +461,15 @@
 	}
 
 	q->q_ptr = WR(q)->q_ptr = atp;
+	atp->tun_wq = WR(q);
+	tun_add_byaddr(atp);
+	ii = (ipsec_info_t *)hello->b_rptr;
+	hello->b_wptr = hello->b_rptr + sizeof (*ii);
+	hello->b_datap->db_type = M_CTL;
+	ii->ipsec_info_type = TUN_HELLO;
+	ii->ipsec_info_len = sizeof (*ii);
 	qprocson(q);
+	putnext(WR(q), hello);
 	return (0);
 }
 
@@ -454,11 +486,20 @@
 
 	qprocsoff(q);
 
+	/* NOTE:  tun_rem_ppa_list() may unlink tun_itp from its AVL tree. */
 	if (atp->tun_stats != NULL)
-		tun_rem_list(atp);
+		tun_rem_ppa_list(atp);
+
+	if (atp->tun_itp != NULL) {
+		/* In brackets because of ITP_REFRELE's brackets. */
+		ITP_REFRELE(atp->tun_itp);
+	}
 
 	mutex_destroy(&atp->tun_lock);
 
+	/* remove tun_t from global list */
+	tun_rem_tun_byaddr_list(atp);
+
 	/* free per-instance struct  */
 	kmem_free(atp, sizeof (tun_t));
 
@@ -756,6 +797,200 @@
 }
 
 /*
+ * Free each message on a b_next chain; if bytecount is non-NULL, add each
+ * message's msgdsize() to it.  Caller responsible for per-packet stats.
+ */
+static void
+tun_freemsg_chain(mblk_t *mp, uint64_t *bytecount)
+{
+	mblk_t *mpnext;
+	while (mp != NULL) {
+		ASSERT(mp->b_prev == NULL);
+		mpnext = mp->b_next;
+		mp->b_next = NULL;
+		if (bytecount != NULL)
+			atomic_add_64(bytecount, (int64_t)msgdsize(mp));
+		freemsg(mp);
+		mp = mpnext;
+	}
+}
+
+/*
+ * Send all messages in a chain of mblk chains and optionally collect
+ * byte-counter stats.  Caller responsible for per-packet stats, and ensuring
+ * mp is always non-NULL.
+ *
+ * This is a macro so we can save stack.  Assume the caller function
+ * has local-variable "nmp" as a placeholder.  Define two versions, one with
+ * byte-counting stats and one without.
+ */
+#define	TUN_PUTMSG_CHAIN_STATS(q, mp, nmp, bytecount) \
+	(nmp) = NULL; \
+	ASSERT((mp) != NULL); \
+	do { \
+		if ((nmp) != NULL) \
+			putnext(q, (nmp)); \
+		ASSERT((mp)->b_prev == NULL); \
+		(nmp) = (mp); \
+		(mp) = (mp)->b_next; \
+		(nmp)->b_next = NULL; \
+		atomic_add_64(bytecount, (int64_t)msgdsize(nmp)); \
+	} while ((mp) != NULL); \
+\
+	putnext((q), (nmp))  /* trailing semicolon provided by instantiator. */
+
+#define	TUN_PUTMSG_CHAIN(q, mp, nmp) \
+	(nmp) = NULL; \
+	ASSERT((mp) != NULL); \
+	do { \
+		if ((nmp) != NULL) \
+			putnext(q, (nmp)); \
+		ASSERT((mp)->b_prev == NULL); \
+		(nmp) = (mp); \
+		(mp) = (mp)->b_next; \
+		(nmp)->b_next = NULL; \
+	} while ((mp) != NULL); \
+\
+	putnext((q), (nmp))  /* trailing semicolon provided by instantiator. */
+
+/*
+ * Macro that not only checks tun_itp, but also sees if one got loaded
+ * via ipsecconf(1m)/PF_POLICY behind our backs.  Note the sleazy update of
+ * (tun)->tun_itp_gen so we don't lose races with other possible updates via
+ * PF_POLICY.
+ */
+#define	tun_policy_present(tun)	(((tun)->tun_itp != NULL) || \
+	(((tun)->tun_itp_gen < tunnel_policy_gen) && \
+	    ((tun)->tun_itp_gen = tunnel_policy_gen) && \
+	    (((tun)->tun_itp = get_tunnel_policy((tun)->tun_lifname)) != NULL)))
+
+/*
+ * Search tun_byaddr_list for occurrence of tun_t with matching
+ * inner addresses.  This function does not take into account
+ * prefixes.  Possibly we could generalize this function in the
+ * future with V6_MASK_EQ() and pass in an all 1's prefix for IP
+ * address matches.
+ * Returns NULL on no match.
+ * This function is not directly called - it's assigned into itp_get_byaddr().
+ */
+static ipsec_tun_pol_t *
+itp_get_byaddr_fn(uint32_t *lin, uint32_t *fin, int af)
+{
+	tun_t	*tun_list;
+	uint_t index;
+	in6_addr_t lmapped, fmapped, *laddr, *faddr;
+
+	if (af == AF_INET) {
+		laddr = &lmapped;
+		faddr = &fmapped;
+		IN6_INADDR_TO_V4MAPPED((struct in_addr *)lin, laddr);
+		IN6_INADDR_TO_V4MAPPED((struct in_addr *)fin, faddr);
+	} else {
+		laddr = (in6_addr_t *)lin;
+		faddr = (in6_addr_t *)fin;
+	}
+
+	index = TUN_BYADDR_LIST_HASH(*faddr);
+
+	/*
+	 * it's ok to grab global lock while holding tun_lock/perimeter
+	 */
+	mutex_enter(&tun_global_lock);
+
+	/*
+	 * walk through list of tun_t looking for a match of
+	 * inner addresses.  Addresses are inserted with
+	 * IN6_IPADDR_TO_V4MAPPED(), so v6 matching works for
+	 * all cases.
+	 */
+	for (tun_list = tun_byaddr_list[index]; tun_list;
+	    tun_list = tun_list->tun_next) {
+		if (IN6_ARE_ADDR_EQUAL(&tun_list->tun_laddr, laddr) &&
+		    IN6_ARE_ADDR_EQUAL(&tun_list->tun_faddr, faddr)) {
+			ipsec_tun_pol_t *itp;
+
+			if (!tun_policy_present(tun_list)) {
+				tun1dbg(("itp_get_byaddr: No IPsec policy on "
+				    "matching tun_t instance %p/%s\n",
+				    (void *)tun_list, tun_list->tun_lifname));
+				continue;
+			}
+			tun1dbg(("itp_get_byaddr: Found matching tun_t %p with "
+			    "IPsec policy\n", (void *)tun_list));
+			mutex_enter(&tun_list->tun_itp->itp_lock);
+			itp = tun_list->tun_itp;
+			mutex_exit(&tun_global_lock);
+			ITP_REFHOLD(itp);
+			mutex_exit(&itp->itp_lock);
+			tun1dbg(("itp_get_byaddr: Found itp %p \n",
+			    (void *)itp));
+			return (itp);
+		}
+	}
+
+	/* didn't find one, return zilch */
+
+	tun1dbg(("itp_get_byaddr: No matching tunnel instances with policy\n"));
+	mutex_exit(&tun_global_lock);
+	return (NULL);
+}
+
+/*
+ * Search tun_byaddr_list for occurrence of tun_t, same upper and lower stream,
+ * and same type (6to4 vs automatic vs configured)
+ * If none is found, insert this tun entry.
+ */
+static void
+tun_add_byaddr(tun_t *atp)
+{
+	tun_t	*tun_list;
+	t_uscalar_t	ppa = atp->tun_ppa;
+	uint_t	mask = atp->tun_flags & (TUN_LOWER_MASK | TUN_UPPER_MASK);
+	uint_t	tun_type = (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4));
+	uint_t index = TUN_BYADDR_LIST_HASH(atp->tun_faddr);
+
+	tun1dbg(("tun_add_byaddr: index = %d\n", index));
+
+	ASSERT(atp->tun_next == NULL);
+	/*
+	 * it's ok to grab global lock while holding tun_lock/perimeter
+	 */
+	mutex_enter(&tun_global_lock);
+
+	/*
+	 * walk through list of tun_t looking for a match of
+	 * ppa, same upper and lower stream and same tunnel type
+	 * (automatic or configured).
+	 * There shouldn't be all that many tunnels, so a sequential
+	 * search of the bucket should be fine.
+	 */
+	for (tun_list = tun_byaddr_list[index]; tun_list;
+	    tun_list = tun_list->tun_next) {
+		if (tun_list->tun_ppa == ppa &&
+		    ((tun_list->tun_flags & (TUN_LOWER_MASK |
+		    TUN_UPPER_MASK)) == mask) &&
+		    ((tun_list->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) ==
+		    tun_type)) {
+			tun1dbg(("tun_add_byaddr: tun 0x%p Found ppa %d " \
+			    "tun_stats 0x%p\n", (void *)atp, ppa,
+			    (void *)tun_list));
+			tun1dbg(("tun_add_byaddr: Nothing to do."));
+			mutex_exit(&tun_global_lock);
+			return;
+		}
+	}
+
+	/* didn't find one, throw it in the global list */
+
+	atp->tun_next = tun_byaddr_list[index];
+	atp->tun_ptpn = &(tun_byaddr_list[index]);
+	if (tun_byaddr_list[index] != NULL)
+		tun_byaddr_list[index]->tun_ptpn = &(atp->tun_next);
+	tun_byaddr_list[index] = atp;
+	mutex_exit(&tun_global_lock);
+}
+
+/*
  * Search tun_ppa_list for occurrence of tun_ppa, same lower stream,
  * and same type (6to4 vs automatic vs configured)
  * If none is found, insert this tun entry and create a new kstat for
@@ -778,7 +1013,7 @@
 
 	ASSERT(atp->tun_stats == NULL);
 
-	ASSERT(atp->tun_next == NULL);
+	ASSERT(atp->tun_kstat_next == NULL);
 	/*
 	 * it's ok to grab global lock while holding tun_lock/perimeter
 	 */
@@ -804,16 +1039,27 @@
 			mutex_exit(&tun_global_lock);
 			ASSERT(tun_list->ts_refcnt > 0);
 			tun_list->ts_refcnt++;
-			ASSERT(atp->tun_next == NULL);
+			ASSERT(atp->tun_kstat_next == NULL);
 			ASSERT(atp != tun_list->ts_atp);
 			/*
 			 * add this tunnel instance to head of list
 			 * of tunnels referencing this kstat structure
 			 */
-			atp->tun_next = tun_list->ts_atp;
+			atp->tun_kstat_next = tun_list->ts_atp;
 			tun_list->ts_atp = atp;
 			atp->tun_stats = tun_list;
 			mutex_exit(&tun_list->ts_lock);
+
+			/*
+			 * Check for IPsec tunnel policy pointer, if it hasn't
+			 * been set already.  If we call get_tunnel_policy()
+			 * and return NULL, there's none configured.
+			 */
+			if (atp->tun_lifname[0] != '\0' &&
+			    atp->tun_itp == NULL) {
+				atp->tun_itp =
+				    get_tunnel_policy(atp->tun_lifname);
+			}
 			return (tun_list);
 		}
 	}
@@ -833,7 +1079,7 @@
 		tun_stat->ts_next = tun_ppa_list[index];
 		tun_ppa_list[index] = tun_stat;
 		tun_stat->ts_atp = atp;
-		atp->tun_next = NULL;
+		atp->tun_kstat_next = NULL;
 		atp->tun_stats = tun_stat;
 		mutex_exit(&tun_global_lock);
 		tun_statinit(tun_stat, q->q_qinfo->qi_minfo->mi_idname);
@@ -844,11 +1090,34 @@
 }
 
 /*
+ * Remove atp from its tun_byaddr_list hash chain, if it is on one.
+ * Called either holding tun_lock or in perimeter; takes tun_global_lock.
+ */
+static void
+tun_rem_tun_byaddr_list(tun_t *atp)
+{
+	mutex_enter(&tun_global_lock);
+	/*
+	 * A NULL tun_ptpn means atp is not linked (never added, or already
+	 * removed); unlinking then would dereference NULL, so skip it.
+	 */
+	if (atp->tun_ptpn != NULL) {
+		*(atp->tun_ptpn) = atp->tun_next;
+		if (atp->tun_next != NULL)
+			atp->tun_next->tun_ptpn = atp->tun_ptpn;
+		atp->tun_next = NULL;
+		atp->tun_ptpn = NULL;
+	}
+	ASSERT(atp->tun_next == NULL);
+	mutex_exit(&tun_global_lock);
+}
+
+/*
  * remove tun from tun_ppa_list
  * called either holding tun_lock or in perimeter
  */
 static void
-tun_rem_list(tun_t *atp)
+tun_rem_ppa_list(tun_t *atp)
 {
 	uint_t index = TUN_LIST_HASH(atp->tun_ppa);
 	tun_stats_t	*tun_stat = atp->tun_stats;
@@ -865,16 +1134,20 @@
 	tun_stat->ts_refcnt--;
 
 	/*
-	 * If this is the last instance, delete the tun_stat
+	 * If this is the last instance, delete the tun_stat AND unlink the
+	 * ipsec_tun_pol_t from the AVL tree.
 	 */
 	if (tun_stat->ts_refcnt == 0) {
 		kstat_t		*tksp;
 
-		tun1dbg(("tun_rem_list: tun 0x%p Last ref ppa %d tun_stat " \
-		    "0x%p\n", (void *)atp, tun_stat->ts_ppa,
+		tun1dbg(("tun_rem_ppa_list: tun 0x%p Last ref ppa %d tun_stat" \
+		    " 0x%p\n", (void *)atp, tun_stat->ts_ppa,
 		    (void *)tun_stat));
 
-		ASSERT(atp->tun_next == NULL);
+		if (atp->tun_itp != NULL)
+			itp_unlink(atp->tun_itp);
+
+		ASSERT(atp->tun_kstat_next == NULL);
 		for (tun_list = &tun_ppa_list[index]; *tun_list;
 		    tun_list = &(*tun_list)->ts_next) {
 			if (tun_stat == *tun_list) {
@@ -894,10 +1167,10 @@
 	}
 	mutex_exit(&tun_global_lock);
 
-	tun1dbg(("tun_rem_list: tun 0x%p Removing ref ppa %d tun_stat 0x%p\n",
-	    (void *)atp, tun_stat->ts_ppa, (void *)tun_stat));
-
-	ASSERT(tun_stat->ts_atp->tun_next != NULL);
+	tun1dbg(("tun_rem_ppa_list: tun 0x%p Removing ref ppa %d tun_stat " \
+	    "0x%p\n", (void *)atp, tun_stat->ts_ppa, (void *)tun_stat));
+
+	ASSERT(tun_stat->ts_atp->tun_kstat_next != NULL);
 
 	/*
 	 * remove tunnel instance from list of tunnels referencing
@@ -905,15 +1178,15 @@
 	 * sequentially
 	 */
 	for (at_list = &tun_stat->ts_atp; *at_list;
-	    at_list = &(*at_list)->tun_next) {
+	    at_list = &(*at_list)->tun_kstat_next) {
 		if (atp == *at_list) {
-			*at_list = atp->tun_next;
-			atp->tun_next = NULL;
+			*at_list = atp->tun_kstat_next;
+			atp->tun_kstat_next = NULL;
 			break;
 		}
 	}
 	ASSERT(tun_stat->ts_atp != NULL);
-	ASSERT(atp->tun_next == NULL);
+	ASSERT(atp->tun_kstat_next == NULL);
 	mutex_exit(&tun_stat->ts_lock);
 }
 
@@ -1035,7 +1308,7 @@
 		 * for this instance
 		 */
 		if (atp->tun_stats) {
-			tun_rem_list(atp);
+			tun_rem_ppa_list(atp);
 			tun1dbg(("tun_wput_dlpi_other: deleting kstat"));
 		}
 		tun_sendokack(q, mp, prim);
@@ -1465,6 +1738,7 @@
 				goto nak;
 			}
 			atp->tun_ipha.ipha_dst = sin->sin_addr.s_addr;
+			/* Remove from previous hash bucket */
 			IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
 			    &atp->tun_faddr);
 		} else if (ta->ifta_saddr.ss_family == AF_INET6) {
@@ -1481,6 +1755,7 @@
 				goto nak;
 			}
 
+			/* Remove from previous hash bucket */
 			atp->tun_ip6h.ip6_dst = atp->tun_faddr =
 			    sin6->sin6_addr;
 		} else {
@@ -1493,6 +1768,9 @@
 		 * was good.
 		 */
 		atp->tun_flags |= TUN_DST;
+		/* tun_faddr changed, move to proper hash bucket */
+		tun_rem_tun_byaddr_list(atp);
+		tun_add_byaddr(atp);
 	}
 
 	if (new && (ta->ifta_flags & IFTUN_HOPLIMIT)) {
@@ -1541,9 +1819,13 @@
 		}
 	}
 
-	if (ta->ifta_flags & IFTUN_SECURITY) {
-		ipsec_req_t *ipsr;
-
+	/*
+	 * If we passed in IFTUN_COMPLEX_SECURITY, do not do anything.  This
+	 * allows us to let dumb ifconfig(1m)-like apps reflect what they see
+	 * without a penalty.
+	 */
+	if ((ta->ifta_flags & (IFTUN_SECURITY | IFTUN_COMPLEX_SECURITY)) ==
+	    IFTUN_SECURITY) {
 		/* Can't set security properties for automatic tunnels. */
 		if (atp->tun_flags & (TUN_AUTOMATIC | TUN_6TO4)) {
 			uerr = EINVAL;
@@ -1552,19 +1834,23 @@
 
 		/*
 		 * The version number checked out, so just cast
-		 * iftr_secinfo to an ipsr.
-		 *
-		 * Also pay attention to the iftr_secinfo.
+		 * ifta_secinfo to an ipsr.
 		 */
-
-		ipsr = (ipsec_req_t *)(&ta->ifta_secinfo);
-		atp->tun_secinfo = *ipsr;
-		atp->tun_flags |= TUN_SECURITY;
-		/*
-		 * Do setting of security options after T_BIND_ACK
-		 * happens.  If there is no T_BIND_ACK, however,
-		 * see below.
-		 */
+		if (ipsec_loaded()) {
+			uerr = tun_set_sec_simple(atp,
+			    (ipsec_req_t *)&ta->ifta_secinfo);
+		} else {
+			if (ipsec_failed()) {
+				uerr = EPROTONOSUPPORT;
+				goto nak;
+			}
+			/* Otherwise, try again later and load IPsec. */
+			(void) putq(q, mp);
+			ipsec_loader_loadnow();
+			return;
+		}
+		if (uerr != 0)
+			goto nak;
 	}
 
 	mp->b_datap->db_type = M_IOCACK;
@@ -1587,22 +1873,6 @@
 			atp->tun_iocmp = NULL;
 			goto nak;
 		}
-	} else if (ta->ifta_flags & IFTUN_SECURITY) {
-		/*
-		 * If just a change of security settings, do it now!
-		 * Either ifta_flags or tun_flags will do, but ASSERT
-		 * that both are turned on.
-		 */
-		ASSERT(atp->tun_flags & TUN_SECURITY);
-		atp->tun_iocmp = mp;
-		uerr = tun_send_sec_req(q);
-		if (uerr == 0) {
-			/* qreply() done by T_OPTMGMT_REQ processing */
-			return;
-		} else {
-			atp->tun_iocmp = NULL;
-			goto nak;
-		}
 	}
 	qreply(q, mp);
 	return;
@@ -1612,6 +1882,24 @@
 	qreply(q, mp);
 }
 
+/* B_FALSE if no ITP; else iph_ipvN() on itp_policy for our upper version. */
+static boolean_t
+tun_thisvers_policy(tun_t *atp)
+{
+	ipsec_policy_head_t *polhead;
+	boolean_t found;
+	int upper = atp->tun_flags & TUN_UPPER_MASK;
+
+	if (atp->tun_itp == NULL)
+		return (B_FALSE);
+	polhead = atp->tun_itp->itp_policy;
+	rw_enter(&polhead->iph_lock, RW_READER);
+	found = iph_ipvN(polhead, (upper & TUN_U_V6));
+	rw_exit(&polhead->iph_lock);
+
+	return (found);
+}
+
 /*
  * Processes SIOCs to setup a tunnel and IOCs to configure tunnel module.
  * M_IOCDATA->M_COPY->DATA or M_IOCTL->DATA
@@ -1632,6 +1920,7 @@
 	boolean_t new;
 	ipaddr_t *rr_addr;
 	char buf[INET6_ADDRSTRLEN];
+	struct lifreq *lifr;
 
 	lvers = atp->tun_flags & TUN_LOWER_MASK;
 
@@ -1690,12 +1979,29 @@
 		 * If we revise IFTUN_VERSION, this will become revision-
 		 * dependent.
 		 */
-		if (atp->tun_flags & TUN_SECURITY) {
-			ipsec_req_t *ipsr;
-
-			ta->ifta_flags |= IFTUN_SECURITY;
-			ipsr = (ipsec_req_t *)(&ta->ifta_secinfo);
-			*ipsr = atp->tun_secinfo;
+
+		if (tun_policy_present(atp) && tun_thisvers_policy(atp)) {
+			mutex_enter(&atp->tun_itp->itp_lock);
+			if (!(atp->tun_itp->itp_flags & ITPF_P_TUNNEL) &&
+			    (atp->tun_policy_index >=
+				atp->tun_itp->itp_next_policy_index)) {
+				ipsec_req_t *ipsr;
+
+				/*
+				 * Convert 0.0.0.0/0, 0::0/0 tree entry to
+				 * ipsec_req_t.
+				 */
+				ipsr = (ipsec_req_t *)ta->ifta_secinfo;
+				*ipsr = atp->tun_secinfo;
+				/* Reality check for empty polhead. */
+				if (ipsr->ipsr_ah_req != 0 ||
+				    ipsr->ipsr_esp_req != 0)
+					ta->ifta_flags |= IFTUN_SECURITY;
+			} else {
+				ta->ifta_flags |=
+				    (IFTUN_COMPLEX_SECURITY | IFTUN_SECURITY);
+			}
+			mutex_exit(&atp->tun_itp->itp_lock);
 		}
 
 		if (new && (iocp->ioc_cmd == SIOCGTUNPARAM)) {
@@ -1837,11 +2143,45 @@
 		if (uerr != 0)
 			goto nak;
 		break;
-	/*
-	 * We are module that thinks it's a driver so nak anything
-	 * we don't understand
-	 */
+	case SIOCSLIFNAME:
+		/*
+		 * Intercept SIOCSLIFNAME and attach the name to my
+		 * tunnel_instance.  For extra paranoia, if my name is not ""
+		 * (as it would be at tun_t initialization), don't change
+		 * anything.
+		 *
+		 * For now, this is the only way to tie tunnel names (as
+		 * used in IPsec Tunnel Policy (ITP) instances) to actual
+		 * tunnel instances.  In practice, SIOCSLIFNAME is only
+		 * used by ifconfig(1m) to change the ill name to something
+		 * ifconfig can handle.
+		 */
+		mp1 = mp->b_cont;
+		if (mp1 != NULL) {
+			lifr = (struct lifreq *)mp1->b_rptr;
+			if (atp->tun_lifname[0] == '\0') {
+				(void) strncpy(atp->tun_lifname,
+				    lifr->lifr_name, LIFNAMSIZ);
+				ASSERT(atp->tun_itp == NULL);
+				atp->tun_itp =
+				    get_tunnel_policy(atp->tun_lifname);
+				/*
+				 * It really doesn't matter if we return
+				 * NULL or not.  If we get the itp pointer,
+				 * we're in good shape.
+				 */
+			} else {
+				tun0dbg(("SIOCSLIFNAME:  new is %s, old is %s"
+				    " -  not changing\n",
+				    lifr->lifr_name, atp->tun_lifname));
+			}
+		}
+		break;
 	default:
+		/*
+		 * We are module that thinks it's a driver so nak anything we
+		 * don't understand
+		 */
 		uerr = EINVAL;
 		goto nak;
 	}
@@ -2079,50 +2419,253 @@
 	return (error);
 }
 
-static int
-tun_send_sec_req(queue_t *q)
+/*
+ * AF-specific policy flusher for the TUNSPARAM ioctl() path.  A TUNSPARAM
+ * only sets IPsec policy for one upper instance (IPv4-over-IP* or
+ * IPv6-over-IP*), so unchain just that family's non-hashed rules and leave
+ * the other family's alone.  The hash-chained policy heads are skipped
+ * because TUNSPARAM never fills them in.  Caller holds iph_lock as writer.
+ */
+static void
+flush_af(ipsec_policy_head_t *polhead, int ulp_vector)
+{
+	ipsec_policy_t *cur, *next;
+	int type, af;
+
+	ASSERT(RW_WRITE_HELD(&polhead->iph_lock));
+	af = (ulp_vector == TUN_U_V4) ? IPSEC_AF_V4 : IPSEC_AF_V6;
+	for (type = 0; type < IPSEC_NTYPES; type++) {
+		cur = polhead->iph_root[type].ipr_nonhash[af];
+		while (cur != NULL) {
+			next = cur->ipsp_hash.hash_next;
+			IPPOL_UNCHAIN(polhead, cur);
+			cur = next;
+		}
+	}
+}
+
+/*
+ * Create and insert the simple in/outbound rules for each AF in ulp_vector.
+ */
+static boolean_t
+insert_actual_policies(ipsec_tun_pol_t *itp, ipsec_act_t *actp, uint_t nact,
+    int ulp_vector)
 {
-	tun_t *atp = (tun_t *)q->q_ptr;
-	mblk_t *optmp;
-	struct T_optmgmt_req *omr;
-	struct opthdr *oh;
+	ipsec_selkey_t selkey;
+	ipsec_policy_t *pol;
+	ipsec_policy_root_t *pr;
+	ipsec_policy_head_t *polhead = itp->itp_policy;
+
+	bzero(&selkey, sizeof (selkey));
+
+	if (ulp_vector & TUN_U_V4) {
+		selkey.ipsl_valid = IPSL_IPV4;
+
+		/* v4 inbound (any failed create below returns B_FALSE) */
+		pol = ipsec_policy_create(&selkey, actp, nact,
+		    IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index);
+		if (pol == NULL)
+			return (B_FALSE);
+		pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
+		HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
+		ipsec_insert_always(&polhead->iph_rulebyid, pol);
+
+		/* v4 outbound, same selector and actions as inbound */
+		pol = ipsec_policy_create(&selkey, actp, nact,
+		    IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index);
+		if (pol == NULL)
+			return (B_FALSE);
+		pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
+		HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V4]);
+		ipsec_insert_always(&polhead->iph_rulebyid, pol);
+	}
+
+	if (ulp_vector & TUN_U_V6) {
+		selkey.ipsl_valid = IPSL_IPV6;
+
+		/* v6 inbound, on the non-hashed v6 chain */
+		pol = ipsec_policy_create(&selkey, actp, nact,
+		    IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index);
+		if (pol == NULL)
+			return (B_FALSE);
+		pr = &polhead->iph_root[IPSEC_TYPE_INBOUND];
+		HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
+		ipsec_insert_always(&polhead->iph_rulebyid, pol);
+
+		/* v6 outbound, same selector and actions as inbound */
+		pol = ipsec_policy_create(&selkey, actp, nact,
+		    IPSEC_PRIO_SOCKET, &itp->itp_next_policy_index);
+		if (pol == NULL)
+			return (B_FALSE);
+		pr = &polhead->iph_root[IPSEC_TYPE_OUTBOUND];
+		HASHLIST_INSERT(pol, ipsp_hash, pr->ipr_nonhash[IPSEC_AF_V6]);
+		ipsec_insert_always(&polhead->iph_rulebyid, pol);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Old-fashioned tunnel-ioctl method of setting tunnel security properties:
+ * installs ipsr as a low-priority 0.0.0.0/0 match in the tunnel's polhead.
+ * Returns 0, EINVAL, ENOMEM, EBUSY, or the error from a failed helper call.
+ */
+static int
+tun_set_sec_simple(tun_t *atp, ipsec_req_t *ipsr)
+{
+	int rc = 0;
+	uint_t nact;
+	ipsec_act_t *actp = NULL;
+	boolean_t clear_all, old_policy = B_FALSE;
+	ipsec_tun_pol_t *itp;
+	tun_t *other_tun;
+
+	tun1dbg(
+	    ("tun_set_sec_simple: adjusting tunnel security the old way."));
+
+#define	REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
+	/* Can't specify self-encap on a tunnel: reject any nonzero request. */
+	if (ipsr->ipsr_self_encap_req != 0)
+		return (EINVAL);
 
 	/*
-	 * Since we're adjusting security, adjust tun_extra_offset!
+	 * If it's a "clear-all" entry, unset the security flags and
+	 * resume normal cleartext (or inherit-from-global) policy.
 	 */
-	atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
-
-	optmp = tun_realloc_mblk(q, NULL, sizeof (*omr) + sizeof (*oh) +
-	    sizeof (atp->tun_secinfo), NULL, B_FALSE);
-	if (optmp == NULL)
-		return (ENOMEM);
-
-	optmp->b_wptr += sizeof (*omr) + sizeof (*oh) +
-	    sizeof (atp->tun_secinfo);
-	optmp->b_datap->db_type = M_PROTO;
-	omr = (struct T_optmgmt_req *)optmp->b_rptr;
-	oh = (struct opthdr *)(omr + 1);
-	/*
-	 * XXX Which TPI version am I?  Make sure we stay on top of things
-	 * w.r.t. option management.
-	 */
-	omr->PRIM_type = T_SVR4_OPTMGMT_REQ;
-	omr->MGMT_flags = T_NEGOTIATE;
-	omr->OPT_offset = sizeof (*omr);
-	omr->OPT_length = sizeof (*oh) + sizeof (atp->tun_secinfo);
-
-	oh->level = IPPROTO_IP;
-	oh->name = IP_SEC_OPT;
-	oh->len = sizeof (atp->tun_secinfo);
+	clear_all = ((ipsr->ipsr_ah_req & REQ_MASK) == 0 &&
+	    (ipsr->ipsr_esp_req & REQ_MASK) == 0);
+#undef REQ_MASK
 
 	mutex_enter(&atp->tun_lock);
-	*((ipsec_req_t *)(oh + 1)) = atp->tun_secinfo;
+	if (!tun_policy_present(atp)) {
+		if (clear_all) {
+			bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
+			atp->tun_policy_index = 0;
+			goto bail;	/* No need to allocate! */
+		}
+
+		ASSERT(atp->tun_lifname[0] != '\0');
+		atp->tun_itp = create_tunnel_policy(atp->tun_lifname,
+		    &rc, &atp->tun_itp_gen);
+		/* NOTE:  "rc" set by create_tunnel_policy(). */
+		if (atp->tun_itp == NULL)
+			goto bail;
+	}
+	itp = atp->tun_itp;
+
+	/* Allocate the actvec now, before holding itp or polhead locks. */
+	ipsec_actvec_from_req(ipsr, &actp, &nact);
+	if (actp == NULL) {
+		rc = ENOMEM;
+		goto bail;
+	}
+
+	/*
+	 * Just write on the active polhead.  Save the primary/secondary
+	 * stuff for spdsock operations.
+	 *
+	 * Mutex because we need to write to the polhead AND flags atomically.
+	 * Other threads will acquire the polhead lock as a reader if the
+	 * (unprotected) flag is set.
+	 */
+	mutex_enter(&itp->itp_lock);
+	if (itp->itp_flags & ITPF_P_TUNNEL) {
+		/*
+		 * Oops, we lost a race.  Let's get out of here.
+		 */
+		rc = EBUSY;
+		goto mutex_bail;
+	}
+	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);
+
+	if (old_policy) {
+		/*
+		 * We have to be more subtle here than we would
+		 * in the spdsock code-paths, due to backward compatibility.
+		 */
+		ITPF_CLONE(itp->itp_flags);
+		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive);
+		if (rc != 0) {
+			/* inactive has already been cleared. */
+			itp->itp_flags &= ~ITPF_IFLAGS;
+			goto mutex_bail;
+		}
+		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
+		flush_af(itp->itp_policy, atp->tun_flags & TUN_UPPER_MASK);
+	} else {
+		/* Else assume itp->itp_policy is already flushed. */
+		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
+	}
+
+	if (clear_all) {
+		/* We've already cleared out the polhead.  We are now done. */
+		if (avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0)
+			itp->itp_flags &= ~ITPF_PFLAGS;
+		rw_exit(&itp->itp_policy->iph_lock);
+		bzero(&atp->tun_secinfo, sizeof (ipsec_req_t));
+		old_policy = B_FALSE;	/* Clear out the inactive one too. */
+		goto recover_bail;
+	}
+	if (insert_actual_policies(itp, actp, nact,
+		atp->tun_flags & TUN_UPPER_MASK)) {
+		rw_exit(&itp->itp_policy->iph_lock);
+		/*
+		 * Adjust MTU and make sure the DL side knows what's up.
+		 */
+		atp->tun_ipsec_overhead = ipsec_act_ovhd(actp);
+		itp->itp_flags = ITPF_P_ACTIVE;
+		/*
+		 * <sigh> There has to be a better way, but for now, send an
+		 * IRE_DB_REQ again.  We will resynch from scratch, but have
+		 * the tun_ipsec_overhead taken into account.
+		 */
+		tun_send_ire_req(atp->tun_wq);
+		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
+		/* Copy ipsec_req_t for subsequent SIOCGTUNPARAM ops. */
+		atp->tun_secinfo = *ipsr;
+	} else {
+		rw_exit(&itp->itp_policy->iph_lock);
+		rc = ENOMEM;
+	}
+
+recover_bail:
+	atp->tun_policy_index = itp->itp_next_policy_index;
+	/* Find the "other guy" (v4/v6) and update his tun_policy_index too. */
+	if (atp->tun_stats != NULL) {
+		if (atp->tun_stats->ts_atp == atp) {
+			other_tun = atp->tun_kstat_next;
+			ASSERT(other_tun == NULL ||
+			    other_tun->tun_kstat_next == NULL);
+		} else {
+			other_tun = atp->tun_stats->ts_atp;
+			ASSERT(other_tun != NULL);
+			ASSERT(other_tun->tun_kstat_next == atp);
+		}
+		if (other_tun != NULL)
+			other_tun->tun_policy_index = atp->tun_policy_index;
+	}
+
+	if (old_policy) {
+		/* Recover policy in the active polhead. */
+		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive);
+		ITPF_SWAP(itp->itp_flags);
+		atp->tun_extra_offset = TUN_LINK_EXTRA_OFF;
+	}
+
+	/* Clear policy in inactive polhead. */
+	itp->itp_flags &= ~ITPF_IFLAGS;
+	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
+	ipsec_polhead_flush(itp->itp_inactive);
+	rw_exit(&itp->itp_inactive->iph_lock);
+
+mutex_bail:
+	mutex_exit(&itp->itp_lock);
+
+bail:
+	if (actp != NULL)
+		ipsec_actvec_free(actp, nact);
 	mutex_exit(&atp->tun_lock);
-
-	tun1dbg(("tun_send_sec_req: adjusting tunnel security."));
-
-	putnext(WR(q), optmp);
-	return (0);
+	return (rc);
 }
 
 /*
@@ -2192,7 +2735,8 @@
 	 * from below, then the pmtu argument has already been adjusted
 	 * by the IPsec overhead.
 	 */
-	if (!icmp && (atp->tun_flags & TUN_SECURITY))
+	if (!icmp && atp->tun_itp != NULL &&
+	    (atp->tun_itp->itp_flags & ITPF_P_ACTIVE))
 		newmtu -= atp->tun_ipsec_overhead;
 
 	if (atp->tun_flags & TUN_L_V4) {
@@ -2232,29 +2776,6 @@
 	mblk_t *iocmp;
 
 	switch (prim) {
-	case T_OPTMGMT_ACK:
-		mutex_enter(&atp->tun_lock);
-		iocmp = atp->tun_iocmp;
-		atp->tun_iocmp = NULL;
-		if (atp->tun_secinfo.ipsr_esp_req == 0 &&
-		    atp->tun_secinfo.ipsr_ah_req == 0) {
-			atp->tun_flags &= ~TUN_SECURITY;
-			mutex_exit(&atp->tun_lock);
-		} else {
-			/*
-			 * Since the security properties of the tunnel have
-			 * changed, request new ire information to
-			 * re-calculate the tunnel's link MTU.
-			 */
-			mutex_exit(&atp->tun_lock);
-			tun1dbg(("tun_rput_tpi: tunnel security attributes have"
-			    "been set.  Requesting ire"));
-			tun_send_ire_req(q);
-		}
-		ASSERT(iocmp != NULL);
-		putnext(q, iocmp);
-		freemsg(mp);
-		break;
 	case T_BIND_ACK:
 		tun1dbg(("tun_rput_tpi: got a T_BIND_ACK\n"));
 		mutex_enter(&atp->tun_lock);
@@ -2274,7 +2795,14 @@
 			ire_t *ire;
 
 			ire = (ire_t *)mp->b_cont->b_rptr;
-			atp->tun_ipsec_overhead = ire->ire_ipsec_overhead;
+			/*
+			 * Take advice from lower-layer if it is bigger than
+			 * what we have cached now.  We do manage per-tunnel
+			 * policy, but there may be global overhead to account
+			 * for.
+			 */
+			atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead,
+			    atp->tun_ipsec_overhead);
 			if (atp->tun_flags & TUN_DST) {
 				atp->tun_extra_offset =
 				    MAX(ire->ire_ll_hdr_length,
@@ -2296,34 +2824,6 @@
 		atp->tun_flags &= ~TUN_BIND_SENT;
 
 		iocmp = atp->tun_iocmp;
-		/*
-		 * If we have security information to send, do it after
-		 * the T_BIND_ACK has been received.  Don't bother ACK-ing
-		 * the ioctl until after this has been done.
-		 *
-		 * XXX One small nit about here is that if the user didn't set
-		 * IFTUN_SECURITY, then this handler will reset the security
-		 * levels anyway.
-		 */
-
-		if (atp->tun_flags & TUN_SECURITY) {
-			int err;
-			struct iocblk *iocp;
-
-			/* Exit the mutex for tun_send_sec_req(). */
-			mutex_exit(&atp->tun_lock);
-			err = tun_send_sec_req(q);
-			if (err != 0) {
-				/* Re-enter the mutex. */
-				mutex_enter(&atp->tun_lock);
-				atp->tun_flags &= ~TUN_SECURITY;
-				iocp = (struct iocblk *)iocmp->b_rptr;
-				iocp->ioc_error = err;
-			} else {
-				/* Let the OPTMGMT_ACK handler deal with it. */
-				break;	/* Out of this case. */
-			}
-		}
 
 		/*
 		 * Ack the ioctl
@@ -2336,24 +2836,6 @@
 	case T_ERROR_ACK: {
 		struct T_error_ack *terr = (struct T_error_ack *)mp->b_rptr;
 		switch (terr->ERROR_prim) {
-		case T_SVR4_OPTMGMT_REQ: {
-			struct iocblk *iocp;
-
-			mutex_enter(&atp->tun_lock);
-			/* XXX Should we should unbind too? */
-			atp->tun_flags &= ~TUN_SECURITY;
-			iocmp = atp->tun_iocmp;
-			atp->tun_iocmp = NULL;
-			mutex_exit(&atp->tun_lock);
-			iocp = (struct iocblk *)(iocmp->b_rptr);
-			/* XXX Does OPTMGMT generate TLI_errors? */
-			if (terr->UNIX_error == 0)
-				iocp->ioc_error = EINVAL;
-			else iocp->ioc_error = terr->UNIX_error;
-			putnext(q, iocmp);
-			freemsg(mp);
-			return (0);
-		}
 		case T_BIND_REQ: {
 			struct iftun_req	*ta;
 			mblk_t *mp1;
@@ -2426,16 +2908,14 @@
 /*
  * handle tunnel over IPv6
  */
-/* ARGSUSED */
 static int
-tun_rdata_v6(queue_t *q, mblk_t *mp)
+tun_rdata_v6(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp)
 {
-	tun_t *atp = (tun_t *)q->q_ptr;
 	ip6_t *outer_ip6h, *ip6h;
 	ipha_t *inner_iph;
 	uint8_t *rptr;
 	size_t		hdrlen;
-	mblk_t		*mp1;
+	mblk_t		*mp1, *nmp, *orig_mp = data_mp;
 	uint8_t		nexthdr;
 	boolean_t	inner_v4;
 	in6_addr_t	v6src;
@@ -2443,25 +2923,12 @@
 	char		buf[TUN_WHO_BUF];
 	char		buf1[INET6_ADDRSTRLEN];
 	char		buf2[INET6_ADDRSTRLEN];
+	int		pullup_len;
 
 	/* need at least an IPv6 header. */
-	ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ip6_t));
-
-	if (atp->tun_state != DL_IDLE) {
-		atomic_add_32(&atp->tun_InErrors, 1);
-		atomic_add_64(&atp->tun_HCInUcastPkts, 1);
-		goto drop;
-	}
-
-	if (!canputnext(q)) {
-		tun1dbg(("tun_rdata_v6: flow controlled\n"));
-		ASSERT(mp->b_datap->db_type < QPCTL);
-		atomic_add_32(&atp->tun_nocanput, 1);
-		(void) putbq(q, mp);
-		return (ENOMEM);	/* to stop service procedure */
-	}
-
-	outer_ip6h = (ip6_t *)mp->b_rptr;
+	ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
+
+	outer_ip6h = (ip6_t *)data_mp->b_rptr;
 
 	/* Handle ip6i_t case. */
 	if (outer_ip6h->ip6_nxt == IPPROTO_RAW) {
@@ -2470,22 +2937,24 @@
 		 * use ASSERT because of lint warnings.
 		 */
 		rptr = (uint8_t *)(outer_ip6h + 1);
-		mp->b_rptr = rptr;
-		if (rptr == mp->b_wptr) {
-			mp1 = mp->b_cont;
-			freeb(mp);
-			mp = mp1;
-			rptr = mp->b_rptr;
+		data_mp->b_rptr = rptr;
+		if (rptr == data_mp->b_wptr) {
+			mp1 = data_mp->b_cont;
+			freeb(data_mp);
+			orig_mp = data_mp = mp1;
+			rptr = data_mp->b_rptr;
+			if (ipsec_mp != NULL)
+				ipsec_mp->b_cont = data_mp;
 		}
-		ASSERT(mp->b_wptr - rptr >= sizeof (ip6_t));
+		ASSERT(data_mp->b_wptr - rptr >= sizeof (ip6_t));
 		outer_ip6h = (ip6_t *)rptr;
 	}
 
 
-	hdrlen = ip_hdr_length_v6(mp, outer_ip6h);
+	hdrlen = ip_hdr_length_v6(data_mp, outer_ip6h);
 	ASSERT(IPH_HDR_VERSION(outer_ip6h) == IPV6_VERSION);
 	ASSERT(hdrlen >= sizeof (ip6_t));
-	ASSERT(hdrlen <= (mp->b_wptr - mp->b_rptr));
+	ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr));
 
 	v6src = outer_ip6h->ip6_src;
 	v6dst = outer_ip6h->ip6_dst;
@@ -2501,7 +2970,7 @@
 		ip6_pkt_t ipp;
 
 		ipp.ipp_fields = 0; /* must be initialized */
-		(void) ip_find_hdr_v6(mp, outer_ip6h, &ipp, NULL);
+		(void) ip_find_hdr_v6(data_mp, outer_ip6h, &ipp, NULL);
 		if (ipp.ipp_dstopts != NULL) {
 			nexthdr = ipp.ipp_dstopts->ip6d_nxt;
 		} else if (ipp.ipp_rthdr != NULL) {
@@ -2519,52 +2988,87 @@
 	}
 	inner_v4 = (nexthdr == IPPROTO_ENCAP);
 
-	/* Shave off the outer header(s). */
-	if ((mp->b_wptr - mp->b_rptr) == hdrlen) {
-		tun1dbg(("tun_rdata_v6: new path hdrlen= %lu\n", hdrlen));
-		mp1 = mp->b_cont;
-		freeb(mp);
-		mp = mp1;
-		if (mp == NULL) {
-			tun0dbg(("tun_rdata_v6: b_cont null, no data\n"));
-			atomic_add_32(&atp->tun_InErrors, 1);
-			return (0);
-		}
-	} else {
-		mp->b_rptr += hdrlen;
-	}
-
-	if ((mp->b_wptr - mp->b_rptr) <
-	    (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t))) {
-		if (!pullupmsg(mp,
-		    (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)))) {
+	/*
+	 * NOTE:  The "+ 4" is for the upper-layer protocol information
+	 * (ports) so we can enforce policy.
+	 */
+	pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4;
+	if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) {
+		if (!pullupmsg(data_mp, pullup_len)) {
 			atomic_add_32(&atp->tun_InErrors, 1);
 			atomic_add_32(&atp->tun_InDiscard, 1);
 			goto drop;
 		}
+		outer_ip6h = (ip6_t *)data_mp->b_rptr;
 	}
 
+	/* Shave off the outer header(s). */
+	data_mp->b_rptr += hdrlen;
+
 	if (inner_v4) {
 		/* IPv4 in IPv6 */
-		inner_iph = (ipha_t *)mp->b_rptr;
+		inner_iph = (ipha_t *)data_mp->b_rptr;
 		ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION);
 		ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr) &&
 		    IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr));
+		if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp,
+			inner_iph, NULL, NULL, outer_ip6h, 0)) {
+			data_mp = NULL;
+			ipsec_mp = NULL;
+			atomic_add_32(&atp->tun_InErrors, 1);
+			goto drop;
+		}
+		if (data_mp != orig_mp) {
+			/* mp has changed, reset appropriate pointers */
+
+			/* Outer hdrlen is already shaved off */
+			ASSERT(data_mp != NULL);
+			inner_iph = (ipha_t *)data_mp->b_rptr;
+		}
+
+		/*
+		 * Remember - ipsec_tun_inbound() may return a whole chain
+		 * of packets if there was per-port policy on the ITP and
+		 * we got a fragmented packet.
+		 */
 		if (CLASSD(inner_iph->ipha_dst)) {
-			atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
 		} else {
-			atomic_add_64(&atp->tun_HCInUcastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInUcastPkts, 1);
 		}
 	} else {
 		/* IPv6 in IPv6 */
-		ip6h = (ip6_t *)mp->b_rptr;
+		ip6h = (ip6_t *)data_mp->b_rptr;
 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
-
 		ASSERT(IN6_ARE_ADDR_EQUAL(&v6dst, &atp->tun_laddr));
+
+		if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL,
+			ip6h, NULL, outer_ip6h, 0)) {
+			data_mp = NULL;
+			ipsec_mp = NULL;
+			atomic_add_32(&atp->tun_InErrors, 1);
+			goto drop;
+		}
+		if (data_mp != orig_mp) {
+			/* mp has changed, reset appropriate pointers */
+			/* v6src should still be a valid and relevant ptr */
+			ASSERT(data_mp != NULL);
+			ip6h = (ip6_t *)data_mp->b_rptr;
+		}
+
+		/*
+		 * Remember - ipsec_tun_inbound() may return a whole chain
+		 * of packets if there was per-port policy on the ITP and
+		 * we got a fragmented packet.
+		 */
 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
-			atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
 		} else {
-			atomic_add_64(&atp->tun_HCInUcastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInUcastPkts, 1);
 		}
 
 		if (!IN6_ARE_ADDR_EQUAL(&v6src, &atp->tun_faddr)) {
@@ -2579,18 +3083,17 @@
 				sizeof (buf1)),
 			    inet_ntop(AF_INET6, &atp->tun_faddr, buf2,
 				sizeof (buf2))));
-			atomic_add_32(&atp->tun_InErrors, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_32(&atp->tun_InErrors, 1);
 			goto drop;
 		}
 	}
-
-	mutex_enter(&atp->tun_lock);
-	atp->tun_HCInOctets += msgdsize(mp);
-	mutex_exit(&atp->tun_lock);
-	putnext(q, mp);
+	TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets);
 	return (0);
 drop:
-	freemsg(mp);
+	if (ipsec_mp != NULL)
+		freeb(ipsec_mp);
+	tun_freemsg_chain(data_mp, NULL);
 	return (0);
 }
 
@@ -2601,13 +3104,12 @@
  * what's the worst that can happen if the header stuff changes?
  */
 static int
-tun_rdata_v4(queue_t *q, mblk_t *mp)
+tun_rdata_v4(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp)
 {
-	tun_t		*atp = (tun_t *)q->q_ptr;
 	ipha_t		*iph, *inner_iph;
 	ip6_t		*ip6h;
 	size_t		hdrlen;
-	mblk_t		*mp1;
+	mblk_t		*mp1, *nmp, *orig_mp = data_mp;
 	boolean_t	inner_v4;
 	ipaddr_t	v4src;
 	ipaddr_t	v4dst;
@@ -2616,33 +3118,19 @@
 	char		buf1[INET6_ADDRSTRLEN];
 	char		buf2[INET6_ADDRSTRLEN];
 	char		buf[TUN_WHO_BUF];
+	int		pullup_len;
 
 	/* need at least an IP header */
-	ASSERT((mp->b_wptr - mp->b_rptr) >= sizeof (ipha_t));
-
-	if (atp->tun_state != DL_IDLE) {
-		atomic_add_32(&atp->tun_InErrors, 1);
-		/* need to count packet */
-		atomic_add_64(&atp->tun_HCInUcastPkts, 1);
-		goto drop;
-	}
-
-	if (!canputnext(q)) {
-		tun1dbg(("tun_rdata_v4: flow controlled\n"));
-		ASSERT(mp->b_datap->db_type < QPCTL);
-		atomic_add_32(&atp->tun_nocanput, 1);
-		(void) putbq(q, mp);
-		return (ENOMEM);	/* to stop service procedure */
-	}
-
-	iph = (ipha_t *)mp->b_rptr;
+	ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
+
+	iph = (ipha_t *)data_mp->b_rptr;
 
 	hdrlen = IPH_HDR_LENGTH(iph);
 	/* check IP version number */
 	ASSERT(IPH_HDR_VERSION(iph) == IPV4_VERSION);
 
 	ASSERT(hdrlen >= sizeof (ipha_t));
-	ASSERT(hdrlen <= (mp->b_wptr - mp->b_rptr));
+	ASSERT(hdrlen <= (data_mp->b_wptr - data_mp->b_rptr));
 
 	v4src = iph->ipha_src;
 	v4dst = iph->ipha_dst;
@@ -2650,52 +3138,93 @@
 	IN6_IPADDR_TO_V4MAPPED(v4dst, &v4mapped_dst);
 	inner_v4 = (iph->ipha_protocol == IPPROTO_ENCAP);
 
-	/* shave off the IPv4 header */
-	if ((mp->b_wptr - mp->b_rptr) == hdrlen) {
-		tun1dbg(("tun_rdata_v4: new path hdrlen= %lu\n", hdrlen));
-		mp1 = mp->b_cont;
-		freeb(mp);
-		mp = mp1;
-		if (mp == NULL) {
-			tun0dbg(("tun_rdata_v4: b_cont null, no data\n"));
-			atomic_add_32(&atp->tun_InErrors, 1);
-			return (0);
-		}
-	} else {
-		mp->b_rptr += hdrlen;
-	}
-
-	if ((mp->b_wptr - mp->b_rptr) <
-	    (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t))) {
-		if (!pullupmsg(mp,
-		    (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)))) {
+	/*
+	 * NOTE:  pullup_len already covers the outer header (hdrlen); the
+	 * "+ 4" is for the upper-layer protocol headers (policy enforcement).
+	 */
+	pullup_len = hdrlen + (inner_v4 ? sizeof (ipha_t) : sizeof (ip6_t)) + 4;
+	if ((data_mp->b_wptr - data_mp->b_rptr) < pullup_len) {
+		if (!pullupmsg(data_mp, pullup_len)) {
 			atomic_add_32(&atp->tun_InErrors, 1);
 			atomic_add_32(&atp->tun_InDiscard, 1);
 			goto drop;
 		}
+		iph = (ipha_t *)data_mp->b_rptr;
 	}
 
+	/* Shave off the IPv4 header. */
+	data_mp->b_rptr += hdrlen;
+
 	if (inner_v4) {
 		/* IPv4 in IPv4 */
-		inner_iph = (ipha_t *)mp->b_rptr;
+		inner_iph = (ipha_t *)data_mp->b_rptr;
 		ASSERT(IPH_HDR_VERSION(inner_iph) == IPV4_VERSION);
 		ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr) &&
 		    IN6_ARE_ADDR_EQUAL(&v4mapped_src, &atp->tun_faddr));
+
+		if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp,
+			inner_iph, NULL, iph, NULL, 0)) {
+			data_mp = NULL;
+			ipsec_mp = NULL;
+			atomic_add_32(&atp->tun_InErrors, 1);
+			goto drop;
+		}
+		if (data_mp != orig_mp) {
+			/* mp has changed, reset appropriate pointers */
+
+			/* Outer hdrlen is already shaved off */
+			ASSERT(data_mp != NULL);
+			inner_iph = (ipha_t *)data_mp->b_rptr;
+		}
+
+		/*
+		 * Remember - ipsec_tun_inbound() may return a whole chain
+		 * of packets if there was per-port policy on the ITP and
+		 * we got a fragmented packet.
+		 */
 		if (CLASSD(inner_iph->ipha_dst)) {
-			atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
 		} else {
-			atomic_add_64(&atp->tun_HCInUcastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInUcastPkts, 1);
 		}
+
 	} else {
 		/* IPv6 in IPv4 */
-		ip6h = (ip6_t *)mp->b_rptr;
+		ip6h = (ip6_t *)data_mp->b_rptr;
 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
 
+		if (!ipsec_tun_inbound(ipsec_mp, &data_mp, atp->tun_itp, NULL,
+			ip6h, iph, NULL, 0)) {
+			data_mp = NULL;
+			ipsec_mp = NULL;
+			atomic_add_32(&atp->tun_InErrors, 1);
+			goto drop;
+		}
+		if (data_mp != orig_mp) {
+			/* mp has changed, reset appropriate pointers */
+
+			/*
+			 * v6src and v4dst should still be
+			 * valid and relevant pointers
+			 */
+			ASSERT(data_mp != NULL);
+			ip6h = (ip6_t *)data_mp->b_rptr;
+		}
+
+		/*
+		 * Remember - ipsec_tun_inbound() may return a whole chain
+		 * of packets if there was per-port policy on the ITP and
+		 * we got a fragmented packet.
+		 */
 		ASSERT(IN6_ARE_ADDR_EQUAL(&v4mapped_dst, &atp->tun_laddr));
 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
-			atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInMulticastPkts, 1);
 		} else {
-			atomic_add_64(&atp->tun_HCInUcastPkts, 1);
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_64(&atp->tun_HCInUcastPkts, 1);
 		}
 
 		/* Is this an automatic tunnel ? */
@@ -2711,7 +3240,10 @@
 				    tun_who(q, buf),
 				    inet_ntop(AF_INET, &v4dst,
 					buf1, sizeof (buf1))));
-				atomic_add_32(&atp->tun_InErrors, 1);
+				for (nmp = data_mp; nmp != NULL;
+				    nmp = nmp->b_next) {
+					atomic_add_32(&atp->tun_InErrors, 1);
+				}
 				goto drop;
 			}
 
@@ -2727,15 +3259,16 @@
 				atomic_add_32(&atp->tun_allocbfail, 1);
 				goto drop;
 			}
-			mp1->b_cont = mp;
-			mp = mp1;
+			mp1->b_cont = data_mp;
+			data_mp = mp1;
 			/*
 			 * create dl_unitdata_ind with group address set so
 			 * we don't forward
 			 */
-			mp->b_wptr = mp->b_rptr + sizeof (dl_unitdata_ind_t);
-			mp->b_datap->db_type = M_PROTO;
-			dludindp = (dl_unitdata_ind_t *)mp->b_rptr;
+			data_mp->b_wptr = data_mp->b_rptr +
+			    sizeof (dl_unitdata_ind_t);
+			data_mp->b_datap->db_type = M_PROTO;
+			dludindp = (dl_unitdata_ind_t *)data_mp->b_rptr;
 			dludindp->dl_primitive = DL_UNITDATA_IND;
 			dludindp->dl_dest_addr_length = 0;
 			dludindp->dl_dest_addr_offset = 0;
@@ -2771,7 +3304,10 @@
 				    "IPv4 dest (%s)\n", tun_who(q, buf),
 				    inet_ntop(AF_INET, &v4dst, buf1,
 					sizeof (buf1))));
-				atomic_add_32(&atp->tun_InErrors, 1);
+				for (nmp = data_mp; nmp != NULL;
+				    nmp = nmp->b_next) {
+					atomic_add_32(&atp->tun_InErrors, 1);
+				}
 				goto drop;
 			}
 
@@ -2789,7 +3325,10 @@
 					buf1, sizeof (buf1)),
 				    inet_ntop(AF_INET, &v4dst,
 					buf2, sizeof (buf2))));
-				atomic_add_32(&atp->tun_InDiscard, 1);
+				for (nmp = data_mp; nmp != NULL;
+				    nmp = nmp->b_next) {
+					atomic_add_32(&atp->tun_InErrors, 1);
+				}
 				goto drop;
 			}
 
@@ -2814,7 +3353,11 @@
 						buf1, sizeof (buf1)),
 					    inet_ntop(AF_INET, &v4src,
 						buf2, sizeof (buf2))));
-					atomic_add_32(&atp->tun_InDiscard, 1);
+					for (nmp = data_mp; nmp != NULL;
+					    nmp = nmp->b_next) {
+						atomic_add_32(
+						    &atp->tun_InErrors, 1);
+					}
 					goto drop;
 				}
 
@@ -2847,7 +3390,11 @@
 						sizeof (buf1)),
 					    inet_ntop(AF_INET, &v4src, buf2,
 						sizeof (buf2))));
-					atomic_add_32(&atp->tun_InDiscard, 1);
+					for (nmp = data_mp; nmp != NULL;
+					    nmp = nmp->b_next) {
+						atomic_add_32(
+						    &atp->tun_InErrors, 1);
+					}
 					goto drop;
 				}
 			}
@@ -2870,48 +3417,45 @@
 				buf1, sizeof (buf1)),
 			    inet_ntop(AF_INET6, &atp->tun_faddr,
 				buf2, sizeof (buf2))));
-			atomic_add_32(&atp->tun_InErrors, 1);
+			/* XXX - should this be per-frag? */
+			for (nmp = data_mp; nmp != NULL; nmp = nmp->b_next)
+				atomic_add_32(&atp->tun_InErrors, 1);
 			goto drop;
 		}
 	}
-	atomic_add_64(&atp->tun_HCInOctets, (int64_t)msgdsize(mp));
-	putnext(q, mp);
+	TUN_PUTMSG_CHAIN_STATS(q, data_mp, nmp, &atp->tun_HCInOctets);
 	return (0);
 drop:
-	freemsg(mp);
+	if (ipsec_mp != NULL)
+		freeb(ipsec_mp);
+	tun_freemsg_chain(data_mp, NULL);
 	return (0);
 }
 
-/* ARGSUSED */
 static void
-tun_rput_icmp_err_v6(queue_t *q, mblk_t *mp)
+tun_rput_icmp_err_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
 	ip6_t		*ip6;
 	icmp6_t		*icmph;
 	int		hdr_length;
 
-	if (!canputnext(q)) {
-		atomic_add_32(&atp->tun_nocanput, 1);
-		atomic_add_32(&atp->tun_InDiscard, 1);
-		freemsg(mp);
-		return;
-	}
-
 	ip6 = (ip6_t *)mp->b_rptr;
 	hdr_length = ip_hdr_length_v6(mp, ip6);
 	icmph = (icmp6_t *)(&mp->b_rptr[hdr_length]);
 
 	switch (atp->tun_flags & TUN_UPPER_MASK) {
 	case TUN_U_V6:
-		icmp_ricmp_err_v6_v6(q, mp, icmph);
+		icmp_ricmp_err_v6_v6(q, mp, ipsec_mp, icmph);
 		break;
 	case TUN_U_V4:
-		icmp_ricmp_err_v4_v6(q, mp, icmph);
+		icmp_ricmp_err_v4_v6(q, mp, ipsec_mp, icmph);
 		break;
 	default:
 		atomic_add_32(&atp->tun_InErrors, 1);
 		ASSERT(0);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 	}
 }
@@ -2922,28 +3466,22 @@
  * lower processing function.
  */
 static void
-tun_rput_icmp_err_v4(queue_t *q, mblk_t *mp)
+tun_rput_icmp_err_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
 
-	/* XXX - should we trust this or do the same logic as IP */
-	if (!canputnext(q)) {
-		atomic_add_32(&atp->tun_nocanput, 1);
-		atomic_add_32(&atp->tun_InDiscard, 1);
-		freemsg(mp);
-		return;
-	}
-
 	switch (atp->tun_flags & TUN_UPPER_MASK) {
 	case TUN_U_V6:
-		icmp_ricmp_err_v6_v4(q, mp);
+		icmp_ricmp_err_v6_v4(q, mp, ipsec_mp);
 		break;
 	case TUN_U_V4:
-		icmp_ricmp_err_v4_v4(q, mp);
+		icmp_ricmp_err_v4_v4(q, mp, ipsec_mp);
 		break;
 	default:
 		atomic_add_32(&atp->tun_InErrors, 1);
 		ASSERT(0);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 	}
 }
@@ -2956,7 +3494,7 @@
  * the upper layer IP)
  */
 static void
-icmp_ricmp_err_v4_v4(queue_t *q, mblk_t *mp)
+icmp_ricmp_err_v4_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
 	ipha_t		*outer_ipha, *inner_ipha;
@@ -2969,38 +3507,25 @@
 	char		buf1[INET_ADDRSTRLEN];
 	char		buf2[INET_ADDRSTRLEN];
 	icmph_t		*icmph;
+	mblk_t		*orig_mp = mp;
 
 	/*
 	 * The packet looks like this :
 	 *
-	 *		[IPv4][ICMPv4][IPv4][IPv4][ULP]
+	 *		[IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
 	 *
 	 * We want most of this in one piece. But if the ULP is ICMP, we
 	 * need to see whether it is an ICMP error or not. We should not
-	 * send icmp errors in response to icmp errors. To see whether
-	 * ULP is ICMP or not, we need to do the following :
+	 * send icmp errors in response to icmp errors.  "outer_ipha" points
+	 * to IP header (1), "inner_ipha" points to IP header (2).  Inbound
+	 * policy lookups for ICMP need to reverse the src/dst of things.
+	 * Fortunately, ipsec_tun_inbound() can determine if this is an ICMP
+	 * message or not.
 	 *
-	 *	- First pullup the outer IP header (i.e outer_ipha)
-	 *	  and the inner IP header without IP options.
-	 *	- Then obtain the length of the inner IP header,
-	 *	  pullup the inner IP header including the options and
-	 *	  the ICMP header following that.
-	 *
-	 * To keep it simple, we pullup the whole message.
+	 * The caller already pulled up the entire message, or should have!
 	 */
-	if (mp->b_cont != NULL) {
-		mp->b_datap->db_type = M_DATA;
-		if (!pullupmsg(mp, -1)) {
-			atomic_add_32(&atp->tun_InDiscard, 1);
-			freemsg(mp);
-			return;
-		}
-		mp->b_datap->db_type = M_CTL;
-	}
-	/*
-	 * icmp_inbound has pulled up the message until the
-	 * outer IP header excluding any IP options.
-	 */
+	ASSERT(mp->b_cont == NULL);
+
 	hlen = IPH_HDR_LENGTH((ipha_t *)mp->b_rptr);
 	icmph = (icmph_t *)(&mp->b_rptr[hlen]);
 	outer_ipha = (ipha_t *)&icmph[1];
@@ -3009,6 +3534,8 @@
 
 	if (((uchar_t *)inner_ipha + sizeof (ipha_t)) > mp->b_wptr) {
 		atomic_add_32(&atp->tun_InDiscard, 1);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3020,6 +3547,8 @@
 
 		if (((uchar_t *)inner_icmph + sizeof (icmph_t)) > mp->b_wptr) {
 			atomic_add_32(&atp->tun_InDiscard, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3032,6 +3561,8 @@
 		case ICMP_REDIRECT:
 			atomic_add_32(&atp->tun_InDiscard, 1);
 			freemsg(mp);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			return;
 		default :
 			break;
@@ -3041,6 +3572,21 @@
 	type = icmph->icmph_type;
 	code = icmph->icmph_code;
 
+	/*
+	 * NOTE:  icmp_inbound() in IP already checked global policy on the
+	 * outermost header.  If we got here, IP thought it was okay for
+	 * us to receive it.  We now have to use inner policy to see if
+	 * we want to percolate it up (like conn_t's are checked).
+	 *
+	 * Use -outer_hlen to indicate this is an ICMP packet.
+	 */
+	if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, inner_ipha, NULL,
+		outer_ipha, NULL, -outer_hlen)) {
+		/* Callee did all of the freeing */
+		return;
+	}
+	ASSERT(mp == orig_mp);
+
 	/* New packet will contain all of old packet */
 
 	mp->b_rptr = (uchar_t *)inner_ipha;
@@ -3056,6 +3602,8 @@
 				tun0dbg(("icmp_ricmp_err_v4_v4: invalid " \
 				    "icmp mtu\n"));
 				atomic_add_32(&atp->tun_InErrors, 1);
+				if (ipsec_mp != NULL)
+					freeb(ipsec_mp);
 				freemsg(mp);
 				return;
 			}
@@ -3109,6 +3657,8 @@
 			break;
 		default:
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3124,6 +3674,8 @@
 			tun0dbg(("icmp_ricmp_err_v4_v4: ICMP_PARAM_PROBLEM " \
 			    "too short\n"));
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3133,6 +3685,8 @@
 		break;
 	default:
 		atomic_add_32(&atp->tun_InErrors, 1);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3150,7 +3704,7 @@
  * the upper layer IP)
  */
 static void
-icmp_ricmp_err_v4_v6(queue_t *q, mblk_t *mp, icmp6_t *icmph)
+icmp_ricmp_err_v4_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
 	ip6_t		*ip6;
@@ -3165,13 +3719,35 @@
 	struct ip6_opt	*optp;
 	boolean_t	found = B_FALSE;
 	ip6_pkt_t	pkt;
+	mblk_t		*orig_mp = mp;
 
 	ip6 = (ip6_t *)&(icmph[1]);
 
+	/*
+	 * The packet looks like this:
+	 *
+	 *		[IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
+	 *
+	 * "ip6" points to the IPv6 header labelled (1).
+	 */
 	outer_hlen = ip_hdr_length_v6(mp, ip6);
 	ipha = (ipha_t *)((uint8_t *)ip6 + outer_hlen);
 	type = icmph->icmp6_type;
 
+	/*
+	 * NOTE:  icmp_inbound() in IP already checked global policy on the
+	 * outermost header.  If we got here, IP thought it was okay for
+	 * us to receive it.  We now have to use inner policy to see if
+	 * we want to percolate it up (like conn_t's are checked).
+	 *
+	 * Use -outer_hlen to indicate this is an ICMP packet.
+	 */
+	if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, ipha, NULL, NULL,
+		ip6, -outer_hlen))
+		/* Callee did all of the freeing */
+		return;
+	ASSERT(mp == orig_mp);
+
 	/* new packet will contain all of old packet */
 
 	mp->b_rptr = (uchar_t *)ipha;
@@ -3231,6 +3807,8 @@
 		}
 
 		if (found != B_TRUE) {
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3259,6 +3837,8 @@
 		break;
 	}
 	default:
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3277,7 +3857,7 @@
  * the upper layer IP).  Otherwise, drop the message.
  */
 static void
-icmp_ricmp_err_v6_v6(queue_t *q, mblk_t *mp, icmp6_t *icmph)
+icmp_ricmp_err_v6_v6(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp, icmp6_t *icmph)
 {
 	ip6_t		*ip6;
 	ip6_t		*inner_ip6;
@@ -3292,12 +3872,35 @@
 	struct ip6_opt	*optp;
 	boolean_t	found = B_FALSE;
 	ip6_pkt_t	pkt;
-
+	mblk_t		*orig_mp = mp;
+
+	/*
+	 * The packet looks like this :
+	 *
+	 *		[IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
+	 *
+	 * "ip6" points to the IPv6 header labelled (1), and inner_ip6 points
+	 * to IPv6 header (2).
+	 */
 	ip6 = (ip6_t *)&icmph[1];
 	outer_hlen = ip_hdr_length_v6(mp, ip6);
 	inner_ip6 = (ip6_t *)((uint8_t *)ip6 + outer_hlen);
 	type = icmph->icmp6_type;
 
+	/*
+	 * NOTE:  icmp_inbound() in IP already checked global policy on the
+	 * outermost header.  If we got here, IP thought it was okay for
+	 * us to receive it.  We now have to use inner policy to see if
+	 * we want to percolate it up (like conn_t's are checked).
+	 *
+	 * Use -outer_hlen to indicate this is an ICMP packet.
+	 */
+	if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, inner_ip6,
+		NULL, ip6, -outer_hlen))
+		/* Callee did all of the freeing */
+		return;
+	ASSERT(mp == orig_mp);
+
 	/* new packet will contain all of old packet */
 
 	mp->b_rptr = (uchar_t *)inner_ip6;
@@ -3357,6 +3960,8 @@
 		}
 
 		if (found != B_TRUE) {
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;	/* case */
 		}
@@ -3386,6 +3991,8 @@
 		break;
 	}
 	default:
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3405,7 +4012,7 @@
  * the upper layer IP)
  */
 static void
-icmp_ricmp_err_v6_v4(queue_t *q, mblk_t *mp)
+icmp_ricmp_err_v6_v4(queue_t *q, mblk_t *mp, mblk_t *ipsec_mp)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
 	ip6_t		*ip6h;
@@ -3421,6 +4028,7 @@
 	icmph_t		*icmph;
 	uint16_t	ip6_hdr_length;
 	uint8_t		*nexthdrp;
+	mblk_t		*orig_mp = mp;
 
 	/*
 	 * The case here is pretty easy when compared to IPv4 in IPv4
@@ -3428,25 +4036,18 @@
 	 *
 	 * The packet looks like this :
 	 *
-	 *		[IPv4][ICMPv4][IPv4][IPv6][ULP]
+	 *		[IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
 	 *
 	 * We want most of this in one piece. But if the ULP is ICMPv6, we
 	 * need to see whether it is an ICMPv6 error or not. We should not
-	 * send icmp errors in response to icmp errors. To see whether
-	 * ULP is ICMPv6 or not, we need to call ip_hdr_length_nexthdr_v6
-	 * function which expects everything to be pulled up. So, we
-	 * pullup the whole message and see whether it is an ICMPv6 error
-	 * and discard if it is. Otherwise, we do the normal processing.
+	 * send icmp errors in response to icmp errors. "outer_ipha" points to
+	 * IP header (1).  "ip6h" is obvious.  To see whether ULP is ICMPv6 or
+	 * not, we need to call ip_hdr_length_nexthdr_v6 function which
+	 * expects everything to be pulled up.  Fortunately, the caller
+	 * should've done all of the pulling up.
 	 */
-	if (mp->b_cont != NULL) {
-		mp->b_datap->db_type = M_DATA;
-		if (!pullupmsg(mp, -1)) {
-			atomic_add_32(&atp->tun_InErrors, 1);
-			freemsg(mp);
-			return;
-		}
-		mp->b_datap->db_type = M_CTL;
-	}
+	ASSERT(mp->b_cont == NULL);
+
 	/*
 	 * icmp_inbound has pulled up the message until the
 	 * outer IP header excluding any IP options.
@@ -3459,6 +4060,8 @@
 
 	if (((uchar_t *)ip6h + sizeof (ip6_t)) > mp->b_wptr) {
 		atomic_add_32(&atp->tun_InDiscard, 1);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3468,6 +4071,8 @@
 	 */
 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip6_hdr_length, &nexthdrp)) {
 		atomic_add_32(&atp->tun_InErrors, 1);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3481,6 +4086,8 @@
 		    (ICMP6_IS_ERROR(inner_icmp6->icmp6_type)) ||
 		    inner_icmp6->icmp6_type == ND_REDIRECT) {
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3490,6 +4097,20 @@
 	code = icmph->icmph_code;
 	hoplim = outer_ipha->ipha_ttl;
 
+	/*
+	 * NOTE:  icmp_inbound() in IP already checked global policy on the
+	 * outermost header.  If we got here, IP thought it was okay for
+	 * us to receive it.  We now have to use inner policy to see if
+	 * we want to percolate it up (like conn_t's are checked).
+	 *
+	 * Use -outer_hlen to indicate this is an ICMP packet.
+	 */
+	if (!ipsec_tun_inbound(ipsec_mp, &mp, atp->tun_itp, NULL, ip6h,
+		outer_ipha, NULL, -outer_hlen))
+		/* Callee did all of the freeing */
+		return;
+	ASSERT(mp == orig_mp);
+
 	/* New packet will contain all of old packet */
 
 	mp->b_rptr = (uchar_t *)ip6h;
@@ -3505,6 +4126,8 @@
 				tun0dbg(("icmp_ricmp_err_v6_v4: invalid " \
 				    "icmp mtu\n"));
 				atomic_add_32(&atp->tun_InErrors, 1);
+				if (ipsec_mp != NULL)
+					freeb(ipsec_mp);
 				freemsg(mp);
 				return;
 			}
@@ -3572,6 +4195,8 @@
 			break;
 		default:
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3587,6 +4212,8 @@
 			tun0dbg(("icmp_ricmp_err_v6_v4: ICMP_PARAM_PROBLEM " \
 			    "too short\n"));
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			return;
 		}
@@ -3597,6 +4224,8 @@
 
 	default:
 		atomic_add_32(&atp->tun_InErrors, 1);
+		if (ipsec_mp != NULL)
+			freeb(ipsec_mp);
 		freemsg(mp);
 		return;
 	}
@@ -3809,6 +4438,85 @@
 	}
 }
 
+/*
+ * Common read-side handling for tunneled data packets.
+ *
+ * "data_mp" is the packet and "ipsec_mp", if non-NULL, is the IPSEC_IN
+ * message block linked in front of it.  "lvers" is the lower IP version
+ * (TUN_L_V4 or TUN_L_V6) and picks tun_rdata_v4() or tun_rdata_v6().
+ *
+ * Returns 0 if the tunnel is not DL_IDLE (the message is counted and freed),
+ * ENOMEM if the message was put back on "q" because of flow control, EIO
+ * if "lvers" names no lower version (the message is freed), and otherwise
+ * whatever the per-version handler returns.
+ */
+static int
+tun_rdata(queue_t *q, mblk_t *ipsec_mp, mblk_t *data_mp, tun_t *atp,
+    uint_t lvers)
+{
+	char buf[TUN_WHO_BUF];
+	int error = 0;
+
+	ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp);
+
+	/* The whole message: starts at the IPSEC_IN if we still hold one. */
+#define	MESSAGE ((ipsec_mp == NULL) ? data_mp : ipsec_mp)
+
+	/*
+	 * If it's an IPSEC_IN w/o any security properties, start treating
+	 * it like a cleartext packet.
+	 */
+	if (ipsec_mp != NULL && !ipsec_in_is_secure(ipsec_mp)) {
+		freeb(ipsec_mp);
+		ipsec_mp = NULL;
+	}
+
+	if (atp->tun_state != DL_IDLE) {
+		atomic_add_32(&atp->tun_InErrors, 1);
+		atomic_add_64(&atp->tun_HCInUcastPkts, 1);
+		freemsg(MESSAGE);
+		return (error);	/* pre-set to 0 */
+	}
+
+	if (!canputnext(q)) {
+		tun1dbg(("tun_rdata: flow controlled\n"));
+		ASSERT(data_mp->b_datap->db_type < QPCTL);
+		atomic_add_32(&atp->tun_nocanput, 1);
+		(void) putbq(q, MESSAGE);
+		/*
+		 * Flow control is not a problem worth recording.  Return
+		 * here rather than via "bail", whose filter only exempts
+		 * EBUSY and so would log this ENOMEM as an error.
+		 */
+		return (ENOMEM);
+	}
+
+	if (lvers != TUN_L_V4 && lvers != TUN_L_V6) {
+		tun0dbg(("tun_rproc: %s no lower version\n",
+			    tun_who(q, buf)));
+		atomic_add_32(&atp->tun_InErrors, 1);
+		freemsg(MESSAGE);
+		error = EIO;
+		goto bail;
+	}
+
+#undef MESSAGE
+
+	error = (lvers == TUN_L_V4) ? tun_rdata_v4(q, ipsec_mp, data_mp, atp) :
+	    tun_rdata_v6(q, ipsec_mp, data_mp, atp);
+
+bail:
+	if (error) {
+		/* only record non flow control problems */
+		if (error != EBUSY) {
+			tun0dbg(("tun_rproc: %s error encounterd %d\n",
+				    tun_who(q, buf), error));
+		}
+	}
+
+	return (error);
+}
+
 /*
  * Process read side messages
  */
@@ -3819,30 +4510,15 @@
 	uint_t	lvers;
 	int	error = 0;
 	char	buf[TUN_WHO_BUF];
+	ipsec_in_t *ii;
+	mblk_t *ipsec_mp;
 
 	/* no lock needed, won't ever change */
 	lvers = atp->tun_flags & TUN_LOWER_MASK;
 
 	switch (mp->b_datap->db_type) {
 	case M_DATA:
-
-		if (lvers == TUN_L_V4) {
-			error = tun_rdata_v4(q, mp);
-		} else if (lvers == TUN_L_V6) {
-			error = tun_rdata_v6(q, mp);
-		} else {
-			tun0dbg(("tun_rproc: %s no lower version\n",
-			    tun_who(q, buf)));
-			atomic_add_32(&atp->tun_InErrors, 1);
-			freemsg(mp);
-		}
-		if (error) {
-			/* only record non flow control problems */
-			if (error != EBUSY) {
-				tun0dbg(("tun_rproc: %s error encounterd %d\n",
-				    tun_who(q, buf), error));
-			}
-		}
+		error = tun_rdata(q, NULL, mp, atp, lvers);
 		break;
 
 	case M_PROTO:
@@ -3852,24 +4528,56 @@
 		break;
 
 	case M_CTL:
-		/* its an ICMP error message from IP */
-
+		/* it's either an IPsec-protected packet... */
+		ii = (ipsec_in_t *)mp->b_rptr;
+		if (ii->ipsec_in_type == IPSEC_IN) {
+			if (mp->b_cont->b_datap->db_type == M_DATA) {
+				error = tun_rdata(q, mp, mp->b_cont, atp,
+				    lvers);
+				break;	/* Out of switch. */
+			} else {
+				ASSERT(mp->b_cont->b_datap->db_type == M_CTL);
+				/*
+				 * ICMP message protected by IPsec.
+				 * Split out IPSEC_IN and pass it up separately.
+				 */
+				ipsec_mp = mp;
+				mp = mp->b_cont;
+			}
+		} else {
+			ipsec_mp = NULL;
+		}
+
+		/* ... or an ICMP error message from IP */
 		atomic_add_64(&atp->tun_HCInUcastPkts, 1);
 
+		if (!canputnext(q)) {
+			atomic_add_32(&atp->tun_nocanput, 1);
+			atomic_add_32(&atp->tun_InDiscard, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
+			freemsg(mp);
+			break;
+		}
+
 		/* Pull everything up into mp. */
 		mp->b_datap->db_type = M_DATA;
 		if (!pullupmsg(mp, -1)) {
 			atomic_add_32(&atp->tun_InErrors, 1);
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 			break;
 		}
 		mp->b_datap->db_type = M_CTL;
 
 		if (lvers == TUN_L_V4) {
-			tun_rput_icmp_err_v4(q, mp);
+			tun_rput_icmp_err_v4(q, mp, ipsec_mp);
 		} else if (lvers == TUN_L_V6) {
-			tun_rput_icmp_err_v6(q, mp);
+			tun_rput_icmp_err_v6(q, mp, ipsec_mp);
 		} else {
+			if (ipsec_mp != NULL)
+				freeb(ipsec_mp);
 			freemsg(mp);
 		}
 		break;
@@ -3894,7 +4602,13 @@
 		tun1dbg(("tun_rproc: received IRE_DB_TYPE, "
 		    "ipsec_overhead is %d bytes", ire->ire_ipsec_overhead));
 		mutex_enter(&atp->tun_lock);
-		atp->tun_ipsec_overhead = ire->ire_ipsec_overhead;
+		/*
+		 * Take advice from lower-layer if it is bigger than what we
+		 * have cached now.  We do manage per-tunnel policy, but
+		 * there may be global overhead to account for.
+		 */
+		atp->tun_ipsec_overhead = max(ire->ire_ipsec_overhead,
+		    atp->tun_ipsec_overhead);
 		if (atp->tun_flags & TUN_DST) {
 			(void) tun_update_link_mtu(q, ire->ire_max_frag,
 			    B_FALSE);
@@ -3919,10 +4633,10 @@
 static void
 tun_wdata_v4(queue_t *q, mblk_t *mp)
 {
-	ipha_t *outer_ipha, *inner_ipha;
-	ip6_t *ip6;
+	ipha_t *outer_ipha = NULL, *inner_ipha;
+	ip6_t *ip6 = NULL;
 	tun_t *atp = (tun_t *)q->q_ptr;
-	mblk_t *newmp;
+	mblk_t *nmp;
 	size_t hdrlen;
 	int16_t encap_limit;
 
@@ -3971,16 +4685,16 @@
 		if ((mp->b_rptr - mp->b_datap->db_base) < sizeof (ipha_t)) {
 			/* no */
 
-			newmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
+			nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
 			    BPRI_HI);
-			if (newmp == NULL) {
+			if (nmp == NULL) {
 				atomic_add_32(&atp->tun_OutDiscard, 1);
 				atomic_add_32(&atp->tun_allocbfail, 1);
 				freemsg(mp);
 				return;
 			}
-			newmp->b_cont = mp;
-			mp = newmp;
+			nmp->b_cont = mp;
+			mp = nmp;
 			mp->b_wptr = mp->b_datap->db_lim;
 			mp->b_rptr = mp->b_wptr - sizeof (ipha_t);
 		} else {
@@ -4016,16 +4730,16 @@
 
 		if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) {
 			/* no */
-			newmp = allocb(hdrlen + atp->tun_extra_offset,
+			nmp = allocb(hdrlen + atp->tun_extra_offset,
 			    BPRI_HI);
-			if (newmp == NULL) {
+			if (nmp == NULL) {
 				atomic_add_32(&atp->tun_OutDiscard, 1);
 				atomic_add_32(&atp->tun_allocbfail, 1);
 				freemsg(mp);
 				return;
 			}
-			newmp->b_cont = mp;
-			mp = newmp;
+			nmp->b_cont = mp;
+			mp = nmp;
 			mp->b_wptr = mp->b_datap->db_lim;
 			mp->b_rptr = mp->b_wptr - hdrlen;
 		} else {
@@ -4058,7 +4772,14 @@
 		tun_send_ire_req(q);
 
 	atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgdsize(mp));
-	putnext(q, mp);
+
+	mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6,
+	    hdrlen);
+	if (mp == NULL)
+		return;
+
+	/* send the packet chain down the transport stream to IPv4/IPv6 */
+	TUN_PUTMSG_CHAIN(q, mp, nmp);
 }
 
 /*
@@ -4069,9 +4790,10 @@
 tun_wputnext_v4(queue_t *q, mblk_t *mp)
 {
 	tun_t *atp = (tun_t *)q->q_ptr;
-	ipha_t *inner_ipha, *outer_ipha;
-	ip6_t *ip6;
+	ipha_t *inner_ipha, *outer_ipha = NULL;
+	ip6_t *ip6 = NULL;
 	uint_t	hdrlen;
+	mblk_t *nmp;
 
 	mp->b_rptr += atp->tun_extra_offset;
 	if ((atp->tun_flags & TUN_L_V4) != 0) {
@@ -4178,6 +4900,11 @@
 
 	atomic_add_64(&atp->tun_HCOutOctets, (int64_t)msgsize(mp));
 
+	mp = ipsec_tun_outbound(mp, atp, inner_ipha, NULL, outer_ipha, ip6,
+	    hdrlen);
+	if (mp == NULL)
+		return (0);
+
 	/*
 	 * Request the destination ire regularly in case Path MTU has
 	 * increased.
@@ -4185,8 +4912,8 @@
 	if (TUN_IRE_TOO_OLD(atp))
 		tun_send_ire_req(q);
 
-	/* send the packet down the transport stream to IPv4/IPv6 */
-	putnext(q, mp);
+	/* send the packet chain down the transport stream to IPv4/IPv6 */
+	TUN_PUTMSG_CHAIN(q, mp, nmp);
 	return (0);
 }
 
@@ -4199,9 +4926,12 @@
 {
 	tun_t	*atp = (tun_t *)q->q_ptr;
 	ip6_t	*ip6h;
+	ip6_t *outer_ip6 = NULL;
 	uint_t	hdrlen;
 	struct ip6_opt_tunnel *encap_opt;
 	int	encap_limit = 0;
+	ipha_t	*ipha = NULL;
+	mblk_t	*nmp;
 
 	/*
 	 * fastpath reserves a bit more then we can use.
@@ -4209,8 +4939,6 @@
 	 */
 	mp->b_rptr += atp->tun_extra_offset;
 	if ((atp->tun_flags & TUN_L_V4) != 0) {
-		ipha_t	*ipha;
-
 		ipha = (ipha_t *)mp->b_rptr;
 		hdrlen = IPH_HDR_LENGTH(ipha);
 
@@ -4237,8 +4965,6 @@
 		    (uint16_t)sizeof (ip6_t) + (uint16_t)sizeof (ipha_t));
 
 	} else if ((atp->tun_flags & TUN_L_V6) != 0) {
-		ip6_t *outer_ip6;
-
 		outer_ip6 = (ip6_t *)mp->b_rptr;
 		ASSERT(outer_ip6->ip6_nxt == IPPROTO_IPV6 ||
 		    outer_ip6->ip6_nxt == IPPROTO_DSTOPTS);
@@ -4331,7 +5057,12 @@
 		tun_send_ire_req(q);
 
 	/* send the packet down the transport stream to IPv4/IPv6 */
-	putnext(q, mp);
+	mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen);
+	if (mp == NULL)
+		return (0);
+
+	/* send the packet chain down the transport stream to IPv4/IPv6 */
+	TUN_PUTMSG_CHAIN(q, mp, nmp);
 	return (0);
 }
 
@@ -4432,9 +5163,9 @@
 tun_wdata_v6(queue_t *q, mblk_t *mp)
 {
 	tun_t		*atp = (tun_t *)q->q_ptr;
-	ipha_t		*ipha;
-	ip6_t		*ip6h, *outer_ip6;
-	mblk_t		*newmp;
+	ipha_t		*ipha = NULL;
+	ip6_t		*ip6h, *outer_ip6 = NULL;
+	mblk_t		*nmp;
 	ipaddr_t	v4addr;
 	char		buf1[INET6_ADDRSTRLEN];
 	char		buf2[INET6_ADDRSTRLEN];
@@ -4471,18 +5202,19 @@
 	switch (atp->tun_flags & TUN_LOWER_MASK) {
 	case TUN_L_V4:
 		/* room for IPv4 header? */
+		hdrlen = sizeof (ipha_t);
 		if ((mp->b_rptr - mp->b_datap->db_base) < sizeof (ipha_t)) {
 			/* no */
 
-			newmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
+			nmp = allocb(sizeof (ipha_t) + atp->tun_extra_offset,
 			    BPRI_HI);
-			if (newmp == NULL) {
+			if (nmp == NULL) {
 				atomic_add_32(&atp->tun_OutDiscard, 1);
 				atomic_add_32(&atp->tun_allocbfail, 1);
 				goto drop;
 			}
-			newmp->b_cont = mp;
-			mp = newmp;
+			nmp->b_cont = mp;
+			mp = nmp;
 			mp->b_wptr = mp->b_datap->db_lim;
 			mp->b_rptr = mp->b_wptr - sizeof (ipha_t);
 		} else {
@@ -4659,16 +5391,16 @@
 
 		if ((mp->b_rptr - mp->b_datap->db_base) < hdrlen) {
 			/* no */
-			newmp = allocb(hdrlen + atp->tun_extra_offset,
+			nmp = allocb(hdrlen + atp->tun_extra_offset,
 			    BPRI_HI);
-			if (newmp == NULL) {
+			if (nmp == NULL) {
 				atomic_add_32(&atp->tun_OutDiscard, 1);
 				atomic_add_32(&atp->tun_allocbfail, 1);
 				freemsg(mp);
 				return;
 			}
-			newmp->b_cont = mp;
-			mp = newmp;
+			nmp->b_cont = mp;
+			mp = nmp;
 			mp->b_wptr = mp->b_datap->db_lim;
 			mp->b_rptr = mp->b_wptr - hdrlen;
 		} else {
@@ -4711,7 +5443,12 @@
 		tun_send_ire_req(q);
 
 	/* send the packet down the transport stream to IP */
-	putnext(q, mp);
+	mp = ipsec_tun_outbound(mp, atp, NULL, ip6h, ipha, outer_ip6, hdrlen);
+	if (mp == NULL)
+		return;
+
+	/* send the packet chain down the transport stream to IPv4/IPv6 */
+	TUN_PUTMSG_CHAIN(q, mp, nmp);
 	return;
 drop:
 	freemsg(mp);
@@ -4943,7 +5680,7 @@
 	tunsp->tuns_HCOutUcastPkts.value.ui64 = 0;
 	tunsp->tuns_HCOutMulticastPkts.value.ui64 = 0;
 
-	for (tunp = tstats->ts_atp; tunp; tunp = tunp->tun_next) {
+	for (tunp = tstats->ts_atp; tunp; tunp = tunp->tun_kstat_next) {
 		tunsp->tuns_nocanput.value.ui32 += tunp->tun_nocanput;
 		tunsp->tuns_xmtretry.value.ui32 += tunp->tun_xmtretry;
 		tunsp->tuns_allocbfail.value.ui32 += tunp->tun_allocbfail;
--- a/usr/src/uts/common/inet/ipclassifier.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ipclassifier.h	Fri Nov 03 07:10:24 2006 -0800
@@ -76,6 +76,7 @@
 #define	IPCL_SCTPCONN		0x00000002
 #define	IPCL_IPCCONN		0x00000004
 #define	IPCL_ISV6		0x00000008	/* Is a V6 connection */
+#define	IPCL_IPTUN		0x00000010	/* Has "tun" plumbed above it */
 
 /* Conn Masks */
 #define	IPCL_TCP		(IPCL_TCP4|IPCL_TCP6)
@@ -115,8 +116,9 @@
 	((connp)->conn_flags & IPCL_UDP)
 
 #define	IPCL_IS_IPTUN(connp)						\
-	((connp)->conn_ulp == IPPROTO_ENCAP || \
-	(connp)->conn_ulp == IPPROTO_IPV6)
+	(((connp)->conn_ulp == IPPROTO_ENCAP ||				\
+	(connp)->conn_ulp == IPPROTO_IPV6) &&				\
+	((connp)->conn_flags & IPCL_IPTUN))
 
 typedef struct connf_s connf_t;
 typedef struct
--- a/usr/src/uts/common/inet/ipdrop.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ipdrop.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -92,6 +91,12 @@
 	kstat_named_t ipds_spd_esp_badid;
 	kstat_named_t ipds_spd_ah_innermismatch;
 	kstat_named_t ipds_spd_esp_innermismatch;
+	kstat_named_t ipds_spd_no_policy;
+	kstat_named_t ipds_spd_malformed_packet;
+	kstat_named_t ipds_spd_malformed_frag;
+	kstat_named_t ipds_spd_overlap_frag;
+	kstat_named_t ipds_spd_evil_frag;
+	kstat_named_t ipds_spd_max_frags;
 
 	/* ESP-specific drop statistics. */
 	kstat_named_t ipds_esp_nomem;
@@ -158,6 +163,12 @@
 				ip_drop_types->ipds_spd_ah_innermismatch
 #define	ipdrops_spd_esp_innermismatch	\
 				ip_drop_types->ipds_spd_esp_innermismatch
+#define	ipdrops_spd_no_policy		ip_drop_types->ipds_spd_no_policy
+#define	ipdrops_spd_malformed_packet	ip_drop_types->ipds_spd_malformed_packet
+#define	ipdrops_spd_malformed_frag	ip_drop_types->ipds_spd_malformed_frag
+#define	ipdrops_spd_overlap_frag	ip_drop_types->ipds_spd_overlap_frag
+#define	ipdrops_spd_evil_frag		ip_drop_types->ipds_spd_evil_frag
+#define	ipdrops_spd_max_frags		ip_drop_types->ipds_spd_max_frags
 
 /* ESP-specific drop statistics. */
 #define	ipdrops_esp_nomem		ip_drop_types->ipds_esp_nomem
--- a/usr/src/uts/common/inet/ipsec_impl.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ipsec_impl.h	Fri Nov 03 07:10:24 2006 -0800
@@ -47,6 +47,8 @@
 #define	IPSEC_CONF_IPSEC_DIR 		12	/* Direction of traffic */
 #define	IPSEC_CONF_ICMP_TYPE 		13	/* ICMP type */
 #define	IPSEC_CONF_ICMP_CODE 		14	/* ICMP code */
+#define	IPSEC_CONF_NEGOTIATE		15	/* Negotiation */
+#define	IPSEC_CONF_TUNNEL		16	/* Tunnel */
 
 /* Type of an entry */
 
@@ -300,7 +302,7 @@
 }
 
 /*
- * Merged address structure, for cheezy address-family independant
+ * Merged address structure, for cheezy address-family independent
  * matches in policy code.
  */
 
@@ -363,7 +365,9 @@
 	uint8_t		ipsl_remote_pfxlen;	/* #bits of prefix */
 	uint8_t		ipsl_mbz;
 
-	uint32_t	ipsl_hval;
+	/* Insert new elements above this line */
+	uint32_t	ipsl_pol_hval;
+	uint32_t	ipsl_sel_hval;
 } ipsec_selkey_t;
 
 typedef struct ipsec_sel
@@ -405,6 +409,12 @@
 	(ipp) = 0;						\
 }
 
+#define	IPPOL_UNCHAIN(php, ip) {	/* braces: one statement */	\
+	HASHLIST_UNCHAIN((ip), ipsp_hash);				\
+	avl_remove(&(php)->iph_rulebyid, (ip));				\
+	IPPOL_REFRELE(ip);						\
+}
+
 /*
  * Policy ruleset.  One per (protocol * direction) for system policy.
  */
@@ -448,6 +458,93 @@
 }
 
 /*
+ * IPsec fragment related structures
+ */
+
+typedef struct ipsec_fragcache_entry {
+	struct ipsec_fragcache_entry *itpfe_next;	/* hash list chain */
+	mblk_t *itpfe_fraglist;			/* list of fragments */
+	time_t itpfe_exp;			/* time when entry is stale */
+	int itpfe_depth;			/* # of fragments in list */
+	ipsec_addr_t itpfe_frag_src;
+	ipsec_addr_t itpfe_frag_dst;
+#define	itpfe_src itpfe_frag_src.ipsad_v4
+#define	itpfe_src6 itpfe_frag_src.ipsad_v6
+#define	itpfe_dst itpfe_frag_dst.ipsad_v4
+#define	itpfe_dst6 itpfe_frag_dst.ipsad_v6
+	uint32_t itpfe_id;			/* IP datagram ID */
+	uint8_t itpfe_proto;			/* IP Protocol */
+	uint8_t itpfe_last;			/* Last packet */
+} ipsec_fragcache_entry_t;
+
+typedef struct ipsec_fragcache {
+	kmutex_t itpf_lock;
+	struct ipsec_fragcache_entry **itpf_ptr;
+	struct ipsec_fragcache_entry *itpf_freelist;
+	time_t itpf_expire_hint;	/* time when oldest entry is stale */
+} ipsec_fragcache_t;
+
+/*
+ * Tunnel policies.  We keep a miniature of the transport-mode/global policy
+ * for each tunnel instance.
+ *
+ * People who need both an itp held down AND one of its polheads need to
+ * first lock the itp, THEN the polhead, otherwise deadlock WILL occur.
+ */
+typedef struct ipsec_tun_pol_s {
+	avl_node_t itp_node;
+	kmutex_t itp_lock;
+	uint64_t itp_next_policy_index;
+	ipsec_policy_head_t *itp_policy;	/* uses ITPF_P_* flags */
+	ipsec_policy_head_t *itp_inactive;	/* uses ITPF_I_* flags */
+	uint32_t itp_flags;		/* ITPF_* bits */
+	uint32_t itp_refcnt;		/* see ITP_REFHOLD/ITP_REFRELE */
+	char itp_name[LIFNAMSIZ];
+	ipsec_fragcache_t itp_fragcache;
+} ipsec_tun_pol_t;
+/* NOTE - Callers (tun code) synchronize their own instances for these flags. */
+#define	ITPF_P_ACTIVE 0x1	/* Are we using IPsec right now? */
+#define	ITPF_P_TUNNEL 0x2	/* Negotiate tunnel-mode */
+/* Optimization -> Do we have per-port security entries in this polhead? */
+#define	ITPF_P_PER_PORT_SECURITY 0x4
+#define	ITPF_PFLAGS 0x7
+#define	ITPF_SHIFT 3
+
+#define	ITPF_I_ACTIVE 0x8	/* Is the inactive using IPsec right now? */
+#define	ITPF_I_TUNNEL 0x10	/* Negotiate tunnel-mode (on inactive) */
+/* Optimization -> Do we have per-port security entries in this polhead? */
+#define	ITPF_I_PER_PORT_SECURITY 0x20
+#define	ITPF_IFLAGS 0x38
+
+/* NOTE:  f is evaluated repeatedly and assigned to; pass a plain lvalue. */
+#define	ITPF_CLONE(f) (f) = (((f) & ITPF_PFLAGS) | \
+	    (((f) & ITPF_PFLAGS) << ITPF_SHIFT))
+#define	ITPF_SWAP(f) (f) = ((((f) & ITPF_PFLAGS) << ITPF_SHIFT) | \
+	    (((f) & ITPF_IFLAGS) >> ITPF_SHIFT))
+
+#define	ITP_P_ISACTIVE(itp, iph) ((itp)->itp_flags & \
+	(((itp)->itp_policy == (iph)) ? ITPF_P_ACTIVE : ITPF_I_ACTIVE))
+
+#define	ITP_P_ISTUNNEL(itp, iph) ((itp)->itp_flags & \
+	(((itp)->itp_policy == (iph)) ? ITPF_P_TUNNEL : ITPF_I_TUNNEL))
+
+#define	ITP_P_ISPERPORT(itp, iph) ((itp)->itp_flags & \
+	(((itp)->itp_policy == (iph)) ? ITPF_P_PER_PORT_SECURITY : \
+	ITPF_I_PER_PORT_SECURITY))
+
+#define	ITP_REFHOLD(itp) { \
+	atomic_add_32(&((itp)->itp_refcnt), 1);	\
+	ASSERT((itp)->itp_refcnt != 0); \
+}
+
+#define	ITP_REFRELE(itp) { \
+	ASSERT((itp)->itp_refcnt != 0); \
+	membar_exit(); \
+	if (atomic_add_32_nv(&((itp)->itp_refcnt), -1) == 0) \
+		itp_free(itp); \
+}
+
+/*
  * Certificate identity.
  */
 
@@ -544,8 +641,10 @@
 
 extern void ipsec_policy_destroy(void);
 extern void ipsec_policy_init(void);
-extern boolean_t ipsec_inherit_global_policy(conn_t *, ipsec_req_t *,
-    ipsec_selector_t *, boolean_t);
+extern int ipsec_alloc_table(ipsec_policy_head_t *, int, int, boolean_t);
+extern void ipsec_polhead_init(ipsec_policy_head_t *, int);
+extern void ipsec_polhead_destroy(ipsec_policy_head_t *);
+extern void ipsec_polhead_free_table(ipsec_policy_head_t *);
 extern mblk_t *ipsec_check_global_policy(mblk_t *, conn_t *, ipha_t *,
 		    ip6_t *, boolean_t);
 extern mblk_t *ipsec_check_inbound_policy(mblk_t *, conn_t *, ipha_t *, ip6_t *,
@@ -555,7 +654,6 @@
 extern void ipsec_log_policy_failure(queue_t *, int, char *, ipha_t *,
 		    ip6_t *, boolean_t);
 extern boolean_t ipsec_inbound_accept_clear(mblk_t *, ipha_t *, ip6_t *);
-extern int ipsec_policy_alloc(conn_t *);
 extern int ipsec_conn_cache_policy(conn_t *, boolean_t);
 extern mblk_t *ipsec_alloc_ipsec_out(void);
 extern mblk_t	*ipsec_attach_ipsec_out(mblk_t *, conn_t *, ipsec_policy_t *,
@@ -565,7 +663,8 @@
 struct ipsec_in_s;
 extern ipsec_action_t *ipsec_in_to_out_action(struct ipsec_in_s *);
 extern boolean_t ipsec_check_ipsecin_latch(struct ipsec_in_s *, mblk_t *,
-    struct ipsec_latch_s *, ipha_t *, ip6_t *, const char **, kstat_named_t **);
+    struct ipsec_latch_s *, ipha_t *, ip6_t *, const char **, kstat_named_t **,
+    conn_t *);
 extern void ipsec_latch_inbound(ipsec_latch_t *ipl, struct ipsec_in_s *ii);
 
 extern void ipsec_policy_free(ipsec_policy_t *);
@@ -575,17 +674,20 @@
 extern ipsec_policy_head_t *ipsec_polhead_create(void);
 extern ipsec_policy_head_t *ipsec_system_policy(void);
 extern ipsec_policy_head_t *ipsec_inactive_policy(void);
-extern void ipsec_swap_policy(void);
+extern void ipsec_swap_policy(ipsec_policy_head_t *, ipsec_policy_head_t *);
+extern void ipsec_swap_global_policy(void);
 
 extern int ipsec_clone_system_policy(void);
 extern ipsec_policy_t *ipsec_policy_create(ipsec_selkey_t *,
-    const ipsec_act_t *, int, int);
+    const ipsec_act_t *, int, int, uint64_t *);
 extern boolean_t ipsec_policy_delete(ipsec_policy_head_t *,
     ipsec_selkey_t *, int);
 extern int ipsec_policy_delete_index(ipsec_policy_head_t *, uint64_t);
 extern void ipsec_polhead_flush(ipsec_policy_head_t *);
+extern int ipsec_copy_polhead(ipsec_policy_head_t *, ipsec_policy_head_t *);
 extern void ipsec_actvec_from_req(ipsec_req_t *, ipsec_act_t **, uint_t *);
 extern void ipsec_actvec_free(ipsec_act_t *, uint_t);
+extern int ipsec_req_from_head(ipsec_policy_head_t *, ipsec_req_t *, int);
 extern mblk_t *ipsec_construct_inverse_acquire(sadb_msg_t *, sadb_ext_t **);
 extern mblk_t *ip_wput_attach_policy(mblk_t *, ipha_t *, ip6_t *, ire_t *,
     conn_t *, boolean_t, zoneid_t);
@@ -604,10 +706,6 @@
 extern void ipsec_enter_policy(ipsec_policy_head_t *, ipsec_policy_t *, int);
 extern boolean_t ipsec_check_action(ipsec_act_t *, int *);
 
-extern void ipsec_config_list_compat(queue_t *, mblk_t *);
-extern int ipsec_config_add_compat(mblk_t *);
-extern int ipsec_config_delete_compat(mblk_t *);
-
 extern mblk_t *ipsec_out_tag(mblk_t *, mblk_t *);
 extern mblk_t *ipsec_in_tag(mblk_t *, mblk_t *);
 extern mblk_t *ip_copymsg(mblk_t *mp);
@@ -618,6 +716,35 @@
 
 extern void ipsec_insert_always(avl_tree_t *tree, void *new_node);
 
+extern int32_t ipsec_act_ovhd(const ipsec_act_t *act);
+
+
+extern boolean_t iph_ipvN(ipsec_policy_head_t *, boolean_t);
+
+/*
+ * Tunnel-support SPD functions and variables.
+ */
+struct tun_s;	/* Defined in inet/tun.h. */
+extern boolean_t ipsec_tun_inbound(mblk_t *, mblk_t **,  ipsec_tun_pol_t *,
+    ipha_t *, ip6_t *, ipha_t *, ip6_t *, int);
+extern mblk_t *ipsec_tun_outbound(mblk_t *, struct tun_s *, ipha_t *,
+    ip6_t *, ipha_t *, ip6_t *, int);
+extern void itp_free(ipsec_tun_pol_t *);
+extern ipsec_tun_pol_t *create_tunnel_policy(char *, int *, uint64_t *);
+extern ipsec_tun_pol_t *get_tunnel_policy(char *);
+extern void itp_unlink(ipsec_tun_pol_t *);
+extern void itp_free(ipsec_tun_pol_t *node);
+extern void itp_walk(void (*)(ipsec_tun_pol_t *, void *), void *);
+
+extern ipsec_tun_pol_t *(*itp_get_byaddr)(uint32_t *, uint32_t *, int);
+extern ipsec_tun_pol_t *itp_get_byaddr_dummy(uint32_t *, uint32_t *,
+    int);
+extern krwlock_t itp_get_byaddr_rw_lock;
+
+extern krwlock_t tunnel_policy_lock;
+extern uint64_t tunnel_policy_gen;
+extern avl_tree_t tunnel_policies;
+
 /*
  * IPsec AH/ESP functions called from IP.
  */
@@ -677,6 +804,9 @@
 extern boolean_t ipsec_outbound_sa(mblk_t *, uint_t);
 extern esph_t *ipsec_inbound_esp_sa(mblk_t *);
 extern ah_t *ipsec_inbound_ah_sa(mblk_t *);
+extern ipsec_policy_t *ipsec_find_policy_head(ipsec_policy_t *,
+    ipsec_policy_head_t *, int, ipsec_selector_t *);
+
 
 /*
  * NAT-Traversal cleanup
@@ -684,6 +814,11 @@
 extern void nattymod_clean_ipif(ipif_t *);
 
 /*
+ * Common functions
+ */
+extern boolean_t ip_addr_match(uint8_t *, int, in6_addr_t *);
+
+/*
  * AH and ESP counters types.
  */
 typedef uint32_t ah_counter;
--- a/usr/src/uts/common/inet/ipsec_info.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/ipsec_info.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -126,20 +125,9 @@
 	crypto_data_t ipsec_in_crypto_mac;	/* to store the MAC */
 
 	zoneid_t ipsec_in_zoneid;	/* target zone for the datagram */
-
-#ifdef DEBUG
-	/*
-	 * To aid in IPSEC_IN leak detection, save a copy of the inbound IPsec
-	 * header in DEBUG kernels.  It takes no more space overall because
-	 * ipsec_info_t is more than sizeof (IPv4 or IPv6) bytes larger than
-	 * ipsec_in_t.  (60 bytes was chosen as a nice safe number.  Using
-	 * an IP or IPv6 constant increases the header file count for
-	 * consumers of this header file.)
-	 */
-	uint8_t ipsec_in_saved_hdr[60];
-#endif
 } ipsec_in_t;
 
+#define	IPSECOUT_MAX_ADDRLEN 4	/* Max addr len. (in 32-bit words) */
 /*
  * This is used for communication between IP and IPSEC (AH/ESP)
  * for Outbound datagrams. IPSEC_OUT is allocated by IP before IPSEC
@@ -170,10 +158,17 @@
 	uint16_t ipsec_out_dst_port;	/* Destination port number of d-gram. */
 	uint8_t  ipsec_out_icmp_type;	/* ICMP type of d-gram */
 	uint8_t  ipsec_out_icmp_code;	/* ICMP code of d-gram */
+
+	sa_family_t ipsec_out_inaf;	/* Inner address family */
+	uint32_t ipsec_out_insrc[IPSECOUT_MAX_ADDRLEN];	/* Inner src address */
+	uint32_t ipsec_out_indst[IPSECOUT_MAX_ADDRLEN];	/* Inner dest address */
+	uint8_t  ipsec_out_insrcpfx;	/* Inner source prefix */
+	uint8_t  ipsec_out_indstpfx;	/* Inner destination prefix */
+
 	uint_t ipsec_out_ill_index;	/* ill index used for multicast etc. */
 	uint8_t ipsec_out_proto;	/* IP protocol number for d-gram. */
 	unsigned int
-		ipsec_out_encaps : 1,	/* Encapsualtion done ? */
+		ipsec_out_tunnel : 1,	/* Tunnel mode? */
 		ipsec_out_use_global_policy : 1, /* Inherit global policy ? */
 		ipsec_out_secure : 1,	/* Is this secure ? */
 		ipsec_out_proc_begin : 1, /* IPSEC processing begun */
@@ -273,6 +268,9 @@
  * NOTE: Keysock_hello is simply an ipsec_info_t
  */
 
+/* TUN_HELLO is just like KEYSOCK_HELLO, except for tunnels to talk with IP. */
+#define	TUN_HELLO		KEYSOCK_HELLO
+
 /*
  * KEYSOCK_HELLO_ACK is sent by a consumer to acknowledge a KEYSOCK_HELLO.
  * It contains the PF_KEYv2 sa_type, so keysock can redirect PF_KEY messages
@@ -290,6 +288,7 @@
 #define	KS_IN_ADDR_ME 3
 #define	KS_IN_ADDR_NOTME 4
 #define	KS_IN_ADDR_MBCAST 5
+#define	KS_IN_ADDR_DONTCARE 6
 
 /*
  * KEYSOCK_IN is a PF_KEY message from a PF_KEY socket destined for a consumer.
@@ -305,7 +304,6 @@
 	struct sadb_ext *ks_in_extv[SADB_EXT_MAX + 1];
 	int ks_in_srctype;	/* Source address type. */
 	int ks_in_dsttype;	/* Dest address type. */
-	int ks_in_proxytype;	/* Proxy address type. */
 	minor_t ks_in_serial;	/* Serial # of sending socket. */
 } keysock_in_t;
 
--- a/usr/src/uts/common/inet/sadb.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/sadb.h	Fri Nov 03 07:10:24 2006 -0800
@@ -83,7 +83,6 @@
 
 	struct ipsid_s *ipsa_src_cid;	/* Source certificate identity */
 	struct ipsid_s *ipsa_dst_cid;	/* Destination certificate identity */
-	struct ipsid_s *ipsa_proxy_cid;	/* (src) Proxy agent's cert. id. */
 	uint64_t *ipsa_integ;	/* Integrity bitmap */
 	uint64_t *ipsa_sens;	/* Sensitivity bitmap */
 	mblk_t	*ipsa_lpkt;	/* Packet received while larval (CAS me) */
@@ -175,7 +174,7 @@
 	uint32_t ipsa_kmp;	/* key management proto */
 	uint32_t ipsa_kmc;	/* key management cookie */
 
-	boolean_t ipsa_haspeer;	/* Has peer in another table. */
+	boolean_t ipsa_haspeer;		/* Has peer in another table. */
 
 	/*
 	 * Address storage.
@@ -185,12 +184,15 @@
 	 * used sockaddr_storage
 	 */
 	sa_family_t ipsa_addrfam;
-	sa_family_t ipsa_proxyfam;	/* Proxy AF can be != src/dst AF. */
+	sa_family_t ipsa_innerfam;	/* Inner AF can be != src/dst AF. */
 
 	uint32_t ipsa_srcaddr[IPSA_MAX_ADDRLEN];
 	uint32_t ipsa_dstaddr[IPSA_MAX_ADDRLEN];
-	uint32_t ipsa_proxysrc[IPSA_MAX_ADDRLEN];
-	uint32_t ipsa_proxydst[IPSA_MAX_ADDRLEN];
+	uint32_t ipsa_innersrc[IPSA_MAX_ADDRLEN];
+	uint32_t ipsa_innerdst[IPSA_MAX_ADDRLEN];
+
+	uint8_t ipsa_innersrcpfx;
+	uint8_t ipsa_innerdstpfx;
 
 	/* these can only be v4 */
 	uint32_t ipsa_natt_addr_loc[IPSA_MAX_ADDRLEN];
@@ -323,6 +325,7 @@
 #define	IPSA_F_NATT_REM	SADB_X_SAFLAGS_NATT_REM
 #define	IPSA_F_NATT	(SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)
 #define	IPSA_F_CINVALID	0x40000		/* SA shouldn't be cached */
+#define	IPSA_F_TUNNEL	SADB_X_SAFLAGS_TUNNEL
 
 /* SA states are important for handling UPDATE PF_KEY messages. */
 #define	IPSA_STATE_LARVAL	SADB_SASTATE_LARVAL
@@ -374,6 +377,7 @@
 	struct ipsec_action_s  *ipsacq_act;
 
 	sa_family_t ipsacq_addrfam;	/* Address family. */
+	sa_family_t ipsacq_inneraddrfam; /* Inner-packet address family. */
 	int ipsacq_numpackets;		/* How many packets queued up so far. */
 	uint32_t ipsacq_seq;		/* PF_KEY sequence number. */
 	uint64_t ipsacq_unique_id;	/* Unique ID for SAs that need it. */
@@ -386,13 +390,18 @@
 	uint32_t *ipsacq_srcaddr;
 	uint32_t *ipsacq_dstaddr;
 
-	/* uint32_t ipsacq_proxysrc[IPSA_MAX_ADDRLEN]; */	/* For later */
-	/* uint32_t ipsacq_proxydst[IPSA_MAX_ADDRLEN]; */	/* For later */
+	/* Cache copies, not pointers, so we can mask off accordingly */
+	uint32_t ipsacq_innersrc[IPSA_MAX_ADDRLEN];
+	uint32_t ipsacq_innerdst[IPSA_MAX_ADDRLEN];
 
 	/* These may change per-acquire. */
 	uint16_t ipsacq_srcport;
 	uint16_t ipsacq_dstport;
 	uint8_t ipsacq_proto;
+	uint8_t ipsacq_inner_proto;
+	uint8_t ipsacq_innersrcpfx;
+	uint8_t ipsacq_innerdstpfx;
+
 	/* icmp type and code of triggering packet (if applicable) */
 	uint8_t	ipsacq_icmp_type;
 	uint8_t ipsacq_icmp_code;
@@ -462,31 +471,49 @@
 
 #define	SA_FORM_UNIQUE_ID(io)				\
 	SA_UNIQUE_ID((io)->ipsec_out_src_port, (io)->ipsec_out_dst_port, \
-		(io)->ipsec_out_proto)
+		((io)->ipsec_out_tunnel ? ((io)->ipsec_out_inaf == AF_INET6 ? \
+		    IPPROTO_IPV6 : IPPROTO_ENCAP) : (io)->ipsec_out_proto), \
+		((io)->ipsec_out_tunnel ? (io)->ipsec_out_proto : 0))
 
 /*
- * This macro is used to generate unique ids (along with the addresses) for
- * outbound datagrams that require unique SAs.
+ * This macro is used to generate unique ids (along with the addresses, both
+ * inner and outer) for outbound datagrams that require unique SAs.
  *
  * N.B. casts and unsigned shift amounts discourage unwarranted
- * sign extension of dstport and proto.
+ * sign extension of dstport, proto, and iproto.
+ *
+ * Unique ID is 64-bits allocated as follows (pardon my big-endian bias):
+ *
+ *   6               4      43      33              11
+ *   3               7      09      21              65              0
+ *   +---------------*-------+-------+--------------+---------------+
+ *   |  MUST-BE-ZERO |<iprot>|<proto>| <dest port>  |  <src port>   |
+ *   +---------------*-------+-------+--------------+---------------+
+ *
+ * If there are inner addresses (tunnel mode) the ports come from the
+ * inner addresses.  If there are no inner addresses, the ports come from
+ * the outer addresses (transport mode).  Tunnel mode MUST have <proto>
+ * set to either IPPROTO_ENCAP or IPPROTO_IPV6.
  */
-#define	SA_UNIQUE_ID(srcport, dstport, proto) 		\
-	((srcport) | ((uint64_t)(dstport) << 16U) | ((uint64_t)(proto) << 32U))
+#define	SA_UNIQUE_ID(srcport, dstport, proto, iproto) 	\
+	((srcport) | ((uint64_t)(dstport) << 16U) | \
+	((uint64_t)(proto) << 32U) | ((uint64_t)(iproto) << 40U))
 
 /*
  * SA_UNIQUE_MASK generates a mask value to use when comparing the unique value
  * from a packet to an SA.
  */
 
-#define	SA_UNIQUE_MASK(srcport, dstport, proto) 		\
-	SA_UNIQUE_ID((srcport != 0)? 0xffff : 0,		\
-		    (dstport != 0)? 0xffff : 0,			\
-		    (proto != 0)? 0xff : 0)
+#define	SA_UNIQUE_MASK(srcport, dstport, proto, iproto) 	\
+	SA_UNIQUE_ID((srcport != 0) ? 0xffff : 0,		\
+		    (dstport != 0) ? 0xffff : 0,		\
+		    (proto != 0) ? 0xff : 0,			\
+		    (iproto != 0) ? 0xff : 0)
 
 /*
  * Decompose unique id back into its original fields.
  */
+#define	SA_IPROTO(ipsa) (((ipsa)->ipsa_unique_id >> 40) & 0xff)
 #define	SA_PROTO(ipsa) ((ipsa)->ipsa_unique_id>>32)&0xff
 #define	SA_SRCPORT(ipsa) ((ipsa)->ipsa_unique_id & 0xffff)
 #define	SA_DSTPORT(ipsa) (((ipsa)->ipsa_unique_id >> 16) & 0xffff)
@@ -522,18 +549,17 @@
 void sadb_pfkey_error(queue_t *, mblk_t *, int, int, uint_t);
 void sadb_keysock_hello(queue_t **, queue_t *, mblk_t *, void (*)(void *),
     timeout_id_t *, int);
-int sadb_addrcheck(queue_t *, queue_t *, mblk_t *, sadb_ext_t *, uint_t);
-void sadb_srcaddrfix(keysock_in_t *);
+int sadb_addrcheck(queue_t *, mblk_t *, sadb_ext_t *, uint_t);
+boolean_t sadb_addrfix(keysock_in_t *, queue_t *, mblk_t *);
 int sadb_addrset(ire_t *);
 int sadb_delget_sa(mblk_t *, keysock_in_t *, sadbp_t *, int *, queue_t *,
     boolean_t);
 #define	sadb_get_sa(m, k, s, i, q)	sadb_delget_sa(m, k, s, i, q, B_FALSE)
 #define	sadb_del_sa(m, k, s, i, q)	sadb_delget_sa(m, k, s, i, q, B_TRUE)
 
-int sadb_purge_sa(mblk_t *, keysock_in_t *, sadb_t *, int *,
-    queue_t *, queue_t *);
+int sadb_purge_sa(mblk_t *, keysock_in_t *, sadb_t *, queue_t *, queue_t *);
 int sadb_common_add(queue_t *, queue_t *, mblk_t *, sadb_msg_t *,
-    keysock_in_t *, isaf_t *, isaf_t *, ipsa_t *, boolean_t, boolean_t);
+    keysock_in_t *, isaf_t *, isaf_t *, ipsa_t *, boolean_t, boolean_t, int *);
 void sadb_set_usetime(ipsa_t *);
 boolean_t sadb_age_bytes(queue_t *, ipsa_t *, uint64_t, boolean_t);
 int sadb_update_sa(mblk_t *, keysock_in_t *, sadb_t *,
@@ -541,12 +567,11 @@
 void sadb_acquire(mblk_t *, ipsec_out_t *, boolean_t, boolean_t);
 
 void sadb_destroy_acquire(ipsacq_t *);
-uint8_t *sadb_setup_acquire(uint8_t *, uint8_t *, ipsacq_t *);
+mblk_t *sadb_setup_acquire(ipsacq_t *, uint8_t);
 ipsa_t *sadb_getspi(keysock_in_t *, uint32_t, int *);
 void sadb_in_acquire(sadb_msg_t *, sadbp_t *, queue_t *);
 boolean_t sadb_replay_check(ipsa_t *, uint32_t);
 boolean_t sadb_replay_peek(ipsa_t *, uint32_t);
-mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
 int sadb_dump(queue_t *, mblk_t *, minor_t, sadb_t *);
 void sadb_replay_delete(ipsa_t *);
 void sadb_ager(sadb_t *, queue_t *, queue_t *, int);
--- a/usr/src/uts/common/inet/spdsock.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/spdsock.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -65,6 +64,12 @@
 	ipsec_policy_t 		*spdsock_dump_cur_rule;
 	uint32_t		spdsock_dump_cur_chain;
 	uint32_t		spdsock_dump_count;
+	/* These are used for all-polhead dumps. */
+	int			spdsock_dump_tun_gen;
+	boolean_t		spdsock_dump_active;
+	boolean_t		spdsock_dump_tunnel;
+	int			spdsock_dump_remaining_polheads;
+	char			spdsock_dump_name[LIFNAMSIZ];
 } spdsock_t;
 
 #define	LOADCHECK_INTERVAL	(drv_usectohz(30000))
--- a/usr/src/uts/common/inet/tcp/tcp.c	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Fri Nov 03 07:10:24 2006 -0800
@@ -11926,7 +11926,7 @@
 	ii = (ipsec_in_t *)first_mp->b_rptr;
 
 	if (ipsec_check_ipsecin_latch(ii, data_mp, ipl, ipha, ip6h, &reason,
-	    &counter)) {
+	    &counter, tcp->tcp_connp)) {
 		BUMP_MIB(&ip_mib, ipsecInSucceeded);
 		return (B_TRUE);
 	}
--- a/usr/src/uts/common/inet/tun.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/inet/tun.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -106,7 +105,7 @@
 };
 
 typedef struct tun_stats_s {
-	/* protected by t_global_lock */
+	/* Protected by tun_global_lock. */
 	struct tun_stats_s *ts_next;
 	kmutex_t	ts_lock;		/* protects from here down */
 	struct tun_s	*ts_atp;
@@ -136,17 +135,32 @@
 /* per-instance data structure */
 /* Note: if t_recnt > 1, then t_indirect must be null */
 typedef struct tun_s {
-	struct tun_s	*tun_next;
+	struct tun_s	*tun_next;	/* For linked-list of tunnels by */
+	struct tun_s	**tun_ptpn;	/* ip address. */
+
+	/* Links v4-upper and v6-upper instances so they can share kstats. */
+	struct tun_s	*tun_kstat_next;
+
+	queue_t		*tun_wq;
 	kmutex_t	tun_lock;		/* protects from here down */
 	eventid_t	tun_events;
 	t_uscalar_t	tun_state;		/* protected by qwriter */
 	t_uscalar_t	tun_ppa;
 	mblk_t		*tun_iocmp;
-	ipsec_req_t	tun_secinfo;		/* Security preferences. */
+	ipsec_req_t	tun_secinfo;
+	/*
+	 * tun_policy_index is used to keep track if a tunnel's policy
+	 * was altered by ipsecconf(1m)/PF_POLICY instead of ioctl()s.
+	 * (Only ioctl()s can update this field.)
+	 */
+	uint64_t	tun_policy_index;
+	struct ipsec_tun_pol_s *tun_itp;
+	uint64_t	tun_itp_gen;
 	uint_t		tun_ipsec_overhead;	/* Length of IPsec headers. */
 	uint_t		tun_flags;
 	in6_addr_t	tun_laddr;
 	in6_addr_t	tun_faddr;
+	zoneid_t	tun_zoneid;
 	uint32_t	tun_mtu;
 	uint32_t	tun_notifications;	/* For DL_NOTIFY_IND */
 	int16_t		tun_encap_lim;
@@ -166,6 +180,7 @@
 #define	tun_ip6h		tun_u.tun_u_ip6hdrs.tun_u_ip6h
 #define	tun_telopt		tun_u.tun_u_ip6hdrs.tun_u_telopt
 	tun_stats_t	*tun_stats;
+	char tun_lifname[LIFNAMSIZ];
 	uint32_t tun_nocanput;		/* # input canput() returned false */
 	uint32_t tun_xmtretry;		/* # output canput() returned false */
 	uint32_t tun_allocbfail;	/* # esballoc/allocb failed */
@@ -213,6 +228,7 @@
 #define	TUN_HOP_LIM		0x800	/* Hop limit non-default */
 #define	TUN_ENCAP_LIM		0x1000	/* Encapsulation limit non-default */
 #define	TUN_6TO4		0x2000	/* tunnel is 6to4 tunnel */
+#define	TUN_COMPLEX_SECURITY	0x4000	/* tunnel has full tunnel-mode policy */
 
 struct old_iftun_req {
 	char		ifta_lifr_name[LIFNAMSIZ]; /* if name */
@@ -242,6 +258,8 @@
 void	tun_wput(queue_t *q, mblk_t  *mp);
 void	tun_wsrv(queue_t *q);
 
+extern void tun_ipsec_load_complete(void);
+
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
--- a/usr/src/uts/common/net/if.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/net/if.h	Fri Nov 03 07:10:24 2006 -0800
@@ -712,6 +712,7 @@
 #define	IFTUN_SECURITY			0x04	/* Pay attention to secinfo */
 #define	IFTUN_ENCAP			0x08	/* Pay attention to encap */
 #define	IFTUN_HOPLIMIT			0x10	/* Pay attention to hoplimit */
+#define	IFTUN_COMPLEX_SECURITY		0x20	/* Policy too big for ioctl */
 
 #endif /* !defined(_XOPEN_SOURCE) || defined(__EXTENSIONS__) */
 
--- a/usr/src/uts/common/net/pfkeyv2.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/net/pfkeyv2.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -521,9 +520,15 @@
 #define	SADB_X_SAFLAGS_KM2	 0x1000000	/* Key mgmt. specific flag 2 */
 #define	SADB_X_SAFLAGS_KM3	  0x800000	/* Key mgmt. specific flag 3 */
 #define	SADB_X_SAFLAGS_KM4	  0x400000	/* Key mgmt. specific flag 4 */
-	/* skip x200000 because it's used in the kernel */
+#define	SADB_X_SAFLAGS_KRES1	  0x200000	/* Reserved by the kernel */
 #define	SADB_X_SAFLAGS_NATT_LOC	  0x100000	/* this has a natted src SA */
 #define	SADB_X_SAFLAGS_NATT_REM	   0x80000	/* this has a natted dst SA */
+#define	SADB_X_SAFLAGS_KRES2	   0x40000	/* Reserved by the kernel */
+#define	SADB_X_SAFLAGS_TUNNEL	   0x20000	/* tunnel mode */
+
+#define	SADB_X_SAFLAGS_KRES	\
+	(SADB_X_SAFLAGS_KRES1 | SADB_X_SAFLAGS_KRES2)
+
 /*
  * SA state.
  */
@@ -586,7 +591,9 @@
 #define	SADB_EXT_LIFETIME_SOFT		4
 #define	SADB_EXT_ADDRESS_SRC		5
 #define	SADB_EXT_ADDRESS_DST		6
+/* These two are synonyms. */
 #define	SADB_EXT_ADDRESS_PROXY		7
+#define	SADB_X_EXT_ADDRESS_INNER_SRC	SADB_EXT_ADDRESS_PROXY
 #define	SADB_EXT_KEY_AUTH		8
 #define	SADB_EXT_KEY_ENCRYPT		9
 #define	SADB_EXT_IDENTITY_SRC		10
@@ -601,8 +608,9 @@
 #define	SADB_X_EXT_KM_COOKIE		19
 #define	SADB_X_EXT_ADDRESS_NATT_LOC	20
 #define	SADB_X_EXT_ADDRESS_NATT_REM	21
+#define	SADB_X_EXT_ADDRESS_INNER_DST	22
 
-#define	SADB_EXT_MAX			21
+#define	SADB_EXT_MAX			22
 
 /*
  * Identity types.
@@ -653,7 +661,10 @@
 #define	SADB_X_DIAGNOSTIC_BAD_SRC_AF		8
 /* in sockaddr->sa_family. */
 #define	SADB_X_DIAGNOSTIC_BAD_DST_AF		9
+/* These two are synonyms. */
 #define	SADB_X_DIAGNOSTIC_BAD_PROXY_AF		10
+#define	SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF	10
+
 #define	SADB_X_DIAGNOSTIC_AF_MISMATCH		11
 
 #define	SADB_X_DIAGNOSTIC_BAD_SRC		12
@@ -713,7 +724,28 @@
 #define	SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC	55
 #define	SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM	56
 #define	SADB_X_DIAGNOSTIC_DUPLICATE_NATT_PORTS	57
-#define	SADB_X_DIAGNOSTIC_MAX			57
+
+#define	SADB_X_DIAGNOSTIC_MISSING_INNER_SRC	58
+#define	SADB_X_DIAGNOSTIC_MISSING_INNER_DST	59
+#define	SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC	60
+#define	SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST	61
+#define	SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC	62
+#define	SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST	63
+
+#define	SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC	64
+#define	SADB_X_DIAGNOSTIC_PREFIX_INNER_DST	65
+#define	SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF	66
+#define	SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH	67
+
+#define	SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF	68
+#define	SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF	69
+
+#define	SADB_X_DIAGNOSTIC_PROTO_MISMATCH	70
+#define	SADB_X_DIAGNOSTIC_INNER_PROTO_MISMATCH	71
+
+#define	SADB_X_DIAGNOSTIC_DUAL_PORT_SETS	72
+
+#define	SADB_X_DIAGNOSTIC_MAX			72
 
 /* Algorithm type for sadb_x_algdesc above... */
 
--- a/usr/src/uts/common/net/pfpolicy.h	Fri Nov 03 06:01:36 2006 -0800
+++ b/usr/src/uts/common/net/pfpolicy.h	Fri Nov 03 07:10:24 2006 -0800
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -128,9 +127,11 @@
 #define	SPD_EXT_ACTION				6
 #define	SPD_EXT_RULE				7
 #define	SPD_EXT_RULESET				8
-#define	SPD_EXT_ICMP_TYPECODE  	9
+#define	SPD_EXT_ICMP_TYPECODE  			9
 
-#define	SPD_EXT_MAX				9
+#define	SPD_EXT_TUN_NAME			10
+
+#define	SPD_EXT_MAX				10
 
 /*
  * base policy rule (attributes which every rule has)
@@ -154,6 +155,8 @@
  */
 #define	SPD_RULE_FLAG_INBOUND		0x0001
 #define	SPD_RULE_FLAG_OUTBOUND		0x0002
+/* Only applies to tunnel policy heads. */
+#define	SPD_RULE_FLAG_TUNNEL		0x0004
 
 /*
  * Address selectors.   Different from PF_KEY because we want a
@@ -365,6 +368,29 @@
 #define	SPD_ATTR_PROTO_EXEC_MODE	0x00000121
 
 /*
+ * An interface extension identifies a network interface.
+ * It is used for configuring Tunnel Mode policies on a tunnelling
+ * interface for now.
+ */
+typedef struct spd_if_s {
+	union {
+		struct {
+			uint16_t spd_if_ulen;
+			uint16_t spd_if_uexttype;
+			union {
+				uint8_t spd_if_iuname[4];
+				uint32_t spd_if_iuindex;
+			} spd_if_iu;
+		} spd_if_actual;
+		uint64_t spd_if_alignment;
+	} spd_if_u;
+#define	spd_if_len spd_if_u.spd_if_actual.spd_if_ulen
+#define	spd_if_exttype spd_if_u.spd_if_actual.spd_if_uexttype
+#define	spd_if_name spd_if_u.spd_if_actual.spd_if_iu.spd_if_iuname
+#define	spd_if_index spd_if_u.spd_if_actual.spd_if_iu.spd_if_iuindex
+} spd_if_t;
+
+/*
  * Minimum, maximum key lengths in bits.
  */
 #define	SPD_MIN_MINBITS		0x0000
@@ -483,6 +509,8 @@
 #define	SPD_DIAGNOSTIC_ALG_IPSEC_NOT_LOADED	41
 #define	SPD_DIAGNOSTIC_MALFORMED_ICMP_TYPECODE	42
 #define	SPD_DIAGNOSTIC_DUPLICATE_ICMP_TYPECODE	43
+#define	SPD_DIAGNOSTIC_NOT_GLOBAL_OP		44
+#define	SPD_DIAGNOSTIC_NO_TUNNEL_SELECTORS	45
 
 /*
  * Helper macros.