Mercurial > illumos > illumos-gate

--- a/usr/src/cmd/cmd-inet/etc/sock2path	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/cmd-inet/etc/sock2path	Thu Sep 24 07:28:12 2009 -0700
@@ -54,3 +54,5 @@
 	29	4	1	/dev/spdsock

 	31	1	0	trill
+	32	1	0	sockpfp
+	32	4	0	sockpfp
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c	Thu Sep 24 07:28:12 2009 -0700
@@ -1689,15 +1689,15 @@
 		datalen = blen;
 	}

-	if (dl.dli_srczone == ALL_ZONES)
+	if (dl.dli_zsrc == ALL_ZONES)
 		sprintf(szone, "Unknown");
 	else
-		sprintf(szone, "%llu", BE_64(dl.dli_srczone));
+		sprintf(szone, "%lu", BE_32(dl.dli_zsrc));

-	if (dl.dli_dstzone == ALL_ZONES)
+	if (dl.dli_zdst == ALL_ZONES)
 		sprintf(dzone, "Unknown");
 	else
-		sprintf(dzone, "%llu", BE_64(dl.dli_dstzone));
+		sprintf(dzone, "%lu", BE_32(dl.dli_zdst));

 	if (flags & F_SUM) {
 		(void) snprintf(get_sum_line(), MAXLINE,
@@ -1718,20 +1718,20 @@
 		(void) snprintf(get_line(0, 0), get_line_remain(),
 		    "dli_version = %d", dl.dli_version);
 		(void) snprintf(get_line(0, 0), get_line_remain(),
-		    "dli_type = %d", dl.dli_ipver);
+		    "dli_family = %d", dl.dli_family);
 		(void) snprintf(get_line(0, 2), get_line_remain(),
-		    "dli_srczone = %s", szone);
+		    "dli_zsrc = %s", szone);
 		(void) snprintf(get_line(0, 2), get_line_remain(),
-		    "dli_dstzone = %s", dzone);
+		    "dli_zdst = %s", dzone);
 		show_space();
 	}
 	memcpy(data, off, len);

-	switch (dl.dli_ipver) {
-	case IPV4_VERSION:
+	switch (dl.dli_family) {
+	case AF_INET:
 		(void) interpret_ip(flags, (struct ip *)data, len);
 		break;
-	case IPV6_VERSION:
+	case AF_INET6:
 		(void) interpret_ipv6(flags, (ip6_t *)data, len);
 		break;
 	default:
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_filter.c	Thu Sep 24 07:28:12 2009 -0700
@@ -96,8 +96,8 @@
 /*
  * Offset for the source and destination zoneid in the ipnet header.
  */
-#define	IPNET_SRCZONE_OFFSET 8
-#define	IPNET_DSTZONE_OFFSET 16
+#define	IPNET_SRCZONE_OFFSET 16
+#define	IPNET_DSTZONE_OFFSET 20

 int eaddr;	/* need ethernet addr */

@@ -1047,17 +1047,13 @@
  * byte order.
  */
 static void
-compare_value_zone(uint_t offset, uint64_t val)
+compare_value_zone(uint_t offset, uint32_t val)
 {
 	int i;

-	for (i = 0; i < sizeof (uint64_t) / 4; i++) {
-		load_const(ntohl(((uint32_t *)&val)[i]));
-		load_value(offset + i * 4, 4);
-		emitop(OP_EQ);
-		if (i != 0)
-			emitop(OP_AND);
-	}
+	load_const(ntohl(val));		/* zoneid is a single 32-bit word */
+	load_value(offset, 4);		/* 4 bytes at the given offset */
+	emitop(OP_EQ);
 }

 /* Emit an operator into the code array */
@@ -1728,7 +1724,7 @@
  * Match on zoneid. The arg zone passed in is in network byte order.
  */
 static void
-zone_match(enum direction which, uint64_t zone)
+zone_match(enum direction which, uint32_t zone)
 {

 	switch (which) {
@@ -2546,7 +2542,7 @@
 			next();
 			if (tokentype != NUMBER)
 				pr_err("zoneid expected");
-			zone_match(dir, BE_64((uint64_t)(tokenval)));
+			zone_match(dir, BE_32((uint32_t)(tokenval)));
 			opstack++;
 			next();
 			break;
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_pf.c	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -131,8 +131,8 @@
 };

 static network_table_t ipnet_network_mapping_table[] = {
-	{ "ip", (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION) },
-	{ "ip6", (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION) },
+	{ "ip", (DL_IPNETINFO_VERSION << 8 | AF_INET) },
+	{ "ip6", (DL_IPNETINFO_VERSION << 8 | AF_INET6) },
 	{ "NULL", -1 }

 };
@@ -157,35 +157,35 @@
 };

 static transport_table_t ipnet_transport_mapping_table[] = {
-	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_TCP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_UDP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_OSPF, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_SCTP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_ICMP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_ICMPV6, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_ENCAP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_ESP, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
-	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV4_VERSION),
+	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | AF_INET),
 	    IPV4_TYPE_HEADER_OFFSET},
-	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | IPV6_VERSION),
+	{IPPROTO_AH, (DL_IPNETINFO_VERSION << 8 | AF_INET6),
 	    IPV6_TYPE_HEADER_OFFSET},
 	{-1, 0, 0}	/* must be the final entry */
 };
@@ -228,8 +228,8 @@
 #define	IPV6_SRCADDR_OFFSET	(dl.dl_link_header_len + 8)
 #define	IPV6_DSTADDR_OFFSET	(dl.dl_link_header_len + 24)

-#define	IPNET_SRCZONE_OFFSET 8
-#define	IPNET_DSTZONE_OFFSET 16
+#define	IPNET_SRCZONE_OFFSET 16
+#define	IPNET_DSTZONE_OFFSET 20

 static int inBrace = 0, inBraceOR = 0;
 static int foundOR = 0;
@@ -577,15 +577,15 @@
 }

 /*
- * Like pf_compare_value() but compare on a 64-bit zoneid value.
+ * Like pf_compare_value() but compare on a 32-bit zoneid value.
  * The argument val passed in is in network byte order.
  */
 static void
-pf_compare_zoneid(int offset, uint64_t val)
+pf_compare_zoneid(int offset, uint32_t val)
 {
 	int i;

-	for (i = 0; i < sizeof (uint64_t) / 2; i ++) {
+	for (i = 0; i < sizeof (uint32_t) / 2; i ++) {
 		pf_emit(ENF_PUSHWORD + offset / 2 + i);
 		pf_emit(ENF_PUSHLIT | ENF_EQ);
 		pf_emit(((uint16_t *)&val)[i]);
@@ -950,7 +950,7 @@
  * The zoneid passed in is in network byte order.
  */
 static void
-pf_match_zone(enum direction which, uint64_t zoneid)
+pf_match_zone(enum direction which, uint32_t zoneid)
 {
 	if (dl.dl_type != DL_IPNET)
 		pr_err("zone filter option unsupported on media");
@@ -1440,7 +1440,7 @@
 			next();
 			if (tokentype != NUMBER)
 				pr_err("zoneid expected after inet");
-			pf_match_zone(dir, BE_64((uint64_t)(tokenval)));
+			pf_match_zone(dir, BE_32((uint32_t)(tokenval)));
 			opstack++;
 			next();
 			break;
--- a/usr/src/cmd/devfsadm/misc_link.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/devfsadm/misc_link.c	Thu Sep 24 07:28:12 2009 -0700
@@ -104,7 +104,8 @@
 	    "(^ip$)|(^tcp$)|(^udp$)|(^icmp$)|(^sctp$)|"
 	    "(^ip6$)|(^tcp6$)|(^udp6$)|(^icmp6$)|(^sctp6$)|"
 	    "(^rts$)|(^arp$)|(^ipsecah$)|(^ipsecesp$)|(^keysock$)|(^spdsock$)|"
-	    "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^iptunq)",
+	    "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^iptunq)|"
+	    "(^bpf$)",
 	    TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
 	},
 	{ "pseudo", "ddi_pseudo",
--- a/usr/src/cmd/truss/systable.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/cmd/truss/systable.c	Thu Sep 24 07:28:12 2009 -0700
@@ -1500,9 +1500,10 @@
 	"NCA",		/* 28 */
 	"POLICY",	/* 29 */
 	"RDS",		/* 30 */
-	"TRILL"		/* 31 */
+	"TRILL",	/* 31 */
+	"PACKET"	/* 32 */
 };
-#if MAX_AFCODES != 32
+#if MAX_AFCODES != 33
 #error Need to update address-family table
 #endif
--- a/usr/src/lib/brand/native/zone/platform.xml	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/lib/brand/native/zone/platform.xml	Thu Sep 24 07:28:12 2009 -0700
@@ -44,6 +44,7 @@

 	<!-- Devices to create under /dev -->
 	<device match="arp" />
+	<device match="bpf" />
 	<device match="conslog" />
 	<device match="cpu/self/cpuid" />
 	<device match="crypto" />
--- a/usr/src/pkgdefs/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/pkgdefs/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -377,6 +377,8 @@
 	SUNWosdem \
 	SUNWypr \
 	SUNWypu \
+	SUNWpacketh \
+	SUNWpacketu \
 	SUNWpamsc \
 	SUNWpapi \
 	SUNWpcan \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketh/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+DATAFILES += depend
+LICENSEFILES += ../../cmd/ipf/tools/IPFILTER.LICENCE
+CDDL=
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketh/pkginfo.tmpl	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWpacketh"
+NAME="Solaris Packet header files"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="usr"
+MAXINST="1000"
+CATEGORY="system"
+DESC="C header files for BPF/PF_PACKET"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_com	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+#
+# source locations relative to the prototype file
+#
+# SUNWpacketh
+#
+d none usr 755 root sys
+d none usr/include 755 root bin
+d none usr/include/net 755 root bin
+f none usr/include/net/bpf.h 644 root bin
+f none usr/include/net/bpfdesc.h 644 root bin
+f none usr/include/net/dlt.h 644 root bin
+d none usr/include/netpacket 755 root bin
+f none usr/include/netpacket/packet.h 644 root bin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_i386	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketh/prototype_sparc	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.com
+
+TMPLFILES += postinstall preremove
+LICENSEFILES += ../../uts/common/io/bpf/BPF.LICENCE
+CDDL=
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
+include ../Makefile.prtarg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/pkginfo.tmpl	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWpacketu"
+NAME="Solaris Kernel Packet (Usr)"
+ARCH="ISA"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="root"
+MAXINST="1000"
+CATEGORY="system"
+DESC="BPF/PF_PACKET kernel packet modules"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none preserve manifest"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="false"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/postinstall.tmpl	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+include drv_utils
+
+pkg_drvadd bpf
+
+exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/preremove.tmpl	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+include drv_utils
+
+pkg_drvrem bpf
+
+exit $?
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_com	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+#
+# source locations relative to the prototype file
+#
+# SUNWpacketu
+#
+d none usr 755 root sys
+d none usr/kernel 755 root sys
+d none usr/kernel/drv 755 root sys
+f none usr/kernel/drv/bpf.conf 644 root sys
+d none usr/kernel/socketmod 755 root sys
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_i386	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are I386 specific here
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWpacketu
+#
+f none usr/kernel/drv/bpf 755 root sys
+d none usr/kernel/drv/amd64 755 root sys
+f none usr/kernel/drv/amd64/bpf 755 root sys
+f none usr/kernel/socketmod/sockpfp 755 root sys
+d none usr/kernel/socketmod/amd64 755 root sys
+f none usr/kernel/socketmod/amd64/sockpfp 755 root sys
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWpacketu/prototype_sparc	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+#
+# List files which are SPARC specific here
+#
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWpacketu
+#
+d none usr/kernel/drv/sparcv9 755 root sys
+f none usr/kernel/drv/sparcv9/bpf 755 root sys
+d none usr/kernel/socketmod/sparcv9 755 root sys
+f none usr/kernel/socketmod/sparcv9/sockpfp 755 root sys
--- a/usr/src/tools/scripts/bfu.sh	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/tools/scripts/bfu.sh	Thu Sep 24 07:28:12 2009 -0700
@@ -809,6 +809,37 @@
 	mv -f /tmp/aac.conf.$$ $conffile
 }

+update_etc_inet_sock2path()
+{
+	#
+	# The PF_PACKET module may need to be added to (or removed from)
+	# the socket configuration file, /etc/inet/sock2path.
+	#
+	# When being added to the system, the socket itself will remain
+	# inactive until the next reboot when soconfig is run. When being
+	# removed, the kernel configuration stays active until the system
+	# is rebooted and the sockets will continue to work until it is
+	# unloaded from the kernel, after which applications will fail.
+	#
+	sockfile=$rootprefix/etc/inet/sock2path
+	xgrep=/usr/xpg4/bin/grep
+
+	${ZCAT} ${cpiodir}/generic.usr$ZFIX | cpio -it 2>/dev/null |
+	    ${xgrep} -q sockpfp
+	if [ $? -eq 1 ] ; then
+		${xgrep} -v -E '^	32	[14]	0	sockpfp' \
+		    ${sockfile} > /tmp/sock2path.tmp.$$
+		cp /tmp/sock2path.tmp.$$ ${sockfile}
+	else
+		if ! ${xgrep} -q -E \
+		    '^	32	[14]	0	sockpfp' ${sockfile}; then
+			echo '' >> ${sockfile}
+			echo '	32	1	0	sockpfp' >> ${sockfile}
+			echo '	32	4	0	sockpfp' >> ${sockfile}
+		fi
+	fi
+}
+
 # update x86 version mpt.conf for property tape
 mpttapeprop='[ 	]*tape[ 	]*=[ 	]*"sctp"[ 	]*;'
 update_mptconf_i386()
@@ -8525,6 +8556,8 @@

 	update_aac_conf

+	update_etc_inet_sock2path
+
 	if [ $target_isa = i386 ]; then
 	    update_mptconf_i386
--- a/usr/src/uts/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -140,6 +140,8 @@
 		common/inet/ipf/netinet \
 		common/inet/kssl \
 		common/inet/nca \
+		common/inet/sockmods/netpacket \
+		common/io/bpf/net \
 		common/ipp \
 		common/net \
 		common/netinet \
--- a/usr/src/uts/Makefile.targ	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/Makefile.targ	Thu Sep 24 07:28:12 2009 -0700
@@ -221,6 +221,9 @@
 $(USR_SCHED_DIR)/%:	$(OBJS_DIR)/% $(USR_SCHED_DIR) FRC
 	$(INS.file)

+$(USR_SOCK_DIR)/%:	$(OBJS_DIR)/% $(USR_SOCK_DIR) FRC
+	$(INS.file)
+
 $(USR_STRMOD_DIR)/%:	$(OBJS_DIR)/% $(USR_STRMOD_DIR) FRC
 	$(INS.file)
--- a/usr/src/uts/Makefile.uts	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/Makefile.uts	Thu Sep 24 07:28:12 2009 -0700
@@ -557,6 +557,7 @@
 USR_MOD_DIRS_32		+= $(USR_MISC_DIR_32) $(USR_DACF_DIR_32)
 USR_MOD_DIRS_32		+= $(USR_PCBE_DIR_32)
 USR_MOD_DIRS_32		+= $(USR_DTRACE_DIR_32) $(USR_BRAND_DIR_32)
+USR_MOD_DIRS_32		+= $(USR_SOCK_DIR_32)

 #
 #
--- a/usr/src/uts/common/Makefile.files	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/Makefile.files	Thu Sep 24 07:28:12 2009 -0700
@@ -541,7 +541,7 @@

 KEYSOCK_OBJS +=	keysockddi.o keysock.o keysock_opt_data.o

-IPNET_OBJS += ipnet.o
+IPNET_OBJS +=	ipnet.o ipnet_bpf.o

 SPDSOCK_OBJS += spdsockddi.o spdsock.o spdsock_opt_data.o

@@ -572,6 +572,8 @@

 SCTP_SOCK_MOD_OBJS += sockmod_sctp.o socksctp.o socksctpsubr.o

+PFP_SOCK_MOD_OBJS += sockmod_pfp.o
+
 RDS_OBJS +=	rdsddi.o rdssubr.o rds_opt.o rds_ioctl.o

 RDSIB_OBJS +=	rdsib.o rdsib_ib.o rdsib_cm.o rdsib_ep.o rdsib_buf.o \
@@ -596,6 +598,8 @@

 DUMP_OBJS +=	dump.o

+BPF_OBJS +=	bpf.o bpf_filter.o bpf_mod.o bpf_dlt.o bpf_mac.o
+
 CLONE_OBJS +=	clone.o

 CN_OBJS +=	cons.o
@@ -1218,7 +1222,8 @@
 SOCK_OBJS +=	socksubr.o	sockvfsops.o	sockparams.o	\
 		socksyscalls.o	socktpi.o	sockstr.o	sockssl.o \
 		sockcommon_vnops.o	sockcommon_subr.o \
-		sockcommon_sops.o	sockcommon.o		socknotify.o \
+		sockcommon_sops.o	sockcommon.o	\
+		sock_notsupp.o	socknotify.o \
 		nl7c.o		nl7curi.o	nl7chttp.o	nl7clogd.o \
 		nl7cnca.o	sodirect.o
--- a/usr/src/uts/common/Makefile.rules	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/Makefile.rules	Thu Sep 24 07:28:12 2009 -0700
@@ -620,6 +620,10 @@
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)

+$(OBJS_DIR)/%.o:		$(UTSBASE)/common/io/bpf/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
 $(OBJS_DIR)/%.o:		$(UTSBASE)/common/io/cardbus/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
@@ -1882,6 +1886,9 @@
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/io/bfe/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))

+$(LINTS_DIR)/%.ln:		$(UTSBASE)/common/io/bpf/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/io/bge/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/fs/sockfs/sock_notsupp.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,178 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/socket_proto.h>
+
+
+/*ARGSUSED*/
+int
+sock_accept_notsupp(sock_lower_handle_t low1, sock_lower_handle_t low2,
+    sock_upper_handle_t upper, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_bind_notsupp(sock_lower_handle_t handle, struct sockaddr *name,
+    socklen_t namelen, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_listen_notsupp(sock_lower_handle_t handle, int backlog,
+    struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_connect_notsupp(sock_lower_handle_t handle,
+    const struct sockaddr *name, socklen_t namelen, sock_connid_t *conp,
+    struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getsockname_notsupp(sock_lower_handle_t handle, struct sockaddr *sa,
+    socklen_t *len, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getpeername_notsupp(sock_lower_handle_t handle, struct sockaddr *addr,
+    socklen_t *addrlen, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_getsockopt_notsupp(sock_lower_handle_t handle, int level,
+    int option_name, void *optval, socklen_t *optlenp, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_setsockopt_notsupp(sock_lower_handle_t handle, int level,
+    int option_name, const void *optval, socklen_t optlen, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_send_notsupp(sock_lower_handle_t handle, mblk_t *mp,
+    struct msghdr *msg, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_senduio_notsupp(sock_lower_handle_t handle, struct uio *uiop,
+    struct nmsghdr *msg, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_recvuio_notsupp(sock_lower_handle_t handle, struct uio *uiop,
+    struct nmsghdr *msg, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+short
+sock_poll_notsupp(sock_lower_handle_t handle, short events, int anyyet,
+    cred_t *cred)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+int
+sock_shutdown_notsupp(sock_lower_handle_t handle, int how, struct cred *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+/*ARGSUSED*/
+void
+sock_clr_flowctrl_notsupp(sock_lower_handle_t proto_handle)
+{
+}
+
+/*ARGSUSED*/
+int
+sock_ioctl_notsupp(sock_lower_handle_t handle, int cmd, intptr_t arg,
+    int mode, int32_t *rvalp, cred_t *cred)
+{
+	return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+int
+sock_close_notsupp(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
+{
+	return (EOPNOTSUPP);
+}
+
+sock_downcalls_t sock_down_notsupp = {
+	NULL,
+	sock_accept_notsupp,
+	sock_bind_notsupp,
+	sock_listen_notsupp,
+	sock_connect_notsupp,
+	sock_getpeername_notsupp,
+	sock_getsockname_notsupp,
+	sock_getsockopt_notsupp,
+	sock_setsockopt_notsupp,
+	sock_send_notsupp,
+	sock_senduio_notsupp,
+	sock_recvuio_notsupp,
+	sock_poll_notsupp,
+	sock_shutdown_notsupp,
+	sock_clr_flowctrl_notsupp,
+	sock_ioctl_notsupp,
+	sock_close_notsupp,
+};
--- a/usr/src/uts/common/inet/ip.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ip.h	Thu Sep 24 07:28:12 2009 -0700
@@ -3531,56 +3531,6 @@
 #endif

 /*
- * IP observability hook support
- */
-
-/*
- * ipobs_hooktype_t describes the hook types supported
- * by the ip module. IPOBS_HOOK_LOCAL refers to packets
- * which are looped back internally within the ip module.
- */
-
-typedef enum ipobs_hook_type {
-	IPOBS_HOOK_LOCAL,
-	IPOBS_HOOK_OUTBOUND,
-	IPOBS_HOOK_INBOUND
-} ipobs_hook_type_t;
-
-typedef void ipobs_cbfunc_t(mblk_t *);
-
-typedef struct ipobs_cb {
-	ipobs_cbfunc_t	*ipobs_cbfunc;
-	list_node_t	ipobs_cbnext;
-} ipobs_cb_t;
-
-/*
- * This structure holds the data passed back from the ip module to
- * observability consumers.
- *
- * ihd_mp	  Pointer to the IP packet.
- * ihd_zsrc	  Source zoneid; set to ALL_ZONES when unknown.
- * ihd_zdst	  Destination zoneid; set to ALL_ZONES when unknown.
- * ihd_htype	  IPobs hook type, see above for the defined types.
- * ihd_ipver	  IP version of the packet.
- * ihd_ifindex	  Interface index that the packet was received/sent over.
- *		  For local packets, this is the index of the interface
- *		  associated with the local destination address.
- * ihd_grifindex  IPMP group interface index (zero unless ihd_ifindex
- *		  is an IPMP underlying interface).
- * ihd_stack	  Netstack the packet is from.
- */
-typedef struct ipobs_hook_data {
-	mblk_t			*ihd_mp;
-	zoneid_t		ihd_zsrc;
-	zoneid_t		ihd_zdst;
-	ipobs_hook_type_t	ihd_htype;
-	uint16_t		ihd_ipver;
-	uint64_t		ihd_ifindex;
-	uint64_t 		ihd_grifindex;
-	netstack_t		*ihd_stack;
-} ipobs_hook_data_t;
-
-/*
  * Per-ILL Multidata Transmit capabilities.
  */
 struct ill_mdt_capab_s {
@@ -3725,10 +3675,10 @@
 extern int	ip_fill_mtuinfo(struct in6_addr *, in_port_t,
 	struct ip6_mtuinfo *, netstack_t *);
 extern	ipif_t *conn_get_held_ipif(conn_t *, ipif_t **, int *);
-extern void ipobs_register_hook(netstack_t *, ipobs_cbfunc_t *);
-extern void ipobs_unregister_hook(netstack_t *, ipobs_cbfunc_t *);
-extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *, int,
-    uint32_t, ip_stack_t *);
+extern hook_t *ipobs_register_hook(netstack_t *, pfv_t);
+extern void ipobs_unregister_hook(netstack_t *, hook_t *);
+extern void ipobs_hook(mblk_t *, int, zoneid_t, zoneid_t, const ill_t *,
+    ip_stack_t *);
 typedef void    (*ipsq_func_t)(ipsq_t *, queue_t *, mblk_t *, void *);

 /*
--- a/usr/src/uts/common/inet/ip/ip.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ip/ip.c	Thu Sep 24 07:28:12 2009 -0700
@@ -5745,6 +5745,7 @@
 	 * protocols are going away have been run, meaning that we can
 	 * now set about starting to clean things up.
 	 */
+	ipobs_fini(ipst);
 	ipv4_hook_destroy(ipst);
 	ipv6_hook_destroy(ipst);
 	ip_net_destroy(ipst);
@@ -5829,7 +5830,6 @@
 	mutex_destroy(&ipst->ips_ip_addr_avail_lock);
 	rw_destroy(&ipst->ips_ill_g_lock);

-	ipobs_fini(ipst);
 	ip_ire_fini(ipst);
 	ip6_asp_free(ipst);
 	conn_drain_fini(ipst);
@@ -6032,11 +6032,11 @@
 	ipst->ips_ip_src_id = 1;
 	rw_init(&ipst->ips_srcid_lock, NULL, RW_DEFAULT, NULL);

-	ipobs_init(ipst);
 	ip_net_init(ipst, ns);
 	ipv4_hook_init(ipst);
 	ipv6_hook_init(ipst);
 	ipmp_init(ipst);
+	ipobs_init(ipst);

 	/*
 	 * Create the taskq dispatcher thread and initialize related stuff.
@@ -13957,13 +13957,20 @@
 	    ip6_t *, NULL, int, 0);

 	if (mp != NULL) {
-		if (ipst->ips_ipobs_enabled) {
+		if (ipst->ips_ip4_observe.he_interested) {
 			zoneid_t szone;

 			szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
 			    ipst, ALL_ZONES);
+			/*
+			 * The IP observability hook expects b_rptr to be
+			 * where the IP header starts, so advance past the
+			 * link layer header.
+			 */
+			mp->b_rptr += hlen;
 			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
-			    ALL_ZONES, ill, IPV4_VERSION, hlen, ipst);
+			    ALL_ZONES, ill, ipst);
+			mp->b_rptr -= hlen;
 		}
 		ILL_SEND_TX(stq_ill, ire, dst, mp, IP_DROP_ON_NO_DESC, NULL);
 	}
@@ -15046,7 +15053,7 @@
 			continue;
 		}

-		if (ipst->ips_ipobs_enabled) {
+		if (ipst->ips_ip4_observe.he_interested) {
 			zoneid_t dzone;

 			/*
@@ -15055,7 +15062,7 @@
 			 */
 			dzone = ip_get_zoneid_v4(dst, mp, ipst, ALL_ZONES);
 			ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone,
-			    ill, IPV4_VERSION, 0, ipst);
+			    ill, ipst);
 		}

 		/*
@@ -22495,7 +22502,7 @@
 		if (mp == NULL)
 			goto release_ire_and_ill;

-		if (ipst->ips_ipobs_enabled) {
+		if (ipst->ips_ip4_observe.he_interested) {
 			zoneid_t szone;

 			/*
@@ -22506,7 +22513,7 @@
 			szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
 			    ALL_ZONES);
 			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, ALL_ZONES,
-			    ire->ire_ipif->ipif_ill, IPV4_VERSION, 0, ipst);
+			    ire->ire_ipif->ipif_ill, ipst);
 		}
 		mp->b_prev = SET_BPREV_FLAG(IPP_LOCAL_OUT);
 		DTRACE_PROBE2(ip__xmit__1, mblk_t *, mp, ire_t *, ire);
@@ -24901,7 +24908,7 @@
 	if (first_mp == NULL)
 		return;

-	if (ipst->ips_ipobs_enabled) {
+	if (ipst->ips_ip4_observe.he_interested) {
 		zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
 		zoneid_t stackzoneid = netstackid_to_zoneid(
 		    ipst->ips_netstack->netstack_stackid);
@@ -24915,8 +24922,7 @@
 			lookup_zoneid = zoneid;
 		szone = ip_get_zoneid_v4(ipha->ipha_src, mp, ipst,
 		    lookup_zoneid);
-		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
-		    IPV4_VERSION, 0, ipst);
+		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
 	}

 	DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
@@ -29805,121 +29811,81 @@
 /*
  * IP obserability hook support functions.
  */
-
 static void
 ipobs_init(ip_stack_t *ipst)
 {
-	ipst->ips_ipobs_enabled = B_FALSE;
-	list_create(&ipst->ips_ipobs_cb_list, sizeof (ipobs_cb_t),
-	    offsetof(ipobs_cb_t, ipobs_cbnext));
-	mutex_init(&ipst->ips_ipobs_cb_lock, NULL, MUTEX_DEFAULT, NULL);
-	ipst->ips_ipobs_cb_nwalkers = 0;
-	cv_init(&ipst->ips_ipobs_cb_cv, NULL, CV_DRIVER, NULL);
+	netid_t id;
+
+	id = net_getnetidbynetstackid(ipst->ips_netstack->netstack_stackid);
+
+	ipst->ips_ip4_observe_pr = net_protocol_lookup(id, NHF_INET);
+	VERIFY(ipst->ips_ip4_observe_pr != NULL);
+
+	ipst->ips_ip6_observe_pr = net_protocol_lookup(id, NHF_INET6);
+	VERIFY(ipst->ips_ip6_observe_pr != NULL);
 }

 static void
 ipobs_fini(ip_stack_t *ipst)
 {
-	ipobs_cb_t *cb;
-
-	mutex_enter(&ipst->ips_ipobs_cb_lock);
-	while (ipst->ips_ipobs_cb_nwalkers != 0)
-		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
-
-	while ((cb = list_head(&ipst->ips_ipobs_cb_list)) != NULL) {
-		list_remove(&ipst->ips_ipobs_cb_list, cb);
-		kmem_free(cb, sizeof (*cb));
-	}
-	list_destroy(&ipst->ips_ipobs_cb_list);
-	mutex_exit(&ipst->ips_ipobs_cb_lock);
-	mutex_destroy(&ipst->ips_ipobs_cb_lock);
-	cv_destroy(&ipst->ips_ipobs_cb_cv);
-}
-
+
+	net_protocol_release(ipst->ips_ip4_observe_pr);
+	net_protocol_release(ipst->ips_ip6_observe_pr);
+}
+
+/*
+ * hook_pkt_observe_t is composed in network byte order so that the
+ * entire mblk_t chain handed into hook_run can be used as-is.
+ * The caveat is that use of the fields, such as the zone fields,
+ * requires conversion into host byte order first.
+ */
 void
 ipobs_hook(mblk_t *mp, int htype, zoneid_t zsrc, zoneid_t zdst,
-    const ill_t *ill, int ipver, uint32_t hlen, ip_stack_t *ipst)
-{
-	mblk_t *mp2;
-	ipobs_cb_t *ipobs_cb;
-	ipobs_hook_data_t *ihd;
-	uint64_t grifindex = 0;
+    const ill_t *ill, ip_stack_t *ipst)
+{
+	hook_pkt_observe_t *hdr;
+	uint64_t grifindex;
+	mblk_t *imp;
+
+	imp = allocb(sizeof (*hdr), BPRI_HI);
+	if (imp == NULL)
+		return;
+
+	hdr = (hook_pkt_observe_t *)imp->b_rptr;
+	/*
+	 * b_wptr is set so that the apparent size of the data in the mblk_t
+	 * excludes the pointers at the end of hook_pkt_observe_t.
+	 */
+	imp->b_wptr = imp->b_rptr + sizeof (dl_ipnetinfo_t);
+	imp->b_cont = mp;

 	ASSERT(DB_TYPE(mp) == M_DATA);

 	if (IS_UNDER_IPMP(ill))
 		grifindex = ipmp_ill_get_ipmp_ifindex(ill);
-
-	mutex_enter(&ipst->ips_ipobs_cb_lock);
-	ipst->ips_ipobs_cb_nwalkers++;
-	mutex_exit(&ipst->ips_ipobs_cb_lock);
-	for (ipobs_cb = list_head(&ipst->ips_ipobs_cb_list); ipobs_cb != NULL;
-	    ipobs_cb = list_next(&ipst->ips_ipobs_cb_list, ipobs_cb)) {
-		mp2 = allocb(sizeof (ipobs_hook_data_t), BPRI_HI);
-		if (mp2 != NULL) {
-			ihd = (ipobs_hook_data_t *)mp2->b_rptr;
-			if (((ihd->ihd_mp = dupmsg(mp)) == NULL) &&
-			    ((ihd->ihd_mp = copymsg(mp)) == NULL)) {
-				freemsg(mp2);
-				continue;
-			}
-			ihd->ihd_mp->b_rptr += hlen;
-			ihd->ihd_htype = htype;
-			ihd->ihd_ipver = ipver;
-			ihd->ihd_zsrc = zsrc;
-			ihd->ihd_zdst = zdst;
-			ihd->ihd_ifindex = ill->ill_phyint->phyint_ifindex;
-			ihd->ihd_grifindex = grifindex;
-			ihd->ihd_stack = ipst->ips_netstack;
-			mp2->b_wptr += sizeof (*ihd);
-			ipobs_cb->ipobs_cbfunc(mp2);
-		}
-	}
-	mutex_enter(&ipst->ips_ipobs_cb_lock);
-	ipst->ips_ipobs_cb_nwalkers--;
-	if (ipst->ips_ipobs_cb_nwalkers == 0)
-		cv_broadcast(&ipst->ips_ipobs_cb_cv);
-	mutex_exit(&ipst->ips_ipobs_cb_lock);
-}
-
-void
-ipobs_register_hook(netstack_t *ns, pfv_t func)
-{
-	ipobs_cb_t   *cb;
-	ip_stack_t *ipst = ns->netstack_ip;
-
-	cb = kmem_alloc(sizeof (*cb), KM_SLEEP);
-
-	mutex_enter(&ipst->ips_ipobs_cb_lock);
-	while (ipst->ips_ipobs_cb_nwalkers != 0)
-		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
-	ASSERT(ipst->ips_ipobs_cb_nwalkers == 0);
-
-	cb->ipobs_cbfunc = func;
-	list_insert_head(&ipst->ips_ipobs_cb_list, cb);
-	ipst->ips_ipobs_enabled = B_TRUE;
-	mutex_exit(&ipst->ips_ipobs_cb_lock);
-}
-
-void
-ipobs_unregister_hook(netstack_t *ns, pfv_t func)
-{
-	ipobs_cb_t	*curcb;
-	ip_stack_t	*ipst = ns->netstack_ip;
-
-	mutex_enter(&ipst->ips_ipobs_cb_lock);
-	while (ipst->ips_ipobs_cb_nwalkers != 0)
-		cv_wait(&ipst->ips_ipobs_cb_cv, &ipst->ips_ipobs_cb_lock);
-
-	for (curcb = list_head(&ipst->ips_ipobs_cb_list); curcb != NULL;
-	    curcb = list_next(&ipst->ips_ipobs_cb_list, curcb)) {
-		if (func == curcb->ipobs_cbfunc) {
-			list_remove(&ipst->ips_ipobs_cb_list, curcb);
-			kmem_free(curcb, sizeof (*curcb));
-			break;
-		}
-	}
-	if (list_is_empty(&ipst->ips_ipobs_cb_list))
-		ipst->ips_ipobs_enabled = B_FALSE;
-	mutex_exit(&ipst->ips_ipobs_cb_lock);
-}
+	else
+		grifindex = 0;
+
+	hdr->hpo_version = 1;
+	hdr->hpo_htype = htype;
+	hdr->hpo_pktlen = htons((ushort_t)msgdsize(mp));
+	hdr->hpo_ifindex = htonl(ill->ill_phyint->phyint_ifindex);
+	hdr->hpo_grifindex = htonl(grifindex);
+	hdr->hpo_zsrc = htonl(zsrc);
+	hdr->hpo_zdst = htonl(zdst);
+	hdr->hpo_pkt = imp;
+	hdr->hpo_ctx = ipst->ips_netstack;
+
+	if (ill->ill_isv6) {
+		hdr->hpo_family = AF_INET6;
+		(void) hook_run(ipst->ips_ipv6_net_data->netd_hooks,
+		    ipst->ips_ipv6observing, (hook_data_t)hdr);
+	} else {
+		hdr->hpo_family = AF_INET;
+		(void) hook_run(ipst->ips_ipv4_net_data->netd_hooks,
+		    ipst->ips_ipv4observing, (hook_data_t)hdr);
+	}
+
+	imp->b_cont = NULL;
+	freemsg(imp);
+}
--- a/usr/src/uts/common/inet/ip/ip6.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ip/ip6.c	Thu Sep 24 07:28:12 2009 -0700
@@ -6563,13 +6563,13 @@
 	}

 	/* IP observability hook. */
-	if (ipst->ips_ipobs_enabled) {
+	if (ipst->ips_ip6_observe.he_interested) {
 		zoneid_t dzone;

 		dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
 		    ALL_ZONES);
-		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill,
-		    IPV6_VERSION, 0, ipst);
+		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone,
+		    ill, ipst);
 	}

 	if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) ==
@@ -10179,7 +10179,7 @@
 	if (first_mp == NULL)
 		return;

-	if (ipst->ips_ipobs_enabled) {
+	if (ipst->ips_ip6_observe.he_interested) {
 		zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES;
 		zoneid_t stackzoneid = netstackid_to_zoneid(
 		    ipst->ips_netstack->netstack_stackid);
@@ -10194,8 +10194,7 @@
 			lookup_zoneid = zoneid;
 		dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst,
 		    lookup_zoneid);
-		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
-		    IPV6_VERSION, 0, ipst);
+		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
 	}

 	DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *,
@@ -11885,14 +11884,23 @@
 				}
 			}

-			if (ipst->ips_ipobs_enabled) {
+			if (ipst->ips_ip6_observe.he_interested) {
 				zoneid_t	szone;

 				szone = ip_get_zoneid_v6(&ip6h->ip6_src,
 				    mp_ip6h, out_ill, ipst, ALL_ZONES);
+
+				/*
+				 * The IP observability hook expects b_rptr to
+				 * be where the IPv6 header starts, so advance
+				 * past the link layer header.
+				 */
+				if (fp_prepend)
+					mp_ip6h->b_rptr += hlen;
 				ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone,
-				    ALL_ZONES, out_ill, IPV6_VERSION,
-				    fp_prepend ? hlen : 0, ipst);
+				    ALL_ZONES, out_ill, ipst);
+				if (fp_prepend)
+					mp_ip6h->b_rptr -= hlen;
 			}

 			/*
--- a/usr/src/uts/common/inet/ip/ip_netinfo.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ip/ip_netinfo.c	Thu Sep 24 07:28:12 2009 -0700
@@ -311,6 +311,16 @@
 		cmn_err(CE_NOTE, "ipv4_hook_init: "
 		    "net_event_register failed for ipv4/nic_events");
 	}
+
+	HOOK_EVENT_INIT(&ipst->ips_ip4_observe, NH_OBSERVE);
+	ipst->ips_ip4_observe.he_flags = HOOK_RDONLY;
+	ipst->ips_ipv4observing = net_event_register(
+	    ipst->ips_ipv4_net_data, &ipst->ips_ip4_observe);
+	if (ipst->ips_ipv4observing == NULL) {
+		cmn_err(CE_NOTE, "ipv4_hook_init: "
+		    "net_event_register failed for ipv4/observe");
+	}
+
 }

 void
@@ -346,6 +356,11 @@
 		    &ipst->ips_ip4_nic_events);
 	}

+	if (ipst->ips_ipv4observing != NULL) {
+		(void) net_event_shutdown(ipst->ips_ipv4_net_data,
+		    &ipst->ips_ip4_observe);
+	}
+
 	(void) net_family_shutdown(ipst->ips_ipv4_net_data,
 	    &ipst->ips_ipv4root);
 }
@@ -389,6 +404,12 @@
 			ipst->ips_ipv4nicevents = NULL;
 	}

+	if (ipst->ips_ipv4observing != NULL) {
+		if (net_event_unregister(ipst->ips_ipv4_net_data,
+		    &ipst->ips_ip4_observe) == 0)
+			ipst->ips_ipv4observing = NULL;
+	}
+
 	(void) net_family_unregister(ipst->ips_ipv4_net_data,
 	    &ipst->ips_ipv4root);
 }
@@ -455,6 +476,15 @@
 		cmn_err(CE_NOTE, "ipv6_hook_init: "
 		    "net_event_register failed for ipv6/nic_events");
 	}
+
+	HOOK_EVENT_INIT(&ipst->ips_ip6_observe, NH_OBSERVE);
+	ipst->ips_ip6_observe.he_flags = HOOK_RDONLY;
+	ipst->ips_ipv6observing = net_event_register(
+	    ipst->ips_ipv6_net_data, &ipst->ips_ip6_observe);
+	if (ipst->ips_ipv6observing == NULL) {
+		cmn_err(CE_NOTE, "ipv6_hook_init: "
+		    "net_event_register failed for ipv6/observe");
+	}
 }

 void
@@ -490,6 +520,11 @@
 		    &ipst->ips_ip6_nic_events);
 	}

+	if (ipst->ips_ipv6observing != NULL) {
+		(void) net_event_shutdown(ipst->ips_ipv6_net_data,
+		    &ipst->ips_ip6_observe);
+	}
+
 	(void) net_family_shutdown(ipst->ips_ipv6_net_data,
 	    &ipst->ips_ipv6root);
 }
@@ -533,6 +568,12 @@
 			ipst->ips_ipv6nicevents = NULL;
 	}

+	if (ipst->ips_ipv6observing != NULL) {
+		if (net_event_unregister(ipst->ips_ipv6_net_data,
+		    &ipst->ips_ip6_observe) == 0)
+			ipst->ips_ipv6observing = NULL;
+	}
+
 	(void) net_family_unregister(ipst->ips_ipv6_net_data,
 	    &ipst->ips_ipv6root);
 }
@@ -1424,18 +1465,33 @@
 	    neti->netd_stack->nts_netstack->netstack_ip, zoneid));
 }

+/*
+ * The behaviour here mirrors that for the SIOCGLIFFLAGS ioctl where the
+ * union of all of the relevant flags is returned.
+ */
 static int
 ip_getlifflags_impl(sa_family_t family, phy_if_t phy_ifdata, lif_if_t ifdata,
     ip_stack_t *ipst, uint64_t *flags)
 {
+	phyint_t *phyi;
 	ipif_t *ipif;
+	ill_t *ill;
+
+	ill = ill_lookup_on_ifindex(phy_ifdata,
+	    (family == AF_INET6), NULL, NULL, NULL, NULL, ipst);
+	if (ill == NULL)
+		return (-1);
+	phyi = ill->ill_phyint;

 	ipif = ipif_getby_indexes((uint_t)phy_ifdata,
 	    UNMAP_IPIF_ID((uint_t)ifdata), (family == AF_INET6), ipst);
-	if (ipif == NULL)
+	if (ipif == NULL) {
+		ill_refrele(ill);
 		return (-1);
-	*flags = ipif->ipif_flags;
+	}
+	*flags = ipif->ipif_flags | ill->ill_flags | phyi->phyint_flags;
 	ipif_refrele(ipif);
+	ill_refrele(ill);
 	return (0);
 }
--- a/usr/src/uts/common/inet/ip_stack.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ip_stack.h	Thu Sep 24 07:28:12 2009 -0700
@@ -386,6 +386,9 @@
 	hook_family_t	ips_ipv4root;
 	hook_family_t	ips_ipv6root;

+	net_handle_t		ips_ipv4_net_data;
+	net_handle_t		ips_ipv6_net_data;
+
 	/*
 	 * Hooks for firewalling
 	 */
@@ -394,35 +397,34 @@
 	hook_event_t		ips_ip4_forwarding_event;
 	hook_event_t		ips_ip4_loopback_in_event;
 	hook_event_t		ips_ip4_loopback_out_event;
-	hook_event_t		ips_ip4_nic_events;
 	hook_event_t		ips_ip6_physical_in_event;
 	hook_event_t		ips_ip6_physical_out_event;
 	hook_event_t		ips_ip6_forwarding_event;
 	hook_event_t		ips_ip6_loopback_in_event;
 	hook_event_t		ips_ip6_loopback_out_event;
-	hook_event_t		ips_ip6_nic_events;

 	hook_event_token_t	ips_ipv4firewall_physical_in;
 	hook_event_token_t	ips_ipv4firewall_physical_out;
 	hook_event_token_t	ips_ipv4firewall_forwarding;
 	hook_event_token_t	ips_ipv4firewall_loopback_in;
 	hook_event_token_t	ips_ipv4firewall_loopback_out;
-	hook_event_token_t	ips_ipv4nicevents;
 	hook_event_token_t	ips_ipv6firewall_physical_in;
 	hook_event_token_t	ips_ipv6firewall_physical_out;
 	hook_event_token_t	ips_ipv6firewall_forwarding;
 	hook_event_token_t	ips_ipv6firewall_loopback_in;
 	hook_event_token_t	ips_ipv6firewall_loopback_out;
+
+	hook_event_t		ips_ip4_nic_events;
+	hook_event_t		ips_ip6_nic_events;
+	hook_event_token_t	ips_ipv4nicevents;
 	hook_event_token_t	ips_ipv6nicevents;

-	net_handle_t		ips_ipv4_net_data;
-	net_handle_t		ips_ipv6_net_data;
-
-	boolean_t		ips_ipobs_enabled;
-	list_t			ips_ipobs_cb_list;
-	kmutex_t		ips_ipobs_cb_lock;
-	uint_t			ips_ipobs_cb_nwalkers;
-	kcondvar_t		ips_ipobs_cb_cv;
+	net_handle_t		ips_ip4_observe_pr;
+	net_handle_t		ips_ip6_observe_pr;
+	hook_event_t		ips_ip4_observe;
+	hook_event_t		ips_ip6_observe;
+	hook_event_token_t	ips_ipv4observing;
+	hook_event_token_t	ips_ipv6observing;

 	struct __ldi_ident	*ips_ldi_ident;
--- a/usr/src/uts/common/inet/ipnet.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ipnet.h	Thu Sep 24 07:28:12 2009 -0700
@@ -20,7 +20,7 @@
  */

 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -36,11 +36,39 @@
 #include <sys/list.h>
 #include <netinet/in.h>
 #include <net/if.h>
+#include <net/bpf.h>
 #include <sys/avl.h>
 #include <sys/neti.h>
+#include <sys/hook_event.h>
+#include <sys/zone.h>
+#include <sys/kstat.h>
+
+typedef struct ipnet_kstats_s	{
+	kstat_named_t	ik_duplicationFail;
+	kstat_named_t	ik_dispatchOk;
+	kstat_named_t	ik_dispatchFail;
+	kstat_named_t	ik_dispatchHeaderDrop;
+	kstat_named_t	ik_dispatchDupDrop;
+	kstat_named_t	ik_dispatchPutDrop;
+	kstat_named_t	ik_dispatchDeliver;
+	kstat_named_t	ik_acceptOk;
+	kstat_named_t	ik_acceptFail;
+} ipnet_kstats_t;
+
+#define	IPSK_BUMP(_x, _y)	(_x)->ips_stats._y.value.ui64++

 /*
  * Structure used to hold information for both IPv4 and IPv6 addresses.
+ *
+ * When ifa_shared is non-NULL, it points to a "fake" ipnetif_t structure
+ * that represents the network interface for each zone that shares its
+ * network stack. This is used by BPF to build a list of interface names
+ * present in each zone. Multiple ipnetif_addr_t's may point to a single
+ * ipnetif_t using ifa_shared. The typical case is the global zone has
+ * a bge0 that other zones use as bge0:1, bge0:2, etc. In ipnet, the
+ * ipnetif_addr_t's that store the IP address for bge0:1, etc, would
+ * point to an ipnetif_t stored in the if_avl_by_shared tree that has
+ * the name "bge0".
  */
 typedef struct ipnetif_addr {
 	union {
@@ -51,6 +79,7 @@
 	zoneid_t	ifa_zone;
 	uint64_t	ifa_id;
 	list_node_t	ifa_link;
+	struct ipnetif	*ifa_shared;
 } ipnetif_addr_t;
 #define	ifa_ip4addr	ifa_addr.ifau_ip4addr
 #define	ifa_ip6addr	ifa_addr.ifau_ip6addr
@@ -60,11 +89,19 @@
  * The structure holds both IPv4 and IPv6 addresses, the address lists are
  * protected by a mutex. The ipnetif structures are held per stack instance
  * within avl trees indexed on name and ip index.
+ *
+ * if_avl_by_shared is used by zones that share their instance of IP with
+ * other zones. It is used to store ipnetif_t structures. An example of this
+ * is the global zone sharing its instance of IP with other local zones.
+ * In this case, if_avl_by_shared is a tree of names that are in active use
+ * by zones using a shared instance of IP.
+ * The value in if_sharecnt represents the number of ipnetif_addr_t's that
+ * point to it.
  */
 typedef struct ipnetif {
 	char		if_name[LIFNAMSIZ];
 	uint_t		if_flags;
-	uint64_t	if_index;
+	uint_t		if_index;
 	kmutex_t	if_addr_lock;	/* protects both addr lists */
 	list_t		if_ip4addr_list;
 	list_t		if_ip6addr_list;
@@ -73,7 +110,11 @@
 	dev_t		if_dev;
 	uint_t		if_multicnt;	/* protected by ips_event_lock */
 	kmutex_t	if_reflock;	/* protects if_refcnt */
-	uint_t		if_refcnt;
+	int		if_refcnt;	/* if_reflock */
+	zoneid_t	if_zoneid;
+	avl_node_t	if_avl_by_shared;	/* protected by ips_avl_lock */
+	struct ipnet_stack *if_stackp;
+	int		if_sharecnt;	/* protected by if_reflock */
 } ipnetif_t;

 /* if_flags */
@@ -81,6 +122,7 @@
 #define	IPNETIF_IPV6PLUMBED	0x02
 #define	IPNETIF_IPV4ALLMULTI	0x04
 #define	IPNETIF_IPV6ALLMULTI	0x08
+#define	IPNETIF_LOOPBACK	0x10

 /*
  * Structure used by the accept callback function.  This is simply an address
@@ -99,7 +141,7 @@

 struct ipnet;
 struct ipobs_hook_data;
-typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct ipobs_hook_data *,
+typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct hook_pkt_observe_s *,
     ipnet_addrp_t *, ipnet_addrp_t *);

 /*
@@ -111,12 +153,14 @@
 	minor_t		ipnet_minor;	/* minor number for this instance */
 	ipnetif_t	*ipnet_if;	/* ipnetif for this open instance */
 	zoneid_t	ipnet_zoneid;	/* zoneid the device was opened in */
-	uint16_t	ipnet_flags;	/* see below */
-	t_scalar_t	ipnet_sap;	/* sap this instance is bound to */
+	uint_t		ipnet_flags;	/* see below */
+	t_scalar_t	ipnet_family;	/* protocol family of this instance */
 	t_uscalar_t	ipnet_dlstate;	/* dlpi state */
 	list_node_t	ipnet_next;	/* list next member */
 	netstack_t	*ipnet_ns;	/* netstack of zone we were opened in */
 	ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */
+	hook_t		*ipnet_hook;	/* hook token to unregister */
+	void		*ipnet_data;	/* value to pass back to bpf_itap */
 } ipnet_t;

 /* ipnet_flags */
@@ -159,7 +203,12 @@
 	kcondvar_t	ips_walkers_cv;
 	uint_t		ips_walkers_cnt;
 	list_t		ips_str_list;
-	uint64_t	ips_drops;
+	kstat_t		*ips_kstatp;
+	ipnet_kstats_t	ips_stats;
+	bpf_attach_fn_t	ips_bpfattach_fn;
+	bpf_detach_fn_t	ips_bpfdetach_fn;
+	avl_tree_t	ips_avl_by_shared;
+	hook_t		*ips_hook;
 } ipnet_stack_t;

 /*
@@ -191,8 +240,22 @@
 }

 typedef void ipnet_walkfunc_t(const char *, void *, dev_t);
-extern void ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
-extern dev_t ipnet_if_getdev(char *, zoneid_t);
+
+extern int	ipnet_client_open(ipnetif_t *, ipnetif_t **);
+extern void	ipnet_client_close(ipnetif_t *);
+extern void	ipnet_close_byhandle(ipnetif_t *);
+extern int	ipnet_get_linkid_byname(const char *, datalink_id_t *,
+    zoneid_t);
+extern dev_t	ipnet_if_getdev(char *, zoneid_t);
+extern const char *ipnet_name(ipnetif_t *);
+extern int	ipnet_open_byname(const char *, ipnetif_t **, zoneid_t);
+extern int	ipnet_promisc_add(void *, uint_t, void *, uintptr_t *, int);
+extern void	ipnet_promisc_remove(void *);
+extern void	ipnet_set_bpfattach(bpf_attach_fn_t, bpf_detach_fn_t,
+    zoneid_t, bpf_itap_fn_t, bpf_provider_reg_fn_t);
+extern void	ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
+
+extern bpf_provider_t	bpf_ipnet;

 #ifdef __cplusplus
 }
--- a/usr/src/uts/common/inet/ipnet/ipnet.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/ipnet/ipnet.c	Thu Sep 24 07:28:12 2009 -0700
@@ -59,12 +59,17 @@
 #include <sys/list.h>
 #include <sys/ksynch.h>
 #include <sys/hook_event.h>
+#include <sys/sdt.h>
 #include <sys/stropts.h>
 #include <sys/sysmacros.h>
 #include <inet/ip.h>
+#include <inet/ip_if.h>
 #include <inet/ip_multi.h>
 #include <inet/ip6.h>
 #include <inet/ipnet.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <net/dlt.h>

 static struct module_info ipnet_minfo = {
 	1,		/* mi_idnum */
@@ -116,6 +121,7 @@
 static const int 	IPNET_MINOR_MIN = 2; 	/* start of dynamic minors */
 static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
 static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
+static bpf_itap_fn_t	ipnet_itap;

 static void	ipnet_input(mblk_t *);
 static int	ipnet_wput(queue_t *, mblk_t *);
@@ -137,16 +143,18 @@
 static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
 static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
 static void	ipnet_nicevent_task(void *);
-static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *);
-static void	ipnet_remove_if(ipnetif_t *, ipnet_stack_t *);
+static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
+    uint64_t);
+static void	ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
-static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *);
-static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *);
-static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
-static void	ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *);
+static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
+static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
+static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
+static void	ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
 static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
-static int 	ipnet_if_compare_name(const void *, const void *);
-static int 	ipnet_if_compare_index(const void *, const void *);
+static int 	ipnetif_compare_name(const void *, const void *);
+static int 	ipnetif_compare_name_zone(const void *, const void *);
+static int 	ipnetif_compare_index(const void *, const void *);
 static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
 static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
 static void	ipnetif_refhold(ipnetif_t *);
@@ -156,6 +164,15 @@
 static void	ipnet_register_netihook(ipnet_stack_t *);
 static void	*ipnet_stack_init(netstackid_t, netstack_t *);
 static void	ipnet_stack_fini(netstackid_t, void *);
+static void	ipnet_dispatch(void *);
+static int	ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
+static void	ipnet_bpfattach(ipnetif_t *);
+static void	ipnet_bpfdetach(ipnetif_t *);
+static int	ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
+static void	ipnet_bpf_probe_shared(ipnet_stack_t *);
+static void	ipnet_bpf_release_shared(ipnet_stack_t *);
+static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
+static void	ipnetif_clone_release(ipnetif_t *);

 static struct qinit ipnet_rinit = {
 	NULL,		/* qi_putp */
@@ -194,6 +211,23 @@
 };

 /*
+ * Template data (names and types only, no statistics) that is bulk-copied
+ * into the new kstats structure created by net_kstat_create.
+ * NOTE(review): entries map positionally onto ipnet_kstats_t fields, yet
+ * ik_dispatchPutDrop (bumped in ipnet_dispatch()) has no entry here - confirm.
+ */
+static ipnet_kstats_t stats_template = {
+	{ "duplicationFail",	KSTAT_DATA_UINT64 },
+	{ "dispatchOk",		KSTAT_DATA_UINT64 },
+	{ "dispatchFail",	KSTAT_DATA_UINT64 },
+	{ "dispatchHeaderDrop",	KSTAT_DATA_UINT64 },
+	{ "dispatchDupDrop",	KSTAT_DATA_UINT64 },
+	{ "dispatchDeliver",	KSTAT_DATA_UINT64 },
+	{ "acceptOk",		KSTAT_DATA_UINT64 },
+	{ "acceptFail",		KSTAT_DATA_UINT64 }
+};
+
+/*
  * Walk the list of physical interfaces on the machine, for each
  * interface create a new ipnetif_t and add any addresses to it. We
  * need to do the walk twice, once for IPv4 and once for IPv6.
@@ -203,7 +237,7 @@
  * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
  */
 static int
-ipnet_if_init(void)
+ipnetif_init(void)
 {
 	netstack_handle_t	nh;
 	netstack_t		*ns;
@@ -229,8 +263,8 @@
 int
 _init(void)
 {
-	int ret;
-	boolean_t netstack_registered = B_FALSE;
+	int		ret;
+	boolean_t	netstack_registered = B_FALSE;

 	if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
 		return (ENODEV);
@@ -254,7 +288,7 @@
 	netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
 	netstack_registered = B_TRUE;

-	if ((ret = ipnet_if_init()) == 0)
+	if ((ret = ipnetif_init()) == 0)
 		ret = mod_install(&modlinkage);
 done:
 	if (ret != 0) {
@@ -272,7 +306,7 @@
 int
 _fini(void)
 {
-	int err;
+	int	err;

 	if ((err = mod_remove(&modlinkage)) != 0)
 		return (err);
@@ -327,6 +361,24 @@
 			    " in zone %d: %d", zoneid, ret);
 		}
 	}
+
+	/*
+	 * Create a local set of kstats for each zone.
+	 */
+	ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
+	    "misc", KSTAT_TYPE_NAMED,
+	    sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
+	if (ips->ips_kstatp != NULL) {
+		bcopy(&stats_template, &ips->ips_stats,
+		    sizeof (ips->ips_stats));
+		ips->ips_kstatp->ks_data = &ips->ips_stats;
+		ips->ips_kstatp->ks_private =
+		    (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
+		kstat_install(ips->ips_kstatp);
+	} else {
+		cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
+		    "ipnet", "ipnet_stats", "misc");
+	}
 }

 /*
@@ -338,13 +390,13 @@
 static int
 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
 {
-	phy_if_t		phyif;
-	lif_if_t		lif;
-	ipnetif_t		*ipnetif;
-	char			name[LIFNAMSIZ];
-	boolean_t		new_if = B_FALSE;
-	uint64_t		ifflags;
-	int			ret = 0;
+	phy_if_t	phyif;
+	lif_if_t	lif;
+	ipnetif_t	*ipnetif;
+	char		name[LIFNAMSIZ];
+	boolean_t	new_if = B_FALSE;
+	uint64_t	ifflags;
+	int		ret = 0;

 	/*
 	 * If ipnet_register_netihook() was unable to initialize this
@@ -368,8 +420,10 @@
 	    phyif = net_phygetnext(nd, phyif)) {
 		if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
 			continue;
-		if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) {
-			ipnetif = ipnet_create_if(name, phyif, ips);
+		ifflags =  0;
+		(void) net_getlifflags(nd, phyif, 0, &ifflags);
+		if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
+			ipnetif = ipnetif_create(name, phyif, ips, ifflags);
 			if (ipnetif == NULL) {
 				ret = ENOMEM;
 				goto done;
@@ -432,7 +486,7 @@
 static int
 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 {
-	int error = DDI_FAILURE;
+	int	error = DDI_FAILURE;

 	switch (infocmd) {
 	case DDI_INFO_DEVT2INSTANCE:
@@ -485,7 +539,6 @@
 	ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
 	ipnet->ipnet_zoneid = zoneid;
 	ipnet->ipnet_dlstate = DL_UNBOUND;
-	ipnet->ipnet_sap = 0;
 	ipnet->ipnet_ns = ns;

 	/*
@@ -499,9 +552,9 @@
 		ipnet->ipnet_acceptfn = ipnet_loaccept;
 	} else {
 		ipnet->ipnet_acceptfn = ipnet_accept;
-		ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips);
+		ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
 		if (ipnet->ipnet_if == NULL ||
-		    !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) {
+		    !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
 			err = ENODEV;
 			goto done;
 		}
@@ -519,7 +572,7 @@
 	 * unregister in close() for the last open client.
 	 */
 	if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
-		ipobs_register_hook(ns, ipnet_input);
+		ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
 	mutex_exit(&ips->ips_walkers_lock);

 done:
@@ -555,10 +608,13 @@
 	if (ipnet->ipnet_if != NULL)
 		ipnetif_refrele(ipnet->ipnet_if);
 	id_free(ipnet_minor_space, ipnet->ipnet_minor);
-	kmem_free(ipnet, sizeof (*ipnet));

-	if (list_is_empty(&ips->ips_str_list))
-		ipobs_unregister_hook(ips->ips_netstack, ipnet_input);
+	if (list_is_empty(&ips->ips_str_list)) {
+		ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
+		ips->ips_hook = NULL;
+	}
+
+	kmem_free(ipnet, sizeof (*ipnet));

 	mutex_exit(&ips->ips_walkers_lock);
 	netstack_rele(ips->ips_netstack);
@@ -599,7 +655,7 @@
 static int
 ipnet_rsrv(queue_t *q)
 {
-	mblk_t *mp;
+	mblk_t	*mp;

 	while ((mp = getq(q)) != NULL) {
 		ASSERT(DB_TYPE(mp) == M_DATA);
@@ -616,7 +672,7 @@
 static void
 ipnet_ioctl(queue_t *q, mblk_t *mp)
 {
-	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
+	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;

 	switch (iocp->ioc_cmd) {
 	case DLIOCRAW:
@@ -639,7 +695,7 @@
 ipnet_iocdata(queue_t *q, mblk_t *mp)
 {
 	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
-	ipnet_t		*ipnet = q->q_ptr;
+	ipnet_t	*ipnet = q->q_ptr;

 	switch (iocp->ioc_cmd) {
 	case DLIOCIPNETINFO:
@@ -652,7 +708,7 @@
 		miocack(q, mp, 0, DL_IPNETINFO_VERSION);
 		break;
 	default:
-	iocnak:
+iocnak:
 		miocnak(q, mp, 0, EINVAL);
 		break;
 	}
@@ -717,23 +773,32 @@
 static void
 ipnet_bindreq(queue_t *q, mblk_t *mp)
 {
-	union   DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
-	int32_t sap;
-	ipnet_t	*ipnet = q->q_ptr;
+	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
+	ipnet_t			*ipnet = q->q_ptr;

 	if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
 		return;
 	}

-	sap = dlp->bind_req.dl_sap;
-	if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) {
+	switch (dlp->bind_req.dl_sap) {
+	case 0 :
+		ipnet->ipnet_family = AF_UNSPEC;
+		break;
+	case IPV4_VERSION :
+		ipnet->ipnet_family = AF_INET;
+		break;
+	case IPV6_VERSION :
+		ipnet->ipnet_family = AF_INET6;
+		break;
+	default :
 		dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
-	} else {
-		ipnet->ipnet_sap = sap;
-		ipnet->ipnet_dlstate = DL_IDLE;
-		dlbindack(q, mp, sap, 0, 0, 0, 0);
+		return;
+		/*NOTREACHED*/
 	}
+
+	ipnet->ipnet_dlstate = DL_IDLE;
+	dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
 }

 static void
@@ -750,7 +815,7 @@
 		dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
 	} else {
 		ipnet->ipnet_dlstate = DL_UNBOUND;
-		ipnet->ipnet_sap = 0;
+		ipnet->ipnet_family = AF_UNSPEC;
 		dlokack(q, mp, DL_UNBIND_REQ);
 	}
 }
@@ -907,8 +972,14 @@
 	mutex_exit(&ips->ips_event_lock);
 }

+/*
+ * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
+ * The structure it copies the header information from,
+ * hook_pkt_observe_t, is constructed using network byte
+ * order in ipobs_hook(), so there is no conversion here.
+ */
 static mblk_t *
-ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp)
+ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
 {
 	mblk_t		*dlhdr;
 	dl_ipnetinfo_t	*dl;
@@ -919,10 +990,13 @@
 	}
 	dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
 	dl->dli_version = DL_IPNETINFO_VERSION;
-	dl->dli_len = htons(sizeof (*dl));
-	dl->dli_ipver = ihd->ihd_ipver;
-	dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc);
-	dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst);
+	dl->dli_family = hdr->hpo_family;
+	dl->dli_htype = hdr->hpo_htype;
+	dl->dli_pktlen = hdr->hpo_pktlen;
+	dl->dli_ifindex = hdr->hpo_ifindex;
+	dl->dli_grifindex = hdr->hpo_grifindex;
+	dl->dli_zsrc = hdr->hpo_zsrc;
+	dl->dli_zdst = hdr->hpo_zdst;
 	dlhdr->b_wptr += sizeof (*dl);
 	dlhdr->b_cont = mp;

@@ -989,16 +1063,17 @@
 }

 /*
- * Verify if the packet contained in ihd should be passed up to the
+ * Verify if the packet contained in hdr should be passed up to the
  * ipnet client stream.
  */
 static boolean_t
-ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
     ipnet_addrp_t *dst)
 {
 	boolean_t		obsif;
 	uint64_t		ifindex = ipnet->ipnet_if->if_index;
-	ipnet_addrtype_t	srctype, dsttype;
+	ipnet_addrtype_t	srctype;
+	ipnet_addrtype_t	dsttype;

 	srctype = ipnet_get_addrtype(ipnet, src);
 	dsttype = ipnet_get_addrtype(ipnet, dst);
@@ -1008,7 +1083,13 @@
 	 * matches ours, it's on the interface we're observing.  (Thus,
 	 * observing on the group ifindex matches all ifindexes in the group.)
 	 */
-	obsif = (ihd->ihd_ifindex == ifindex || ihd->ihd_grifindex == ifindex);
+	obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
+	    ntohl(hdr->hpo_grifindex) == ifindex);
+
+	DTRACE_PROBE5(ipnet_accept__addr,
+	    ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
+	    ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
+	    boolean_t, obsif);

 	/*
 	 * Do not allow an ipnet stream to see packets that are not from or to
@@ -1019,8 +1100,8 @@
 	 */
 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
 	    dsttype != IPNETADDR_MBCAST) {
-		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
-		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
+		if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
+		    ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
 			return (B_FALSE);
 	}

@@ -1029,7 +1110,7 @@
 	 * packet's IP version.
 	 */
 	if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
-	    ipnet->ipnet_sap != ihd->ihd_ipver)
+	    ipnet->ipnet_family != hdr->hpo_family)
 		return (B_FALSE);

 	/* If the destination address is ours, then accept the packet. */
@@ -1057,48 +1138,59 @@
 }

 /*
- * Verify if the packet contained in ihd should be passed up to the ipnet
+ * Verify if the packet contained in hdr should be passed up to the ipnet
  * client stream that's in IPNET_LOMODE.
  */
 /* ARGSUSED */
 static boolean_t
-ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
+ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
     ipnet_addrp_t *dst)
 {
-	if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
-		return (B_FALSE);
+	if (hdr->hpo_htype != IPOBS_HOOK_LOCAL) {
+		/*
+		 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
+		 */
+		if (ipnet->ipnet_if == NULL)
+			return (B_FALSE);
+	}

 	/*
 	 * An ipnet stream must not see packets that are not from/to its zone.
 	 */
 	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
-		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
-		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
+		if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
+		    ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
 			return (B_FALSE);
 	}

-	return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver);
+	return (ipnet->ipnet_family == AF_UNSPEC ||
+	    ipnet->ipnet_family == hdr->hpo_family);
 }

 static void
 ipnet_dispatch(void *arg)
 {
 	mblk_t			*mp = arg;
-	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;
+	hook_pkt_observe_t	*hdr = (hook_pkt_observe_t *)mp->b_rptr;
 	ipnet_t			*ipnet;
 	mblk_t			*netmp;
 	list_t			*list;
-	ipnet_stack_t		*ips = ihd->ihd_stack->netstack_ipnet;
-	ipnet_addrp_t		src, dst;
+	ipnet_stack_t		*ips;
+	ipnet_addrp_t		src;
+	ipnet_addrp_t		dst;
+
+	ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;

-	if (ihd->ihd_ipver == IPV4_VERSION) {
-		src.iap_family = dst.iap_family = AF_INET;
-		src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
-		dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
+	netmp = hdr->hpo_pkt->b_cont;
+	src.iap_family = hdr->hpo_family;
+	dst.iap_family = hdr->hpo_family;
+
+	if (hdr->hpo_family == AF_INET) {
+		src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
+		dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
 	} else {
-		src.iap_family = dst.iap_family = AF_INET6;
-		src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
-		dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
+		src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
+		dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
 	}

 	ipnet_walkers_inc(ips);
@@ -1106,23 +1198,26 @@
 	list = &ips->ips_str_list;
 	for (ipnet = list_head(list); ipnet != NULL;
 	    ipnet = list_next(list, ipnet)) {
-		if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
+		if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
+			IPSK_BUMP(ips, ik_acceptFail);
 			continue;
+		}
+		IPSK_BUMP(ips, ik_acceptOk);

 		if (list_next(list, ipnet) == NULL) {
-			netmp = ihd->ihd_mp;
-			ihd->ihd_mp = NULL;
+			netmp = hdr->hpo_pkt->b_cont;
+			hdr->hpo_pkt->b_cont = NULL;
 		} else {
-			if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
-			    (netmp = copymsg(ihd->ihd_mp)) == NULL) {
-				atomic_inc_64(&ips->ips_drops);
+			if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
+			    (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
+				IPSK_BUMP(ips, ik_duplicationFail);
 				continue;
 			}
 		}

 		if (ipnet->ipnet_flags & IPNET_INFO) {
-			if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
-				atomic_inc_64(&ips->ips_drops);
+			if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
+				IPSK_BUMP(ips, ik_dispatchHeaderDrop);
 				continue;
 			}
 		}
@@ -1130,68 +1225,91 @@
 		if (ipnet->ipnet_rq->q_first == NULL &&
 		    canputnext(ipnet->ipnet_rq)) {
 			putnext(ipnet->ipnet_rq, netmp);
+			IPSK_BUMP(ips, ik_dispatchDeliver);
 		} else if (canput(ipnet->ipnet_rq)) {
 			(void) putq(ipnet->ipnet_rq, netmp);
+			IPSK_BUMP(ips, ik_dispatchDeliver);
 		} else {
 			freemsg(netmp);
-			atomic_inc_64(&ips->ips_drops);
+			IPSK_BUMP(ips, ik_dispatchPutDrop);
 		}
 	}

 	ipnet_walkers_dec(ips);

-	freemsg(ihd->ihd_mp);
 	freemsg(mp);
 }

 static void
 ipnet_input(mblk_t *mp)
 {
-	ipobs_hook_data_t  *ihd = (ipobs_hook_data_t *)mp->b_rptr;
+	hook_pkt_observe_t	*hdr = (hook_pkt_observe_t *)mp->b_rptr;
+	ipnet_stack_t		*ips;
+
+	ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;

 	if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
 	    DDI_SUCCESS) {
-		atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
-		freemsg(ihd->ihd_mp);
+		IPSK_BUMP(ips, ik_dispatchFail);
 		freemsg(mp);
+	} else {
+		IPSK_BUMP(ips, ik_dispatchOk);
 	}
 }

+static ipnetif_t *
+ipnet_alloc_if(ipnet_stack_t *ips)
+{
+	ipnetif_t	*ipnetif;
+
+	if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
+		return (NULL);
+
+	mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
+	list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
+	    offsetof(ipnetif_addr_t, ifa_link));
+	list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
+	    offsetof(ipnetif_addr_t, ifa_link));
+	mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
+
+	ipnetif->if_stackp = ips;
+
+	return (ipnetif);
+}
+
 /*
  * Create a new ipnetif_t and new minor node for it.  If creation is
  * successful the new ipnetif_t is inserted into an avl_tree
  * containing ipnetif's for this stack instance.
  */
 static ipnetif_t *
-ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
+ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
+    uint64_t ifflags)
 {
 	ipnetif_t	*ipnetif;
 	avl_index_t	where = 0;
 	minor_t		ifminor;

 	/*
-	 * Because ipnet_create_if() can be called from a NIC event
+	 * Because ipnetif_create() can be called from a NIC event
 	 * callback, it should not block.
 	 */
 	ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
 	if (ifminor == (minor_t)-1)
 		return (NULL);
-	if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) {
+	if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
 		id_free(ipnet_minor_space, ifminor);
 		return (NULL);
 	}

 	(void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
-	ipnetif->if_index = index;
+	ipnetif->if_index = (uint_t)index;
+	ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
+	ipnetif->if_dev = makedevice(ipnet_major, ifminor);

-	mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
-	list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
-	    offsetof(ipnetif_addr_t, ifa_link));
-	list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
-	    offsetof(ipnetif_addr_t, ifa_link));
-	ipnetif->if_dev = makedevice(ipnet_major, ifminor);
-	mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
 	ipnetif->if_refcnt = 1;
+	if ((ifflags & IFF_LOOPBACK) != 0)
+		ipnetif->if_flags = IPNETIF_LOOPBACK;

 	mutex_enter(&ips->ips_avl_lock);
 	VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
@@ -1199,12 +1317,17 @@
 	VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
 	avl_insert(&ips->ips_avl_by_name, ipnetif, where);
 	mutex_exit(&ips->ips_avl_lock);
+	/*
+	 * Now that the interface can be found by lookups back into ipnet,
+	 * allowing for sanity checking, call the BPF attach.
+	 */
+	ipnet_bpfattach(ipnetif);

 	return (ipnetif);
 }

 static void
-ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
 {
 	ipnet_t	*ipnet;

@@ -1220,25 +1343,34 @@
 	avl_remove(&ips->ips_avl_by_index, ipnetif);
 	avl_remove(&ips->ips_avl_by_name, ipnetif);
 	mutex_exit(&ips->ips_avl_lock);
-	/* Release the reference we implicitly held in ipnet_create_if(). */
+	/*
+	 * Now that the interface can't be found, do a BPF detach
+	 */
+	ipnet_bpfdetach(ipnetif);
+	/*
+	 * Release the reference we implicitly held in ipnetif_create().
+	 */
 	ipnetif_refrele(ipnetif);
 }

 static void
 ipnet_purge_addrlist(list_t *addrlist)
 {
-	ipnetif_addr_t *ifa;
+	ipnetif_addr_t	*ifa;

 	while ((ifa = list_head(addrlist)) != NULL) {
 		list_remove(addrlist, ifa);
+		if (ifa->ifa_shared != NULL)
+			ipnetif_clone_release(ifa->ifa_shared);
 		kmem_free(ifa, sizeof (*ifa));
 	}
 }

 static void
-ipnet_free_if(ipnetif_t *ipnetif)
+ipnetif_free(ipnetif_t *ipnetif)
 {
 	ASSERT(ipnetif->if_refcnt == 0);
+	ASSERT(ipnetif->if_sharecnt == 0);

 	/* Remove IPv4/v6 address lists from the ipnetif */
 	ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
@@ -1247,7 +1379,8 @@
 	list_destroy(&ipnetif->if_ip6addr_list);
 	mutex_destroy(&ipnetif->if_addr_lock);
 	mutex_destroy(&ipnetif->if_reflock);
-	id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
+	if (ipnetif->if_dev != 0)
+		id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
 	kmem_free(ipnetif, sizeof (*ipnetif));
 }

@@ -1270,11 +1403,12 @@
 	if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
 	    net_getlifzone(nd, phyif, lif, &zoneid) != 0)
 		return;
+
 	if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
 		return;
-
 	ifaddr->ifa_zone = zoneid;
 	ifaddr->ifa_id = lif;
+	ifaddr->ifa_shared = NULL;

 	switch (addr.ss_family) {
 	case AF_INET:
@@ -1295,6 +1429,12 @@
 	}

 	mutex_enter(&ipnetif->if_addr_lock);
+	if (zoneid != ipnetif->if_zoneid) {
+		ipnetif_t *ifp2;
+
+		ifp2 = ipnetif_clone_create(ipnetif, zoneid);
+		ifaddr->ifa_shared = ifp2;
+	}
 	list_insert_tail(addr.ss_family == AF_INET ?
 	    &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
 	mutex_exit(&ipnetif->if_addr_lock);
@@ -1304,6 +1444,9 @@
 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
 {
 	mutex_enter(&ipnetif->if_addr_lock);
+	if (ifaddr->ifa_shared != NULL)
+		ipnetif_clone_release(ifaddr->ifa_shared);
+
 	list_remove(isv6 ?
 	    &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
 	mutex_exit(&ipnetif->if_addr_lock);
@@ -1311,14 +1454,22 @@
 }

 static void
-ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips,
-    boolean_t isv6)
+ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
 {
 	ipnetif_t	*ipnetif;
 	boolean_t	refrele_needed = B_TRUE;
+	uint64_t	ifflags;
+	uint64_t	ifindex;
+	char		*ifname;

-	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) {
-		ipnetif = ipnet_create_if(ifname, ifindex, ips);
+	ifflags = 0;
+	ifname = ipne->ipne_ifname;
+	ifindex = ipne->ipne_ifindex;
+
+	(void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
+
+	if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
+		ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
 		refrele_needed = B_FALSE;
 	}
 	if (ipnetif != NULL) {
@@ -1343,7 +1494,7 @@
 {
 	ipnetif_t	*ipnetif;

-	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+	if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
 		return;

 	mutex_enter(&ipnetif->if_addr_lock);
@@ -1358,7 +1509,7 @@
 	 */
 	ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
 	if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
-		ipnet_remove_if(ipnetif, ips);
+		ipnetif_remove(ipnetif, ips);
 	ipnetif_refrele(ipnetif);
 }

@@ -1369,7 +1520,7 @@
 	ipnetif_t	*ipnetif;
 	ipnetif_addr_t	*ifaddr;

-	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+	if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
 		return;
 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
 		/*
@@ -1390,7 +1541,7 @@
 	ipnetif_t	*ipnetif;
 	ipnetif_addr_t	*ifaddr;

-	if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL)
+	if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
 		return;
 	if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
 		ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
@@ -1399,7 +1550,7 @@
 	 * Make sure that open streams on this ipnetif are still allowed to
 	 * have it open.
 	 */
-	ipnet_if_zonecheck(ipnetif, ips);
+	ipnetif_zonecheck(ipnetif, ips);
 }

 /*
@@ -1446,8 +1597,7 @@
 	mutex_enter(&ips->ips_event_lock);
 	switch (ipne->ipne_event) {
 	case NE_PLUMB:
-		ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips,
-		    isv6);
+		ipnet_plumb_ev(ipne, ips, isv6);
 		break;
 	case NE_UNPLUMB:
 		ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
@@ -1486,7 +1636,7 @@
 	ips = ns->netstack_ipnet;
 	mutex_enter(&ips->ips_avl_lock);
 	if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
-		if (ipnet_if_in_zone(ipnetif, zoneid, ips))
+		if (ipnetif_in_zone(ipnetif, zoneid, ips))
 			dev = ipnetif->if_dev;
 	}
 	mutex_exit(&ips->ips_avl_lock);
@@ -1496,7 +1646,7 @@
 }

 static ipnetif_t *
-ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips)
+ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
 {
 	ipnetif_t	*ipnetif;

@@ -1508,7 +1658,7 @@
 }

 static ipnetif_t *
-ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips)
+ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
 {
 	ipnetif_t	*ipnetif;
 	avl_tree_t	*tree;
@@ -1530,7 +1680,7 @@
 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
 {
 	ipnetif_addr_t	*ifaddr;
-	list_t		*list;
+	list_t	*list;

 	mutex_enter(&ipnetif->if_addr_lock);
 	list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
@@ -1552,10 +1702,12 @@
 	ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
 	ips->ips_netstack = ns;
 	mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
-	avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index,
+	avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
-	avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name,
+	avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
 	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
+	avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
+	    sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
 	mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
 	list_create(&ips->ips_str_list, sizeof (ipnet_t),
@@ -1571,6 +1723,12 @@
 	ipnet_stack_t	*ips = arg;
 	ipnetif_t	*ipnetif, *nipnetif;

+	if (ips->ips_kstatp != NULL) {
+		zoneid_t zoneid;
+
+		zoneid = netstackid_to_zoneid(stackid);
+		net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
+	}
 	if (ips->ips_ndv4 != NULL) {
 		VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
 		    ips->ips_nicevents) == 0);
@@ -1586,8 +1744,9 @@
 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
 	    ipnetif = nipnetif) {
 		nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
-		ipnet_remove_if(ipnetif, ips);
+		ipnetif_remove(ipnetif, ips);
 	}
+	avl_destroy(&ips->ips_avl_by_shared);
 	avl_destroy(&ips->ips_avl_by_index);
 	avl_destroy(&ips->ips_avl_by_name);
 	mutex_destroy(&ips->ips_avl_lock);
@@ -1601,7 +1760,7 @@
 static boolean_t
 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
 {
-	ipnetif_addr_t *ifa;
+	ipnetif_addr_t	*ifa;

 	for (ifa = list_head(addrlist); ifa != NULL;
 	    ifa = list_next(addrlist, ifa)) {
@@ -1613,9 +1772,9 @@

 /* Should the supplied ipnetif be visible from the supplied zoneid? */
 static boolean_t
-ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
+ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
 {
-	int ret;
+	int	ret;

 	/*
 	 * The global zone has visibility into all interfaces in the global
@@ -1645,7 +1804,7 @@
  * case, send the ipnet_t an M_HANGUP.
  */
 static void
-ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
+ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
 {
 	list_t	*strlist = &ips->ips_str_list;
 	ipnet_t	*ipnet;
@@ -1655,7 +1814,7 @@
 	    ipnet = list_next(strlist, ipnet)) {
 		if (ipnet->ipnet_if != ipnetif)
 			continue;
-		if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
+		if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
 			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
 	}
 	ipnet_walkers_dec(ips);
@@ -1664,7 +1823,7 @@
 void
 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
 {
-	ipnetif_t 		*ipnetif;
+	ipnetif_t		*ipnetif;
 	list_t			cbdata;
 	ipnetif_cbdata_t	*cbnode;
 	netstack_t		*ns;
@@ -1687,7 +1846,7 @@
 	mutex_enter(&ips->ips_avl_lock);
 	for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
 	    ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
-		if (!ipnet_if_in_zone(ipnetif, zoneid, ips))
+		if (!ipnetif_in_zone(ipnetif, zoneid, ips))
 			continue;
 		cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
 		(void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
@@ -1706,23 +1865,38 @@
 }

 static int
-ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp)
+ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
 {
-	int64_t index1 = *((int64_t *)index_ptr);
-	int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
+	int64_t	index1 = *((int64_t *)index_ptr);
+	int64_t	index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;

 	return (SIGNOF(index2 - index1));
 }

 static int
-ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp)
+ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
 {
-	int res;
+	int	res;

 	res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
 	return (SIGNOF(res));
 }

+static int
+ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
+{
+	const uintptr_t	*ptr = key_ptr;
+	const ipnetif_t	*ifp;
+	int		res;
+
+	ifp = ipnetifp;
+	res = ifp->if_zoneid - ptr[0];
+	if (res != 0)
+		return (SIGNOF(res));
+	res = strcmp(ifp->if_name, (char *)ptr[1]);
+	return (SIGNOF(res));
+}
+
 static void
 ipnetif_refhold(ipnetif_t *ipnetif)
 {
@@ -1735,9 +1909,9 @@
 ipnetif_refrele(ipnetif_t *ipnetif)
 {
 	mutex_enter(&ipnetif->if_reflock);
-	ASSERT(ipnetif->if_refcnt != 0);
+	ASSERT(ipnetif->if_refcnt > 0);
 	if (--ipnetif->if_refcnt == 0)
-		ipnet_free_if(ipnetif);
+		ipnetif_free(ipnetif);
 	else
 		mutex_exit(&ipnetif->if_reflock);
 }
@@ -1759,3 +1933,585 @@
 		cv_broadcast(&ips->ips_walkers_cv);
 	mutex_exit(&ips->ips_walkers_lock);
 }
+
+/*ARGSUSED*/
+static int
+ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
+{
+	hook_pkt_observe_t	*hdr;
+	pfv_t			func = (pfv_t)arg;
+	mblk_t			*mp;
+
+	hdr = (hook_pkt_observe_t *)info;
+	mp = dupmsg(hdr->hpo_pkt);
+	if (mp == NULL) {
+		mp = copymsg(hdr->hpo_pkt);
+		if (mp == NULL)  {
+			netstack_t *ns = hdr->hpo_ctx;
+			ipnet_stack_t *ips = ns->netstack_ipnet;
+
+			IPSK_BUMP(ips, ik_dispatchDupDrop);
+			return (0);
+		}
+	}
+
+	hdr = (hook_pkt_observe_t *)mp->b_rptr;
+	hdr->hpo_pkt = mp;
+
+	func(mp);
+
+	return (0);
+}
+
+hook_t *
+ipobs_register_hook(netstack_t *ns, pfv_t func)
+{
+	ip_stack_t	*ipst = ns->netstack_ip;
+	char		name[32];
+	hook_t		*hook;
+
+	HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
+	VERIFY(hook != NULL);
+
+	/*
+	 * To register multiple hooks with the same callback function,
+	 * a unique name is needed; the hook's address supplies one.
+	 */
+	(void) snprintf(name, sizeof (name), "ipobserve_%p", hook);
+	hook->h_name = strdup(name);
+
+	(void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
+	(void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
+
+	return (hook);
+}
+
+void
+ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
+{
+	ip_stack_t	*ipst = ns->netstack_ip;
+
+	(void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
+
+	(void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
+
+	strfree(hook->h_name);
+
+	hook_free(hook);
+}
+
+/* ******************************************************************** */
+/* BPF Functions below							*/
+/* ******************************************************************** */
+
+/*
+ * Map a zoneid to its ipnet_stack_t; VERIFY fails if no netstack is found.
+ */
+static ipnet_stack_t *
+ipnet_find_by_zoneid(zoneid_t zoneid)
+{
+	netstack_t	*ns = netstack_find_by_zoneid(zoneid);
+
+	VERIFY(ns != NULL);
+	return (ns->netstack_ipnet);
+}
+
+/*
+ * Rather than weave the complexity of what needs to be done for a BPF
+ * device attach or detach into the code paths of where they're used,
+ * it is presented here in a couple of simple functions, along with
+ * other similar code.
+ *
+ * The refrele/refhold here provide the means by which it is known
+ * when the clone structures can be free'd.
+ */
+static void
+ipnet_bpfdetach(ipnetif_t *ifp)
+{
+	if (ifp->if_stackp->ips_bpfdetach_fn == NULL)
+		return;
+	ifp->if_stackp->ips_bpfdetach_fn((uintptr_t)ifp);
+	ipnetif_refrele(ifp);	/* pairs with ipnet_bpfattach()'s hold */
+}
+
+static void
+ipnet_bpfattach(ipnetif_t *ifp)
+{
+	if (ifp->if_stackp->ips_bpfattach_fn == NULL)
+		return;
+	ipnetif_refhold(ifp);	/* dropped again in ipnet_bpfdetach() */
+	ifp->if_stackp->ips_bpfattach_fn((uintptr_t)ifp, DL_IPNET,
+	    ifp->if_zoneid, BPR_IPNET);
+}
+
+/*
+ * Set the functions to call back to when adding or removing an interface so
+ * that BPF can keep its internal list of these up to date.
+ */
+void
+ipnet_set_bpfattach(bpf_attach_fn_t attach, bpf_detach_fn_t detach,
+    zoneid_t zoneid, bpf_itap_fn_t tapfunc, bpf_provider_reg_fn_t provider)
+{
+	ipnet_stack_t	*ips;
+	ipnetif_t	*ipnetif;
+	avl_tree_t	*tree;
+	ipnetif_t	*next;
+
+	if (zoneid == GLOBAL_ZONEID) {
+		ipnet_itap = tapfunc;
+	}
+
+	VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+
+	/*
+	 * If we're setting a new attach function, call it for every
+	 * mac that has already been attached.
+	 */
+	if (attach != NULL && ips->ips_bpfattach_fn == NULL) {
+		ASSERT(detach != NULL);
+		if (provider != NULL) {
+			(void) provider(&bpf_ipnet);
+		}
+		/*
+		 * The call to ipnet_bpfattach() calls into bpf`bpfattach
+		 * which then wants to resolve the link name into a link id.
+		 * For ipnet, this results in a call back to
+		 * ipnet_get_linkid_byname which also needs to lock and walk
+		 * the AVL tree. Thus the call to ipnet_bpfattach needs to
+		 * be made without the avl_lock held.
+		 */
+		mutex_enter(&ips->ips_event_lock);
+		ips->ips_bpfattach_fn = attach;
+		ips->ips_bpfdetach_fn = detach;
+		mutex_enter(&ips->ips_avl_lock);
+		tree = &ips->ips_avl_by_index;
+		for (ipnetif = avl_first(tree); ipnetif != NULL;
+		    ipnetif = next) {
+			ipnetif_refhold(ipnetif);
+			mutex_exit(&ips->ips_avl_lock);
+			ipnet_bpfattach(ipnetif);
+			mutex_enter(&ips->ips_avl_lock);
+			next = avl_walk(tree, ipnetif, AVL_AFTER);
+			ipnetif_refrele(ipnetif);
+		}
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpf_probe_shared(ips);
+		mutex_exit(&ips->ips_event_lock);
+
+	} else if (attach == NULL && ips->ips_bpfattach_fn != NULL) {
+		ASSERT(ips->ips_bpfdetach_fn != NULL);
+		mutex_enter(&ips->ips_event_lock);
+		ips->ips_bpfattach_fn = NULL;
+		mutex_enter(&ips->ips_avl_lock);
+		tree = &ips->ips_avl_by_index;
+		for (ipnetif = avl_first(tree); ipnetif != NULL;
+		    ipnetif = next) {
+			ipnetif_refhold(ipnetif);
+			mutex_exit(&ips->ips_avl_lock);
+			ipnet_bpfdetach((ipnetif_t *)ipnetif);
+			mutex_enter(&ips->ips_avl_lock);
+			next = avl_walk(tree, ipnetif, AVL_AFTER);
+			ipnetif_refrele(ipnetif);
+		}
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpf_release_shared(ips);
+		ips->ips_bpfdetach_fn = NULL;
+		mutex_exit(&ips->ips_event_lock);
+
+		if (provider != NULL) {
+			(void) provider(&bpf_ipnet);
+		}
+	}
+}
+
+/*
+ * The list of interfaces available via ipnet is private for each zone,
+ * so the AVL tree of each zone must be searched for a given name, even
+ * if all names are unique.
+ */
+int
+ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
+{
+	ipnet_stack_t	*ips;
+	ipnetif_t	*found;
+
+	ASSERT(ptr != NULL);
+	VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+
+	/*
+	 * Take the hold while still under the AVL lock;
+	 * ipnet_close_byhandle() is what releases it again.
+	 */
+	mutex_enter(&ips->ips_avl_lock);
+	found = avl_find(&ips->ips_avl_by_name, (char *)name, NULL);
+	if (found != NULL)
+		ipnetif_refhold(found);
+	mutex_exit(&ips->ips_avl_lock);
+
+	*ptr = found;
+	return ((found == NULL) ? ESRCH : 0);
+}
+
+void
+ipnet_close_byhandle(ipnetif_t *ifp)
+{
+	ASSERT(ifp != NULL);
+	ipnetif_refrele(ifp);
+}
+
+const char *
+ipnet_name(ipnetif_t *ifp)
+{
+	ASSERT(ifp != NULL);
+	return (ifp->if_name);
+}
+
+/*
+ * To find the linkid for a given name, it is necessary to know which zone
+ * the interface name belongs to and to search the avl tree for that zone
+ * as there is no master list of all interfaces and which zone they belong
+ * to. It is assumed that the caller of this function is somehow already
+ * working with the ipnet interfaces and hence the ips_event_lock is held.
+ * When BPF calls into this function, it is doing so because of an event
+ * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
+ * value returned has meaning without the need for grabbing a hold on the
+ * owning structure.
+ */
+int
+ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
+{
+	ipnet_stack_t	*ips;
+	ipnetif_t	*ifp;
+
+	VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
+	ASSERT(mutex_owned(&ips->ips_event_lock));
+
+	mutex_enter(&ips->ips_avl_lock);
+	ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
+	if (ifp != NULL)
+		*idp = (uint_t)ifp->if_index;
+
+	/*
+	 * Shared instance zone?
+	 */
+	if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
+		uintptr_t key[2] = { zoneid, (uintptr_t)name };
+
+		ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
+		if (ifp != NULL)
+			*idp = (uint_t)ifp->if_index;
+	}
+
+	mutex_exit(&ips->ips_avl_lock);
+
+	if (ifp == NULL)
+		return (ESRCH);
+	return (0);
+}
+
+/*
+ * Strictly speaking, there is no such thing as a "client" in ipnet, like
+ * there is in mac. BPF only needs to have this because it is required as
+ * part of interfacing correctly with mac. The reuse of the original
+ * ipnetif_t as a client poses no danger, so long as it is done with its
+ * own ref-count'd hold that is given up on close.
+ */
+int
+ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
+{
+	ASSERT(ptr != NULL);
+	ASSERT(result != NULL);
+	ipnetif_refhold(ptr);
+	*result = ptr;
+
+	return (0);
+}
+
+void
+ipnet_client_close(ipnetif_t *ptr)
+{
+	ASSERT(ptr != NULL);
+	ipnetif_refrele(ptr);
+}
+
+/*
+ * Called from BPF when it needs to start receiving packets from ipnet.
+ * "how" must be DL_PROMISC_PHYS or DL_PROMISC_MULTI (else EINVAL). On
+ * success 0 is returned and *mhandle is the ipnet_t, which stores the
+ * pointers ipnet_promisc_remove() needs; the ipnet_t already has the
+ * right fields, so its lightweight reuse here seems in order.
+ *
+ * NOTE(review): a hook registration failure frees the ipnet_t and hook
+ * but does not undo ipnet_join_allmulti() - confirm that is intended.
+ *
+ * This should never be called for the IPNET_MINOR_LO device as it is
+ * never created via ipnetif_create.
+ */
+/*ARGSUSED*/
+int
+ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
+    int flags)
+{
+	ip_stack_t	*ipst;
+	netstack_t	*ns;
+	ipnetif_t	*ifp;
+	ipnet_t		*ipnet;
+	char		name[32];
+	int		error;
+
+	ifp = (ipnetif_t *)handle;
+	ns = netstack_find_by_zoneid(ifp->if_zoneid);
+
+	if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) {
+		error = ipnet_join_allmulti(ifp, ns->netstack_ipnet);
+		if (error != 0)
+			return (error);
+	} else {
+		return (EINVAL);
+	}
+
+	ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
+	ipnet->ipnet_if = ifp;
+	ipnet->ipnet_ns = ns;
+	ipnet->ipnet_flags = flags;
+
+	if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
+		ipnet->ipnet_acceptfn = ipnet_loaccept;
+	} else {
+		ipnet->ipnet_acceptfn = ipnet_accept;
+	}
+
+	/*
+	 * To register multiple hooks with the same callback function,
+	 * a unique name is needed.
+	 */
+	HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
+	(void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
+	    ipnet->ipnet_hook);
+	ipnet->ipnet_hook->h_name = strdup(name);
+	ipnet->ipnet_data = data;
+	ipnet->ipnet_zoneid = ifp->if_zoneid;
+
+	ipst = ns->netstack_ip;
+
+	error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
+	    ipnet->ipnet_hook);
+	if (error != 0)
+		goto regfail;
+
+	error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
+	    ipnet->ipnet_hook);
+	if (error != 0) {
+		(void) net_hook_unregister(ipst->ips_ip4_observe_pr,
+		    NH_OBSERVE, ipnet->ipnet_hook);
+		goto regfail;
+	}
+
+	*mhandle = (uintptr_t)ipnet;
+	return (0);
+
+regfail:
+	cmn_err(CE_WARN, "net_hook_register failed: %d", error);
+	strfree(ipnet->ipnet_hook->h_name);
+	hook_free(ipnet->ipnet_hook);
+	kmem_free(ipnet, sizeof (*ipnet));	/* don't leak the ipnet_t */
+	return (error);
+}
+
+void
+ipnet_promisc_remove(void *data)
+{
+	ip_stack_t	*ipst;
+	ipnet_t		*ipnet;
+	hook_t		*hook;
+
+	ipnet = data;
+	ipst = ipnet->ipnet_ns->netstack_ip;
+	hook = ipnet->ipnet_hook;
+
+	VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
+	    hook) == 0);
+
+	VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
+	    hook) == 0);
+
+	strfree(hook->h_name);
+
+	hook_free(hook);
+
+	kmem_free(ipnet, sizeof (*ipnet));
+}
+
+/*
+ * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
+ * An important field from that structure is "ipnet_data" that
+ * contains the "data" pointer passed into ipnet_promisc_add: it needs
+ * to be passed back to bpf when we call into ipnet_itap.
+ *
+ * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
+ * from BPF.
+ */
+/*ARGSUSED*/
+static int
+ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
+{
+	hook_pkt_observe_t	*hdr;
+	ipnet_addrp_t		src;
+	ipnet_addrp_t		dst;
+	ipnet_stack_t		*ips;
+	ipnet_t			*ipnet;
+	mblk_t			*netmp;
+	mblk_t			*mp;
+
+	hdr = (hook_pkt_observe_t *)info;
+	mp = hdr->hpo_pkt;
+	ipnet = (ipnet_t *)arg;
+	ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
+
+	netmp = hdr->hpo_pkt->b_cont;
+	src.iap_family = hdr->hpo_family;
+	dst.iap_family = hdr->hpo_family;
+
+	if (hdr->hpo_family == AF_INET) {
+		src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
+		dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
+	} else {
+		src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
+		dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
+	}
+
+	if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
+		IPSK_BUMP(ips, ik_acceptFail);
+		return (0);
+	}
+	IPSK_BUMP(ips, ik_acceptOk);
+
+	ipnet_itap(ipnet->ipnet_data, mp,
+	    hdr->hpo_htype == IPOBS_HOOK_OUTBOUND,
+	    ntohs(hdr->hpo_pktlen) + (mp->b_wptr - mp->b_rptr));
+
+	return (0);
+}
+
+/*
+ * clone'd ipnetif_t's are created when a shared IP instance zone comes
+ * to life and configures an IP address. The model that BPF uses is that
+ * each interface must have a unique pointer and each interface must be
+ * representative of what it can capture. They are limited to one DLT
+ * per interface and one zone per interface. Thus every interface that
+ * can be seen in a zone must be announced via an attach to bpf. For
+ * shared instance zones, this means the ipnet driver needs to detect
+ * when an address is added to an interface in a zone for the first
+ * time (and also when the last address is removed.)
+ */
+static ipnetif_t *
+ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
+{
+	uintptr_t	key[2] = { zoneid, (uintptr_t)ifp->if_name };
+	ipnet_stack_t	*ips = ifp->if_stackp;
+	avl_index_t	where = 0;
+	ipnetif_t	*newif;
+
+	mutex_enter(&ips->ips_avl_lock);
+	newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
+	if (newif != NULL) {
+		ipnetif_refhold(newif);
+		newif->if_sharecnt++;
+		mutex_exit(&ips->ips_avl_lock);
+		return (newif);
+	}
+
+	newif = ipnet_alloc_if(ips);
+	if (newif == NULL) {
+		mutex_exit(&ips->ips_avl_lock);
+		return (NULL);
+	}
+
+	newif->if_refcnt = 1;
+	newif->if_sharecnt = 1;
+	newif->if_zoneid = zoneid;
+	(void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
+	newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
+	newif->if_index = ifp->if_index;
+
+	avl_insert(&ips->ips_avl_by_shared, newif, where);
+	mutex_exit(&ips->ips_avl_lock);
+
+	ipnet_bpfattach(newif);
+
+	return (newif);
+}
+
+static void
+ipnetif_clone_release(ipnetif_t *ipnetif)
+{
+	boolean_t	dofree = B_FALSE;
+	boolean_t	doremove = B_FALSE;
+	ipnet_stack_t	*ips = ipnetif->if_stackp;
+
+	mutex_enter(&ipnetif->if_reflock);
+	ASSERT(ipnetif->if_refcnt > 0);
+	if (--ipnetif->if_refcnt == 0)
+		dofree = B_TRUE;
+	ASSERT(ipnetif->if_sharecnt > 0);
+	if (--ipnetif->if_sharecnt == 0)
+		doremove = B_TRUE;
+	mutex_exit(&ipnetif->if_reflock);
+	if (doremove) {
+		mutex_enter(&ips->ips_avl_lock);
+		avl_remove(&ips->ips_avl_by_shared, ipnetif);
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpfdetach(ipnetif);
+	}
+	if (dofree) {
+		ASSERT(ipnetif->if_sharecnt == 0);
+		ipnetif_free(ipnetif);
+	}
+}
+
+/*
+ * Called when BPF loads, the goal is to tell BPF about all of the interfaces
+ * in use by zones that have a shared IP stack. These interfaces are stored
+ * in the ips_avl_by_shared tree. Note that if there are 1000 bge0's in use
+ * as bge0:1 through to bge0:1000, then this would be represented by a single
+ * bge0 on that AVL tree.
+ */
+static void
+ipnet_bpf_probe_shared(ipnet_stack_t *ips)
+{
+	ipnetif_t	*next;
+	ipnetif_t	*ifp;
+
+	mutex_enter(&ips->ips_avl_lock);
+
+	for (ifp = avl_first(&ips->ips_avl_by_shared); ifp != NULL;
+	    ifp = next) {
+		ipnetif_refhold(ifp);
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpfattach(ifp);
+		mutex_enter(&ips->ips_avl_lock);
+		next = avl_walk(&ips->ips_avl_by_shared, ifp, AVL_AFTER);
+		ipnetif_refrele(ifp);
+	}
+	mutex_exit(&ips->ips_avl_lock);
+}
+
+static void
+ipnet_bpf_release_shared(ipnet_stack_t *ips)
+{
+	ipnetif_t	*next;
+	ipnetif_t	*ifp;
+
+	mutex_enter(&ips->ips_avl_lock);
+
+	for (ifp = avl_first(&ips->ips_avl_by_shared); ifp != NULL;
+	    ifp = next) {
+		ipnetif_refhold(ifp);
+		mutex_exit(&ips->ips_avl_lock);
+		ipnet_bpfdetach(ifp);
+		mutex_enter(&ips->ips_avl_lock);
+		next = avl_walk(&ips->ips_avl_by_shared, ifp, AVL_AFTER);
+		ipnetif_refrele(ifp);
+	}
+	mutex_exit(&ips->ips_avl_lock);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/ipnet/ipnet_bpf.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,193 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stream.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <inet/ipnet.h>
+
+/*
+ * This file implements the function calls for ipnet that translate the
+ * calls from BPF into the correct arguments and functions inside of the
+ * ipnet device.
+ */
+static const char *ipnet_bpf_name(uintptr_t);
+static void ipnet_bpf_client_close(uintptr_t);
+static const char *ipnet_bpf_client_name(uintptr_t);
+static int ipnet_bpf_client_open(uintptr_t, uintptr_t *);
+static void ipnet_bpf_close(uintptr_t);
+static int ipnet_bpf_getlinkid(const char *, datalink_id_t *, zoneid_t);
+static int ipnet_bpf_open(const char *, uintptr_t *, zoneid_t);
+static uintptr_t ipnet_bpf_promisc_add(uintptr_t, int, void *,
+    uintptr_t *, int);
+static void ipnet_bpf_promisc_remove(uintptr_t);
+static void ipnet_bpf_sdu_get(uintptr_t, uint_t *);
+static int ipnet_bpf_tx(uintptr_t, mblk_t *);
+static int ipnet_bpf_type(uintptr_t);
+
+bpf_provider_t bpf_ipnet = {
+	BPR_IPNET,
+	ipnet_bpf_open,
+	ipnet_bpf_close,
+	ipnet_bpf_name,
+	ipnet_bpf_type,
+	ipnet_bpf_sdu_get,
+	ipnet_bpf_tx,
+	ipnet_bpf_promisc_add,
+	ipnet_bpf_promisc_remove,
+	ipnet_bpf_getlinkid,
+	ipnet_bpf_client_close,
+	ipnet_bpf_client_name,
+	ipnet_bpf_client_open
+};
+
+/*ARGSUSED*/
+static int
+ipnet_bpf_open(const char *name, uintptr_t *mhandlep, zoneid_t zoneid)
+{
+	return (ipnet_open_byname(name, (ipnetif_t **)mhandlep, zoneid));
+}
+
+/*ARGSUSED*/
+static void
+ipnet_bpf_close(uintptr_t mhandle)
+{
+	ipnet_close_byhandle((ipnetif_t *)mhandle);
+}
+
+static const char *
+ipnet_bpf_name(uintptr_t mhandle)
+{
+	return (ipnet_name((ipnetif_t *)mhandle));
+}
+
+/*ARGSUSED*/
+static int
+ipnet_bpf_type(uintptr_t mhandle)
+{
+	return (DL_IPNET);
+}
+
+/*ARGSUSED*/
+static void
+ipnet_bpf_sdu_get(uintptr_t mhandle, uint_t *mtup)
+{
+	/*
+	 * The choice of 65535 is arbitrary, it could be any smaller number
+	 * but it does match the current default choice of libpcap as the
+	 * packet snap size.
+	 */
+	*mtup = 65535;
+}
+
+/*ARGSUSED*/
+static int
+ipnet_bpf_tx(uintptr_t chandle, mblk_t *pkt)
+{
+	/*
+	 * It is not clear what it would mean to send an ipnet packet,
+	 * especially since the ipnet device has been implemented to be
+	 * an observation (read-only) instrument. Thus a call to send a
+	 * packet using ipnet results in the packet being free'd and an
+	 * error returned.
+	 */
+	freemsg(pkt);
+
+	return (EBADF);
+}
+
+/*
+ * Translate a mac promiscuous mode request coming from BPF into its
+ * ipnet equivalent. BPF cannot select which SAP is sniffed, so every
+ * recognised mode is combined with IPNET_PROMISC_SAP.
+ */
+static uintptr_t
+ipnet_bpf_promisc_add(uintptr_t chandle, int how, void *arg,
+    uintptr_t *promisc, int flags)
+{
+	int	ipnet_how;
+
+	/*
+	 * Unrecognised modes leave "flags" alone and pass 0 as the mode.
+	 */
+	switch (how) {
+	case MAC_CLIENT_PROMISC_ALL:
+		flags = IPNET_PROMISC_PHYS | IPNET_PROMISC_SAP;
+		ipnet_how = DL_PROMISC_PHYS;
+		break;
+	case MAC_CLIENT_PROMISC_MULTI:
+		flags = IPNET_PROMISC_MULTI | IPNET_PROMISC_SAP;
+		ipnet_how = DL_PROMISC_MULTI;
+		break;
+	default:
+		ipnet_how = 0;
+		break;
+	}
+
+	return (ipnet_promisc_add((void *)chandle, ipnet_how, arg, promisc,
+	    flags));
+}
+
+static void
+ipnet_bpf_promisc_remove(uintptr_t phandle)
+{
+	ipnet_promisc_remove((void *)phandle);
+}
+
+static int
+ipnet_bpf_client_open(uintptr_t mhandle, uintptr_t *chandlep)
+{
+	/* The "client" is the same ipnetif_t, returned with its own hold. */
+	return (ipnet_client_open((ipnetif_t *)mhandle,
+	    (ipnetif_t **)chandlep));
+}
+
+/*ARGSUSED*/
+static void
+ipnet_bpf_client_close(uintptr_t chandle)
+{
+	ipnet_client_close((ipnetif_t *)chandle);
+}
+
+static const char *
+ipnet_bpf_client_name(uintptr_t chandle)
+{
+	return (ipnet_bpf_name(chandle));
+}
+
+static int
+ipnet_bpf_getlinkid(const char *name, datalink_id_t *idp, zoneid_t zoneid)
+{
+	uint_t	ifindex = 0;
+	int	err;
+
+	err = ipnet_get_linkid_byname(name, &ifindex, zoneid);
+	if (err != 0)
+		return (err);
+	*idp = (datalink_id_t)ifindex;	/* only written on success */
+	return (0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/sockmods/netpacket/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# uts/common/inet/sockmods/netpacket/Makefile
+#
+# include global definitions
+include ../../../../../Makefile.master
+
+HDRS=	packet.h
+
+ROOTDIRS=	$(ROOT)/usr/include/netpacket
+
+ROOTHDRS=	$(HDRS:%=$(ROOT)/usr/include/netpacket/%)
+
+$(ROOTDIRS)/%:	%
+	$(INS.file)
+
+.KEEP_STATE:
+
+install_h:	$(ROOTDIRS) $(ROOTHDRS)
+
+$(ROOTDIRS):
+	$(INS.dir)
+
+check:	$(CHECKHDRS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/sockmods/netpacket/packet.h	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,203 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PACKET_H
+#define	_PACKET_H
+
+#include <sys/socket_impl.h>
+#include <net/bpf.h>
+
+/*
+ * With which we do the reverse of what libpcap does....
+ */
+#define	PACKET_OUTGOING		LINUX_SLL_OUTGOING
+#define	PACKET_HOST		LINUX_SLL_HOST
+#define	PACKET_BROADCAST	LINUX_SLL_BROADCAST
+#define	PACKET_MULTICAST	LINUX_SLL_MULTICAST
+#define	PACKET_OTHERHOST	LINUX_SLL_OTHERHOST
+
+#define	PACKET_STATISTICS	1
+#define	PACKET_ADD_MEMBERSHIP	2
+#define	PACKET_DROP_MEMBERSHIP	3
+#define	PACKET_AUXDATA		4
+
+
+struct packet_mreq {
+	uint32_t	mr_ifindex;
+	uint16_t	mr_type;
+	uint16_t	mr_alen;
+	uint8_t		mr_address[8];
+};
+
+#define	PACKET_MR_MULTICAST	1
+#define	PACKET_MR_PROMISC	2
+#define	PACKET_MR_ALLMULTI	3
+
+typedef enum tpkt_status_e {
+	TP_STATUS_KERNEL,
+	TP_STATUS_USER,
+	TP_STATUS_COPY,
+	TP_STATUS_LOSING,
+	TP_STATUS_CSUMNOTREADY
+} tpkt_status_t;
+
+struct tpacket_auxdata {		/* tp_macoff/tp_netoff ?? */
+	tpkt_status_t	tp_status;
+	uint32_t	tp_len;
+	uint32_t	tp_snaplen;
+	uint16_t	tp_macoff;
+	uint16_t	tp_netoff;
+	uint16_t	tp_vlan_vci;
+};
+
+struct tpacket_hdr {			/* tp_macoff/tp_netoff ?? */
+	uint64_t	tp_status;
+	uint32_t	tp_len;
+	uint32_t	tp_snaplen;
+	uint16_t	tp_macoff;
+	uint16_t	tp_netoff;
+	uint32_t	tp_sec;
+	uint32_t	tp_usec;
+};
+
+struct tpacket2_hdr {			/* tp_macoff/tp_netoff ?? */
+	tpkt_status_t	tp_status;
+	uint32_t	tp_len;
+	uint32_t	tp_snaplen;
+	uint16_t	tp_macoff;
+	uint16_t	tp_netoff;
+	uint32_t	tp_sec;
+	uint32_t	tp_nsec;
+	uint16_t	tp_vlan_tci;
+};
+
+struct tpacket_stats {
+	uint16_t	tp_packets;
+	uint16_t	tp_drops;
+};
+
+struct sock_filter {			/* Fields named from bpf_insn */
+	uint16_t	code;
+	uint8_t		jt;
+	uint8_t		jf;
+	uint32_t	k;
+};
+
+struct sock_fprog {
+	uint16_t		len;
+	struct sock_filter	*filter;
+};
+
+/*
+ * Linux ARPHRD_ symbols needed...
+ *
+ * The numbers above 50000 are because their real value is unknown from
+ * libpcap's source, so a number has been chosen that is unlikely to be
+ * confused with the real one on Linux.
+ */
+#define	ARPHRD_ADAPT			50001
+#define	ARPHRD_ARCNET			50002
+#define	ARPHRD_ATM			19
+#define	ARPHRD_AX25			50003
+#define	ARPHRD_CHAOS			50004
+#define	ARPHRD_CISCO			513
+#define	ARPHRD_CSLIP			50005
+#define	ARPHRD_CSLIP6			50006
+#define	ARPHRD_DLCI			15
+#define	ARPHRD_EETHER			50007
+#define	ARPHRD_ETHER			50008
+#define	ARPHRD_FCAL			785
+#define	ARPHRD_FCFABRIC			787
+#define	ARPHRD_FCPL			786
+#define	ARPHRD_FCPP			784
+#define	ARPHRD_FRAD			770
+#define	ARPHRD_FDDI			774
+#define	ARPHRD_IEEE802			50009
+#define	ARPHRD_IEEE802_TR		800
+#define	ARPHRD_IEEE80211		801
+#define	ARPHRD_IEEE80211_PRISM		802
+#define	ARPHRD_IEEE80211_RADIOTAP	803
+#define	ARPHRD_IRDA			783
+#define	ARPHRD_LAPD			8445
+#define	ARPHRD_LOCALTLK			50010
+#define	ARPHRD_LOOPBACK			50011
+#define	ARPHRD_METRICOM			50012
+#define	ARPHRD_PRONET			50013
+#define	ARPHRD_PPP			50014
+#define	ARPHRD_RAWHDLC			518
+#define	ARPHRD_SIT			776
+#define	ARPHRD_SLIP6			50015
+#define	ARPHRD_SLIP			50016
+#define	ARPHRD_TUNNEL			50017
+
+#ifdef _KERNEL
+/*
+ * PFP socket structure.
+ */
+typedef struct pfpsock {
+	struct bpf_program		ps_bpf;
+	krwlock_t			ps_bpflock;
+	sock_upper_handle_t		ps_upper;
+	sock_upcalls_t			*ps_upcalls;
+	mac_handle_t			ps_mh;
+	mac_client_handle_t		ps_mch;
+	mac_promisc_handle_t		ps_phd;
+	int				ps_type;
+	int				ps_proto;
+	uint_t				ps_max_sdu;
+	boolean_t			ps_bound;
+	mac_client_promisc_type_t	ps_promisc;
+	boolean_t			ps_auxdata;
+	struct tpacket_stats		ps_stats;
+	struct sockaddr			ps_sock;
+	datalink_id_t			ps_linkid;
+	kmutex_t			ps_lock;
+	boolean_t			ps_flow_ctrld;
+	ulong_t				ps_flow_ctrl_drops;
+} pfpsock_t;
+
+typedef struct pfp_kstats_s {
+	kstat_named_t	kp_recv_mac_hdr_fail;
+	kstat_named_t	kp_recv_bad_proto;
+	kstat_named_t	kp_recv_alloc_fail;
+	kstat_named_t	kp_recv_ok;
+	kstat_named_t	kp_recv_fail;
+	kstat_named_t	kp_recv_filtered;
+	kstat_named_t	kp_recv_flow_cntrld;
+	kstat_named_t	kp_send_unbound;
+	kstat_named_t	kp_send_failed;
+	kstat_named_t	kp_send_too_big;
+	kstat_named_t	kp_send_alloc_fail;
+	kstat_named_t	kp_send_uiomove_fail;
+	kstat_named_t	kp_send_no_memory;
+	kstat_named_t	kp_send_open_fail;
+	kstat_named_t	kp_send_wrong_family;
+	kstat_named_t	kp_send_short_msg;
+	kstat_named_t	kp_send_ok;
+} pfp_kstats_t;
+#endif /* _KERNEL */
+
+#endif /* _PACKET_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/inet/sockmods/sockmod_pfp.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,1414 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/socket_proto.h>
+#include <sys/sockio.h>
+#include <sys/strsun.h>
+#include <sys/kstat.h>
+#include <sys/modctl.h>
+#include <sys/policy.h>
+#include <sys/priv_const.h>
+#include <sys/tihdr.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+#include <fs/sockfs/sockcommon.h>
+#include <net/if.h>
+
+#include <sys/dls.h>
+#include <sys/mac.h>
+#include <sys/mac_client.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client_priv.h>
+
+#include <netpacket/packet.h>
+
+static void pfp_close(mac_handle_t, mac_client_handle_t);
+static int pfp_dl_to_arphrd(int);
+static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
+    socklen_t *);
+static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *);
+static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *);
+static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
+    cred_t *);
+static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+static void pfp_release_bpf(struct pfpsock *);
+static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
+static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
+    socklen_t);
+static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
+    socklen_t);
+
+/*
+ * PFP sockfs operations
+ * Most are currently no-ops because they have no meaning for a connectionless
+ * socket.
+ */
+static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
+    sock_upcalls_t *, int, struct cred *);
+static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
+    struct cred *);
+static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
+static void sdpfp_clr_flowctrl(sock_lower_handle_t);
+static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
+    socklen_t *, struct cred *);
+static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
+    struct cred *);
+static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
+    struct cred *);
+static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
+    socklen_t, struct cred *);
+
+static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
+    uint_t *, int *, int, cred_t *);
+
+static int sockpfp_init(void);
+static void sockpfp_fini(void);
+
+/*
+ * kstat state for the module:
+ *   pfp_ksp	handle returned by kstat_create(), NULL if creation failed
+ *   ks_stats	the live counters that the data paths increment and that
+ *		are exported through pfp_ksp
+ *   pfp_kstats	template holding the counter names and types; it is copied
+ *		into ks_stats when the module is initialised
+ *
+ * The initialiser below is positional and therefore must list the counters
+ * in the same order as the members of pfp_kstats_t.
+ */
+static kstat_t *pfp_ksp;
+static pfp_kstats_t ks_stats;
+static pfp_kstats_t pfp_kstats = {
+	/*
+	 * Each one of these kstats is a different return path in handling
+	 * a packet received from the mac layer.
+	 */
+	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
+	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
+	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
+	{ "recvOk",		KSTAT_DATA_UINT64 },
+	{ "recvFail",		KSTAT_DATA_UINT64 },
+	{ "recvFiltered",	KSTAT_DATA_UINT64 },
+	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
+	/*
+	 * A global set of counters is maintained to track the behaviour
+	 * of the system (kernel & applications) in sending packets.
+	 */
+	{ "sendUnbound",	KSTAT_DATA_UINT64 },
+	{ "sendFailed",		KSTAT_DATA_UINT64 },
+	{ "sendTooBig",		KSTAT_DATA_UINT64 },
+	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
+	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
+	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
+	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
+	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
+	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
+	{ "sendOk",		KSTAT_DATA_UINT64 }
+};
+
+/*
+ * Downcall vector returned to sockfs by sockpfp_create(). The entries are
+ * positional. Operations that have no meaning for a PF_PACKET socket are
+ * routed to the generic sock_*_notsupp handlers; only activate, bind,
+ * get/setsockopt, senduio, clr_flowctrl, ioctl and close are implemented
+ * by this module.
+ *
+ * NOTE(review): the NULL slot between sdpfp_senduio and sock_poll_notsupp
+ * is presumably the receive-uio downcall (received data is pushed up with
+ * the su_recv upcall instead) - confirm against sock_downcalls_t.
+ */
+sock_downcalls_t pfp_downcalls = {
+	sdpfp_activate,
+	sock_accept_notsupp,
+	sdpfp_bind,
+	sock_listen_notsupp,
+	sock_connect_notsupp,
+	sock_getpeername_notsupp,
+	sock_getsockname_notsupp,
+	sdpfp_getsockopt,
+	sdpfp_setsockopt,
+	sock_send_notsupp,
+	sdpfp_senduio,
+	NULL,
+	sock_poll_notsupp,
+	sock_shutdown_notsupp,
+	sdpfp_clr_flowctrl,
+	sdpfp_ioctl,
+	sdpfp_close,
+};
+
+/*
+ * Socket module registration record. It names the module "sockpfp",
+ * states the upcall/downcall interface versions the module was built
+ * against and supplies sockpfp_create() as the routine that creates new
+ * sockets.
+ *
+ * NOTE(review): the meaning of the trailing NULL member is not visible
+ * here - confirm against the definition of smod_reg_t.
+ */
+static smod_reg_t sinfo = {
+	SOCKMOD_VERSION,
+	"sockpfp",
+	SOCK_UC_VERSION,
+	SOCK_DC_VERSION,
+	sockpfp_create,
+	NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * modlsockmod ties the socket module operations (mod_sockmodops) and a
+ * descriptive name to the registration record, sinfo.
+ */
+static struct modlsockmod modlsockmod = {
+	&mod_sockmodops, "PF Packet socket module", &sinfo
+};
+
+/*
+ * Linkage structure passed to mod_install(), mod_remove() and mod_info();
+ * it lists modlsockmod as the only linkage element, followed by the NULL
+ * terminator.
+ */
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	&modlsockmod,
+	NULL
+};
+
+/*
+ * Module load entry point. The module's private state is set up first and
+ * is torn down again if the module cannot be installed.
+ */
+int
+_init(void)
+{
+	int rv;
+
+	if ((rv = sockpfp_init()) != 0)
+		return (rv);
+
+	if ((rv = mod_install(&modlinkage)) != 0)
+		sockpfp_fini();
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point. Private state is only released once the
+ * module has been successfully removed.
+ */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	if (rv != 0)
+		return (rv);
+
+	sockpfp_fini();
+	return (0);
+}
+
+/*
+ * Module information entry point; simply reports on modlinkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * sockpfp_init: one-time set up performed when the module is loaded.
+ *
+ * The live counters are seeded from the pfp_kstats template and then
+ * published as a virtual named kstat. Failing to create the kstat does
+ * not prevent the module from working, so it is not treated as an error
+ * and this function always returns 0.
+ */
+static int
+sockpfp_init(void)
+{
+	const uint_t nstats = sizeof (pfp_kstats) / sizeof (kstat_named_t);
+
+	bzero(&ks_stats, sizeof (ks_stats));
+	bcopy(&pfp_kstats, &ks_stats, sizeof (pfp_kstats));
+
+	pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
+	    KSTAT_TYPE_NAMED, nstats, KSTAT_FLAG_VIRTUAL);
+	if (pfp_ksp == NULL)
+		return (0);
+
+	pfp_ksp->ks_data = &ks_stats;
+	kstat_install(pfp_ksp);
+
+	return (0);
+}
+
+/*
+ * sockpfp_fini: undo sockpfp_init() when the module is being unloaded (or
+ * failed to install). The only state to release is the kstat, if one was
+ * created.
+ */
+static void
+sockpfp_fini(void)
+{
+	if (pfp_ksp == NULL)
+		return;
+
+	kstat_delete(pfp_ksp);
+}
+
+/*
+ * Create the lower half of a PF_PACKET socket.
+ *
+ * Sockets are created read-write by default, so every PF_PACKET socket
+ * requires the NET_RAWACCESS privilege, even one that will only ever be
+ * used to read packets.
+ *
+ * Besides the privilege check, this function enforces that the module is
+ * only used for AF_PACKET sockets of type SOCK_RAW or SOCK_DGRAM, which is
+ * the policy expressed in the sock2path.conf file.
+ *
+ * On success the new pfpsock is returned as the lower handle, with
+ * *sock_downcalls and *smodep filled in. On failure NULL is returned and
+ * *errorp holds the reason (EACCES, EAFNOSUPPORT, ESOCKTNOSUPPORT or
+ * ENOMEM).
+ */
+/* ARGSUSED */
+static sock_lower_handle_t
+sockpfp_create(int family, int type, int proto,
+    sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
+    int sflags, cred_t *cred)
+{
+	struct pfpsock *newps;
+	int sleepflag;
+
+	if (secpolicy_net_rawaccess(cred) != 0) {
+		*errorp = EACCES;
+		return (NULL);
+	}
+
+	if (family != AF_PACKET) {
+		*errorp = EAFNOSUPPORT;
+		return (NULL);
+	}
+
+	if (type != SOCK_RAW && type != SOCK_DGRAM) {
+		*errorp = ESOCKTNOSUPPORT;
+		return (NULL);
+	}
+
+	if ((sflags & SOCKET_NOSLEEP) != 0)
+		sleepflag = KM_NOSLEEP;
+	else
+		sleepflag = KM_SLEEP;
+
+	newps = kmem_zalloc(sizeof (*newps), sleepflag);
+	if (newps == NULL) {
+		*errorp = ENOMEM;
+		return (NULL);
+	}
+
+	rw_init(&newps->ps_bpflock, NULL, RW_DRIVER, NULL);
+	mutex_init(&newps->ps_lock, NULL, MUTEX_DRIVER, NULL);
+	newps->ps_proto = proto;
+	newps->ps_type = type;
+
+	/*
+	 * SM_ATOMIC causes the bytes of a packet that do not fit into the
+	 * caller's receive buffer to be discarded. The API is therefore one
+	 * packet per receive, and callers need a buffer big enough for the
+	 * largest packet the interface can deliver.
+	 */
+	*smodep = SM_ATOMIC;
+	*sock_downcalls = &pfp_downcalls;
+
+	return ((sock_lower_handle_t)newps);
+}
+
+/* ************************************************************************* */
+
+/*
+ * pfp_packet is the callback function that is given to the mac layer for
+ * PF_PACKET to receive packets with. One packet at a time is passed into
+ * this function from the mac layer. Each packet is a private copy given
+ * to PF_PACKET to modify or free as it wishes and does not harm the original
+ * packet from which it was cloned.
+ *
+ * arg is the pfpsock the promiscuous callback was registered for. The
+ * packet is dropped (and the relevant counters bumped) when the socket is
+ * flow controlled, when its link header cannot be decoded, when its SAP
+ * does not match the socket's protocol, when the BPF filter rejects it or
+ * when no memory is available. Otherwise a T_UNITDATA_IND carrying a
+ * sockaddr_ll (and, if enabled, a PACKET_AUXDATA option) is prepended and
+ * the message is handed to sockfs with the su_recv upcall.
+ */
+/* ARGSUSED */
+static void
+pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
+{
+	struct T_unitdata_ind *tunit;
+	struct sockaddr_ll *sll;
+	struct sockaddr_ll *sol;
+	mac_header_info_t hdr;
+	struct pfpsock *ps;
+	size_t tusz;
+	mblk_t *mp0;
+	int error;
+
+	if (mp == NULL)
+		return;
+
+	ps = arg;
+	if (ps->ps_flow_ctrld) {
+		ps->ps_flow_ctrl_drops++;
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_flow_cntrld.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
+		/*
+		 * Can't decode the packet header information so drop it.
+		 */
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	if (mac_type(ps->ps_mh) == DL_ETHER &&
+	    hdr.mhi_bindsap == ETHERTYPE_VLAN) {
+		struct ether_vlan_header *evhp;
+		struct ether_vlan_header evh;
+
+		hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
+		hdr.mhi_istagged = B_TRUE;
+
+		if (MBLKL(mp) >= sizeof (*evhp)) {
+			evhp = (struct ether_vlan_header *)mp->b_rptr;
+		} else {
+			int sz = sizeof (*evhp);
+			char *s = (char *)&evh;
+			mblk_t *tmp;
+			int len;
+
+			/*
+			 * The VLAN header is spread over several mblks, so
+			 * gather it into a local copy. The destination
+			 * pointer must advance with every fragment copied.
+			 */
+			for (tmp = mp; sz > 0 && tmp != NULL;
+			    tmp = tmp->b_cont) {
+				len = min(sz, MBLKL(tmp));
+				bcopy(tmp->b_rptr, s, len);
+				s += len;
+				sz -= len;
+			}
+			if (sz > 0) {
+				/*
+				 * The message ended before a complete VLAN
+				 * header was found; treat it in the same way
+				 * as an undecodable header.
+				 */
+				ps->ps_stats.tp_drops++;
+				ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
+				freemsg(mp);
+				return;
+			}
+			evhp = &evh;
+		}
+		hdr.mhi_tci = ntohs(evhp->ether_tci);
+		hdr.mhi_bindsap = ntohs(evhp->ether_type);
+	}
+
+	if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
+		/*
+		 * The packet is not of interest to this socket so
+		 * drop it on the floor. Here the SAP is being used
+		 * as a very coarse filter.
+		 */
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_bad_proto.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	/*
+	 * This field is not often set, even for ethernet,
+	 * by mac_header_info, so compute it if it is 0.
+	 */
+	if (hdr.mhi_pktsize == 0)
+		hdr.mhi_pktsize = msgdsize(mp);
+
+	/*
+	 * If a BPF filter is present, pass the raw packet into that.
+	 * A failed match will result in zero being returned, indicating
+	 * that this socket is not interested in the packet.
+	 */
+	if (ps->ps_bpf.bf_len != 0) {
+		uchar_t *buffer;
+		int buflen;
+
+		/*
+		 * When the whole packet is in the first mblk the filter is
+		 * given a pointer to the data; otherwise it is given the
+		 * mblk chain itself together with a buffer length of 0.
+		 */
+		buflen = MBLKL(mp);
+		if (hdr.mhi_pktsize == buflen) {
+			buffer = mp->b_rptr;
+		} else {
+			buflen = 0;
+			buffer = (uchar_t *)mp;
+		}
+		rw_enter(&ps->ps_bpflock, RW_READER);
+		if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
+		    hdr.mhi_pktsize, buflen) == 0) {
+			rw_exit(&ps->ps_bpflock);
+			ps->ps_stats.tp_drops++;
+			ks_stats.kp_recv_filtered.value.ui64++;
+			freemsg(mp);
+			return;
+		}
+		rw_exit(&ps->ps_bpflock);
+	}
+
+	if (ps->ps_type == SOCK_DGRAM) {
+		/*
+		 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
+		 * past the link layer header.
+		 */
+		mp->b_rptr += hdr.mhi_hdrsize;
+		hdr.mhi_pktsize -= hdr.mhi_hdrsize;
+	}
+
+	tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
+	if (ps->ps_auxdata) {
+		tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
+		tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
+	}
+
+	/*
+	 * It is tempting to think that this could be optimised by having
+	 * the base mblk_t allocated and hung off the pfpsock structure,
+	 * except that then another one would need to be allocated for the
+	 * sockaddr_ll that is included. Even creating a template to copy
+	 * from is of questionable value, as read-write from one structure
+	 * to the other is going to be slower than all of the initialisation.
+	 */
+	mp0 = allocb(tusz, BPRI_HI);
+	if (mp0 == NULL) {
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_alloc_fail.value.ui64++;
+		freemsg(mp);
+		return;
+	}
+
+	(void) memset(mp0->b_rptr, 0, tusz);
+
+	mp0->b_datap->db_type = M_PROTO;
+	mp0->b_wptr = mp0->b_rptr + tusz;
+
+	tunit = (struct T_unitdata_ind *)mp0->b_rptr;
+	tunit->PRIM_type = T_UNITDATA_IND;
+	tunit->SRC_length = sizeof (struct sockaddr);
+	tunit->SRC_offset = sizeof (*tunit);
+
+	/*
+	 * Build the source address from what was cached at bind time (sol)
+	 * and what was learned from this packet's link header.
+	 */
+	sol = (struct sockaddr_ll *)&ps->ps_sock;
+	sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
+	sll->sll_ifindex = sol->sll_ifindex;
+	sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
+	sll->sll_halen = sol->sll_halen;
+	if (hdr.mhi_saddr != NULL)
+		(void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
+
+	switch (hdr.mhi_dsttype) {
+	case MAC_ADDRTYPE_MULTICAST :
+		sll->sll_pkttype = PACKET_MULTICAST;
+		break;
+	case MAC_ADDRTYPE_BROADCAST :
+		sll->sll_pkttype = PACKET_BROADCAST;
+		break;
+	case MAC_ADDRTYPE_UNICAST :
+		if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
+			sll->sll_pkttype = PACKET_HOST;
+		else
+			sll->sll_pkttype = PACKET_OTHERHOST;
+		break;
+	}
+
+	if (ps->ps_auxdata) {
+		struct tpacket_auxdata *aux;
+		struct T_opthdr *topt;
+
+		tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
+		    sizeof (struct sockaddr_ll));
+		tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
+		    _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
+
+		topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
+		aux = (struct tpacket_auxdata *)
+		    ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
+
+		topt->len = tunit->OPT_length;
+		topt->level = SOL_PACKET;
+		topt->name = PACKET_AUXDATA;
+		topt->status = 0;
+		/*
+		 * libpcap doesn't seem to use any other field,
+		 * so it isn't clear how they should be filled in.
+		 */
+		aux->tp_vlan_vci = hdr.mhi_tci;
+	}
+
+	linkb(mp0, mp);
+
+	ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
+	    &error, NULL);
+
+	if (error == 0) {
+		ps->ps_stats.tp_packets++;
+		ks_stats.kp_recv_ok.value.ui64++;
+	} else {
+		/*
+		 * On ENOSPC, ask sockfs again with an empty message and
+		 * only mark the socket flow controlled if it still reports
+		 * that there is no space.
+		 */
+		mutex_enter(&ps->ps_lock);
+		if (error == ENOSPC) {
+			ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
+			    &error, NULL);
+			if (error == ENOSPC)
+				ps->ps_flow_ctrld = B_TRUE;
+		}
+		mutex_exit(&ps->ps_lock);
+		ps->ps_stats.tp_drops++;
+		ks_stats.kp_recv_fail.value.ui64++;
+	}
+}
+
+/*
+ * Bind a PF_PACKET socket to a network interface.
+ *
+ * The default operation of this bind() is to place the socket (and thus the
+ * network interface) into promiscuous mode. It is then up to the application
+ * to turn that down by issuing the relevant ioctls, if desired.
+ *
+ * addr must point to a struct sockaddr_ll whose sll_ifindex names the link
+ * to bind to. Returns 0 on success; EINVAL if the socket is already bound
+ * or the address is missing or too short; EADDRINUSE if another bind has
+ * already attached handles to the socket; otherwise the error from opening
+ * the link or from mac_promisc_add(). If mac_promisc_add() fails the
+ * socket is returned to its unbound state so that bind can be retried.
+ */
+/* ARGSUSED */
+static int
+sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
+    socklen_t addrlen, struct cred *cred)
+{
+	struct sockaddr_ll *addr_ll, *sol;
+	mac_client_handle_t mch;
+	struct pfpsock *ps;
+	mac_handle_t mh;
+	int error;
+
+	ps = (struct pfpsock *)handle;
+	if (ps->ps_bound)
+		return (EINVAL);
+
+	/*
+	 * sll_ifindex is about to be read from the address, so it has to be
+	 * present and large enough to be a sockaddr_ll.
+	 */
+	if (addr == NULL || addrlen < sizeof (struct sockaddr_ll))
+		return (EINVAL);
+
+	addr_ll = (struct sockaddr_ll *)addr;
+
+	error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
+	if (error != 0)
+		return (error);
+	/*
+	 * Ensure that each socket is only bound once.
+	 */
+	mutex_enter(&ps->ps_lock);
+	if (ps->ps_mh != NULL) {
+		mutex_exit(&ps->ps_lock);
+		pfp_close(mh, mch);
+		return (EADDRINUSE);
+	}
+	ps->ps_mh = mh;
+	ps->ps_mch = mch;
+	mutex_exit(&ps->ps_lock);
+
+	/*
+	 * Cache all of the information from bind so that it's in an easy
+	 * place to get at when packets are received. This has to be done
+	 * before mac_promisc_add() because pfp_packet() reads it.
+	 */
+	sol = (struct sockaddr_ll *)&ps->ps_sock;
+	sol->sll_family = AF_PACKET;
+	sol->sll_ifindex = addr_ll->sll_ifindex;
+	sol->sll_protocol = addr_ll->sll_protocol;
+	sol->sll_halen = mac_addr_len(ps->ps_mh);
+	mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
+	mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
+	ps->ps_linkid = addr_ll->sll_ifindex;
+
+	error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
+	    pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
+	if (error != 0) {
+		/*
+		 * Undo everything done above. Leaving the handles attached
+		 * would make every later bind fail with EADDRINUSE, and
+		 * leaving the cached address behind without handles would
+		 * let a send match it and use handles that are not there.
+		 */
+		mutex_enter(&ps->ps_lock);
+		ps->ps_mh = NULL;
+		ps->ps_mch = NULL;
+		mutex_exit(&ps->ps_lock);
+		bzero(&ps->ps_sock, sizeof (ps->ps_sock));
+		ps->ps_max_sdu = 0;
+		ps->ps_linkid = 0;
+		pfp_close(mh, mch);
+		return (error);
+	}
+
+	ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
+	ps->ps_bound = B_TRUE;
+
+	return (0);
+}
+
+/*
+ * Record the sockfs upper handle and upcall vector for this socket; they
+ * are needed later to deliver received packets.
+ */
+/* ARGSUSED */
+static void
+sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
+    sock_upcalls_t *upcalls, int flags, cred_t *cred)
+{
+	struct pfpsock *sock = (struct pfpsock *)lower;
+
+	sock->ps_upcalls = upcalls;
+	sock->ps_upper = upper;
+}
+
+/*
+ * getsockopt downcall. Only the option level introduced by this module,
+ * SOL_PACKET, is handled here; requests for every other level are handed
+ * back to sockfs.
+ */
+/* ARGSUSED */
+static int
+sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
+    void *optval, socklen_t *optlenp, struct cred *cred)
+{
+	if (level != SOL_PACKET) {
+		/*
+		 * When sockfs gets this error back from the getsockopt
+		 * downcall it handles the option itself, if it is able
+		 * to. That is how SO_RCVBUF, etc, are implemented.
+		 */
+		return (ENOPROTOOPT);
+	}
+
+	return (pfp_getpacket_sockopt(handle, option_name, optval, optlenp));
+}
+
+/*
+ * setsockopt downcall. Options can be set at exactly two levels on a
+ * PF_PACKET socket, SOL_SOCKET and SOL_PACKET; any other level is
+ * rejected with EINVAL.
+ */
+/* ARGSUSED */
+static int
+sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
+    const void *optval, socklen_t optlen, struct cred *cred)
+{
+	int rv;
+
+	if (level == SOL_SOCKET) {
+		rv = pfp_setsocket_sockopt(handle, option_name, optval,
+		    optlen);
+	} else if (level == SOL_PACKET) {
+		rv = pfp_setpacket_sockopt(handle, option_name, optval,
+		    optlen);
+	} else {
+		rv = EINVAL;
+	}
+
+	return (rv);
+}
+
+/*
+ * Send one packet out of a network interface.
+ *
+ * This function is incredibly inefficient for sending any packet that
+ * comes with a msghdr asking to be sent to an interface to which the
+ * socket has not been bound. Some possibilities here are keeping a
+ * cache of all open mac's and mac_client's, for the purpose of sending,
+ * and closing them after some amount of inactivity. Clearly, applications
+ * should not be written to use one socket for multiple interfaces if
+ * performance is desired with the code as is.
+ *
+ * If msg carries no address the bound interface is used (EPROTO if the
+ * socket is not bound). If it carries a sockaddr_ll that names a different
+ * interface, or the socket has not been bound, that interface is opened
+ * for the duration of the call. For SOCK_DGRAM sockets a link layer header
+ * is built from the sockaddr_ll and prepended to the caller's data.
+ *
+ * Returns 0 if the packet was handed to the mac layer, otherwise an errno
+ * value (EPROTO, EINVAL, EAFNOSUPPORT, EMSGSIZE, ENOBUFS, or the error
+ * from opening the interface or copying in the data).
+ */
+/* ARGSUSED */
+static int
+sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
+    struct nmsghdr *msg, struct cred *cred)
+{
+	struct sockaddr_ll *sol;
+	mac_client_handle_t mch;
+	struct pfpsock *ps;
+	boolean_t new_open;
+	mac_handle_t mh;
+	size_t mpsize;
+	uint_t maxsdu;
+	mblk_t *mp0;
+	mblk_t *mp;
+	int error;
+
+	mp = NULL;
+	mp0 = NULL;
+	new_open = B_FALSE;
+	ps = (struct pfpsock *)handle;
+	mh = ps->ps_mh;
+	mch = ps->ps_mch;
+	maxsdu = ps->ps_max_sdu;
+
+	sol = (struct sockaddr_ll *)msg->msg_name;
+	if (sol == NULL) {
+		/*
+		 * If no sockaddr_ll has been provided with the send call,
+		 * use the one constructed when the socket was bound to an
+		 * interface and fail if it hasn't been bound.
+		 */
+		if (!ps->ps_bound) {
+			ks_stats.kp_send_unbound.value.ui64++;
+			return (EPROTO);
+		}
+		sol = (struct sockaddr_ll *)&ps->ps_sock;
+	} else {
+		/*
+		 * Verify the sockaddr_ll message passed down before using
+		 * it to send a packet out with. If it refers to an interface
+		 * that has not been bound, it is necessary to open it.
+		 */
+		struct sockaddr_ll *sll;
+
+		if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
+			ks_stats.kp_send_short_msg.value.ui64++;
+			return (EINVAL);
+		}
+
+		if (sol->sll_family != AF_PACKET) {
+			ks_stats.kp_send_wrong_family.value.ui64++;
+			return (EAFNOSUPPORT);
+		}
+
+		/*
+		 * The cached handles may only be reused once bind has
+		 * completed. Comparing interface indexes alone is not
+		 * enough: on a socket that was never bound the cached
+		 * index is zero and there are no handles to send with.
+		 */
+		sll = (struct sockaddr_ll *)&ps->ps_sock;
+		if (!ps->ps_bound || sol->sll_ifindex != sll->sll_ifindex) {
+			error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
+			    cred);
+			if (error != 0) {
+				ks_stats.kp_send_open_fail.value.ui64++;
+				return (error);
+			}
+			mac_sdu_get(mh, NULL, &maxsdu);
+			new_open = B_TRUE;
+		}
+	}
+
+	mpsize = uiop->uio_resid;
+	if (mpsize > maxsdu) {
+		ks_stats.kp_send_too_big.value.ui64++;
+		error = EMSGSIZE;
+		goto done;
+	}
+
+	if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
+		ks_stats.kp_send_alloc_fail.value.ui64++;
+		error = ENOBUFS;
+		goto done;
+	}
+
+	mp->b_wptr = mp->b_rptr + mpsize;
+	error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
+	if (error != 0) {
+		ks_stats.kp_send_uiomove_fail.value.ui64++;
+		goto done;
+	}
+
+	if (ps->ps_type == SOCK_DGRAM) {
+		mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
+		if (mp0 == NULL) {
+			ks_stats.kp_send_no_memory.value.ui64++;
+			error = ENOBUFS;
+			goto done;
+		}
+		linkb(mp0, mp);
+		mp = mp0;
+	}
+
+	/*
+	 * As this is sending datagrams and no promise is made about
+	 * how or if a packet will be sent/delivered, no effort is to
+	 * be expended in recovering from a situation where the packet
+	 * cannot be sent - it is just dropped.
+	 *
+	 * mac_tx() returns a cookie, not an errno, and with
+	 * MAC_DROP_ON_NO_DESC it disposes of the message itself when it
+	 * cannot be queued. The message therefore no longer belongs to
+	 * this function whatever the outcome, and a non-zero cookie is
+	 * reported to the caller as ENOBUFS.
+	 */
+	if (mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL) == 0) {
+		error = 0;
+		ks_stats.kp_send_ok.value.ui64++;
+	} else {
+		error = ENOBUFS;
+		ks_stats.kp_send_failed.value.ui64++;
+	}
+	mp = NULL;
+
+done:
+
+	if (new_open) {
+		ASSERT(mch != ps->ps_mch);
+		ASSERT(mh != ps->ps_mh);
+		pfp_close(mh, mch);
+	}
+	if (mp != NULL)
+		freemsg(mp);
+
+	return (error);
+}
+
+/*
+ * Downcall made by sockfs to clear the flow control condition: packets
+ * will be accepted by pfp_packet() again once ps_flow_ctrld is B_FALSE.
+ *
+ * The flag is cleared under ps_lock, the same lock that pfp_packet()
+ * holds when it sets the flag. When the two race, packets are arriving
+ * too fast to be dealt with and some are going to be dropped whichever
+ * thread gets the lock first.
+ */
+static void
+sdpfp_clr_flowctrl(sock_lower_handle_t handle)
+{
+	struct pfpsock *sock = (struct pfpsock *)handle;
+
+	mutex_enter(&sock->ps_lock);
+	sock->ps_flow_ctrld = B_FALSE;
+	mutex_exit(&sock->ps_lock);
+}
+
+/*
+ * The implementation of this ioctl() handler is intended to function
+ * in the absence of a bind() being made before it is called. Thus the
+ * function calls mac_open() itself to provide a handle
+ * This function is structured like this:
+ * - determine the linkid for the interface being targetted
+ * - open the interface with said linkid
+ * - perform ioctl
+ * - copy results back to caller
+ *
+ * SIOCGSTAMP does not refer to an interface, so it is answered before any
+ * link is looked up, and commands that are not recognised are rejected
+ * with EINVAL rather than being allowed to fall through to the open with
+ * no linkid having been determined.
+ *
+ * The ioctls that interact with interface flags have been implemented below
+ * to assume that the interface is always up and running (IFF_RUNNING) and
+ * to use the state of this socket to determine whether or not the network
+ * interface is in promiscuous mode. Thus an ioctl to get the interface flags
+ * of an interface that has been put in promiscuous mode by another socket
+ * (in the same program or different), will not report that status.
+ */
+/* ARGSUSED */
+static int
+sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
+    int32_t *rval, struct cred *cr)
+{
+#if defined(_SYSCALL32)
+	struct timeval32 tival;
+#else
+	struct timeval tival;
+#endif
+	mac_client_promisc_type_t mtype;
+	datalink_id_t linkid;
+	struct lifreq lifreq;
+	struct ifreq ifreq;
+	struct pfpsock *ps;
+	mac_handle_t mh;
+	timespec_t tv;
+	int error;
+
+	switch (cmd) {
+	/*
+	 * No interface is involved in fetching a timestamp.
+	 */
+	case SIOCGSTAMP :
+		(void) gethrestime(&tv);
+		tival.tv_sec = (time_t)tv.tv_sec;
+		tival.tv_usec = tv.tv_nsec / 1000;
+		return (ddi_copyout(&tival, (void *)arg, sizeof (tival), 0));
+
+	/*
+	 * ioctls that work on "struct lifreq"
+	 */
+	case SIOCSLIFFLAGS :
+	case SIOCGLIFINDEX :
+	case SIOCGLIFFLAGS :
+	case SIOCGLIFMTU :
+		error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid);
+		if (error != 0)
+			return (error);
+		break;
+
+	/*
+	 * ioctls that work on "struct ifreq".
+	 * Not all of these have a "struct lifreq" partner, for example
+	 * SIOCGIFHWADDR, for the simple reason that the logical interface
+	 * does not have a hardware address.
+	 */
+	case SIOCSIFFLAGS :
+	case SIOCGIFINDEX :
+	case SIOCGIFFLAGS :
+	case SIOCGIFMTU :
+	case SIOCGIFHWADDR :
+		error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid);
+		if (error != 0)
+			return (error);
+		break;
+
+	default :
+		return (EINVAL);
+	}
+
+	error =  mac_open_by_linkid(linkid, &mh);
+	if (error != 0)
+		return (error);
+
+	ps = (struct pfpsock *)handle;
+
+	switch (cmd) {
+	case SIOCGLIFINDEX :
+		lifreq.lifr_index = linkid;
+		break;
+
+	case SIOCGIFINDEX :
+		ifreq.ifr_index = linkid;
+		break;
+
+	case SIOCGIFFLAGS :
+		ifreq.ifr_flags = IFF_RUNNING;
+		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
+			ifreq.ifr_flags |= IFF_PROMISC;
+		break;
+
+	case SIOCGLIFFLAGS :
+		lifreq.lifr_flags = IFF_RUNNING;
+		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
+			lifreq.lifr_flags |= IFF_PROMISC;
+		break;
+
+	/*
+	 * Flags can only be set on the interface this socket is bound to.
+	 */
+	case SIOCSIFFLAGS :
+		if (linkid != ps->ps_linkid) {
+			error = EINVAL;
+		} else {
+			if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
+				mtype = MAC_CLIENT_PROMISC_ALL;
+			else
+				mtype = MAC_CLIENT_PROMISC_FILTERED;
+			error = pfp_set_promisc(ps, mtype);
+		}
+		break;
+
+	case SIOCSLIFFLAGS :
+		if (linkid != ps->ps_linkid) {
+			error = EINVAL;
+		} else {
+			if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
+				mtype = MAC_CLIENT_PROMISC_ALL;
+			else
+				mtype = MAC_CLIENT_PROMISC_FILTERED;
+			error = pfp_set_promisc(ps, mtype);
+		}
+		break;
+
+	case SIOCGIFMTU :
+		mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
+		break;
+
+	case SIOCGLIFMTU :
+		mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
+		break;
+
+	case SIOCGIFHWADDR :
+		mac_unicast_primary_get(mh, (uint8_t *)ifreq.ifr_addr.sa_data);
+		ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
+		break;
+
+	default :
+		break;
+	}
+
+	mac_close(mh);
+
+	if (error == 0) {
+		/*
+		 * Only the "GET" ioctls need to copy data back to userspace.
+		 */
+		switch (cmd) {
+		case SIOCGLIFINDEX :
+		case SIOCGLIFFLAGS :
+		case SIOCGLIFMTU :
+			error = ddi_copyout(&lifreq, (void *)arg,
+			    sizeof (lifreq), 0);
+			break;
+
+		case SIOCGIFINDEX :
+		case SIOCGIFFLAGS :
+		case SIOCGIFMTU :
+		case SIOCGIFHWADDR :
+			error = ddi_copyout(&ifreq, (void *)arg,
+			    sizeof (ifreq), 0);
+			break;
+		default :
+			break;
+		}
+	}
+
+	return (error);
+}
+
+/*
+ * Closing the socket requires that all open references to network
+ * interfaces be closed and that everything else hanging off the pfpsock
+ * (the BPF program and the two locks initialised when the socket was
+ * created) be released before the structure itself is freed.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
+{
+	struct pfpsock *ps = (struct pfpsock *)handle;
+
+	/*
+	 * Remove the receive callback first so that nothing below is done
+	 * while packets can still be delivered to this socket.
+	 */
+	if (ps->ps_phd != 0) {
+		mac_promisc_remove(ps->ps_phd);
+		ps->ps_phd = 0;
+	}
+
+	if (ps->ps_mch != 0) {
+		mac_client_close(ps->ps_mch, 0);
+		ps->ps_mch = 0;
+	}
+
+	if (ps->ps_mh != 0) {
+		mac_close(ps->ps_mh);
+		ps->ps_mh = 0;
+	}
+
+	/*
+	 * A filter attached with SO_ATTACH_FILTER that was never detached
+	 * would otherwise be leaked.
+	 */
+	pfp_release_bpf(ps);
+
+	rw_destroy(&ps->ps_bpflock);
+	mutex_destroy(&ps->ps_lock);
+
+	kmem_free(ps, sizeof (*ps));
+
+	return (0);
+}
+
+/* ************************************************************************* */
+
+/*
+ * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
+ * determine the linkid for the interface name stored in that structure.
+ * name is used as a buffer so that we can ensure a trailing \0 is appended
+ * to the name safely.
+ *
+ * The structure is copied into *ifreqp for the caller's later use and the
+ * linkid is stored in *linkidp. Returns 0 on success, EFAULT if the copyin
+ * fails, otherwise the error from the name lookup.
+ */
+static int
+pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
+    datalink_id_t *linkidp)
+{
+	char name[IFNAMSIZ + 1];
+	int error;
+
+	if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), 0) != 0)
+		return (EFAULT);
+
+	/*
+	 * ifr_name comes from the caller and need not be terminated, so it
+	 * must not be handed to a function that looks for the end of the
+	 * source string. Copy the whole field and terminate the copy.
+	 */
+	bcopy(ifreqp->ifr_name, name, IFNAMSIZ);
+	name[IFNAMSIZ] = '\0';
+
+	/*
+	 * Try the name as a link name first and as a mac name second.
+	 */
+	error = dls_mgmt_get_linkid(name, linkidp);
+	if (error != 0)
+		error = dls_devnet_macname2linkid(name, linkidp);
+
+	return (error);
+}
+
+/*
+ * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
+ * determine the linkid for the interface name stored in that structure.
+ * name is used as a buffer so that we can ensure a trailing \0 is appended
+ * to the name safely.
+ *
+ * The structure is copied into *lifreqp for the caller's later use and the
+ * linkid is stored in *linkidp. Returns 0 on success, EFAULT if the copyin
+ * fails, otherwise the error from the name lookup.
+ */
+static int
+pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
+    datalink_id_t *linkidp)
+{
+	char name[LIFNAMSIZ + 1];
+	int error;
+
+	if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), 0) != 0)
+		return (EFAULT);
+
+	/*
+	 * lifr_name comes from the caller and need not be terminated, so it
+	 * must not be handed to a function that looks for the end of the
+	 * source string. Copy the whole field and terminate the copy.
+	 */
+	bcopy(lifreqp->lifr_name, name, LIFNAMSIZ);
+	name[LIFNAMSIZ] = '\0';
+
+	/*
+	 * Try the name as a link name first and as a mac name second.
+	 */
+	error = dls_mgmt_get_linkid(name, linkidp);
+	if (error != 0)
+		error = dls_devnet_macname2linkid(name, linkidp);
+
+	return (error);
+}
+
+/*
+ * getsockopt handler for the SOL_PACKET level.
+ *
+ * Although several SOL_PACKET options specific to this implementation of
+ * PF_PACKET can be set, the current API has no way to read their state
+ * back. The only thing SOL_PACKET can be used for with getsockopt is
+ * therefore retrieving statistics (PACKET_STATISTICS), which is consistent
+ * with the Linux API at the time of writing.
+ *
+ * Returns 0 with the statistics copied to optval and *optlenp set to their
+ * size, or EINVAL for any other option or a buffer that is too small.
+ */
+static int
+pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
+    void *optval, socklen_t *optlenp)
+{
+	struct pfpsock *sock = (struct pfpsock *)handle;
+
+	if (option_name != PACKET_STATISTICS)
+		return (EINVAL);
+
+	if (*optlenp < sizeof (sock->ps_stats))
+		return (EINVAL);
+
+	bcopy(&sock->ps_stats, optval, sizeof (sock->ps_stats));
+	*optlenp = sizeof (sock->ps_stats);
+
+	return (0);
+}
+
+/*
+ * The SOL_PACKET level for socket options supports three options,
+ * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
+ * This function is responsible for mapping the two socket options
+ * that manage multicast membership into the appropriate internal
+ * function calls to bring the option into effect. Whilst direct
+ * changes to the multicast membership (ADD/DROP) groups is handled
+ * by calls directly into the mac module, changes to the promiscuous
+ * mode are vectored through pfp_set_promisc() so that the logic for
+ * managing the promiscuous mode is in one place.
+ *
+ * Returns 0 on success, EPROTO if the socket has not been bound, EINVAL
+ * for an unknown option, an option value of the wrong size, a membership
+ * request that does not match the bound interface or an unknown
+ * membership type, otherwise the error from the mac layer.
+ *
+ * NOTE(review): the hardware address length check is applied to every
+ * membership type, including PACKET_MR_PROMISC and PACKET_MR_ALLMULTI
+ * which carry no address - confirm that this is intended.
+ */
+/* ARGSUSED */
+static int
+pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
+    const void *optval, socklen_t optlen)
+{
+	struct packet_mreq mreq;
+	struct pfpsock *ps;
+	int error = 0;
+	int opt;
+
+	ps = (struct pfpsock *)handle;
+	if (!ps->ps_bound)
+		return (EPROTO);
+
+	if ((option_name == PACKET_ADD_MEMBERSHIP) ||
+	    (option_name == PACKET_DROP_MEMBERSHIP)) {
+		/*
+		 * A whole packet_mreq is about to be copied out of the
+		 * option buffer, so the buffer must be at least that big.
+		 */
+		if (optval == NULL || optlen < sizeof (mreq))
+			return (EINVAL);
+		bcopy(optval, &mreq, sizeof (mreq));
+		if (ps->ps_linkid != mreq.mr_ifindex)
+			return (EINVAL);
+
+		if (mreq.mr_alen !=
+		    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
+			return (EINVAL);
+	}
+
+	switch (option_name) {
+	case PACKET_ADD_MEMBERSHIP :
+		switch (mreq.mr_type) {
+		case PACKET_MR_MULTICAST :
+			error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
+			break;
+
+		case PACKET_MR_PROMISC :
+			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
+			break;
+
+		case PACKET_MR_ALLMULTI :
+			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
+			break;
+
+		default :
+			/*
+			 * Do not report success for a membership type that
+			 * was not acted upon.
+			 */
+			error = EINVAL;
+			break;
+		}
+		break;
+
+	case PACKET_DROP_MEMBERSHIP :
+		switch (mreq.mr_type) {
+		case PACKET_MR_MULTICAST :
+			mac_multicast_remove(ps->ps_mch, mreq.mr_address);
+			break;
+
+		/*
+		 * A promiscuous mode can only be dropped if it is the mode
+		 * that the socket is currently in.
+		 */
+		case PACKET_MR_PROMISC :
+			if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
+				return (EINVAL);
+			error = pfp_set_promisc(ps,
+			    MAC_CLIENT_PROMISC_FILTERED);
+			break;
+
+		case PACKET_MR_ALLMULTI :
+			if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
+				return (EINVAL);
+			error = pfp_set_promisc(ps,
+			    MAC_CLIENT_PROMISC_FILTERED);
+			break;
+
+		default :
+			error = EINVAL;
+			break;
+		}
+		break;
+
+	case PACKET_AUXDATA :
+		if (optlen == sizeof (int)) {
+			opt = *(int *)optval;
+			ps->ps_auxdata = (opt != 0);
+		} else {
+			error = EINVAL;
+		}
+		break;
+	default :
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
+ * SO_ATTACH_FILTER and SO_DETACH_FILTER. All other setsockopt requests
+ * that are for SOL_SOCKET are passed back to the socket layer for its
+ * generic implementation.
+ *
+ * Both of these setsockopt values are candidates for being handled by the
+ * socket layer itself in future, however this requires understanding how
+ * they would interact with all other sockets.
+ *
+ * For SO_ATTACH_FILTER, optval holds a struct bpf_program (or, from a
+ * 32-bit caller of a 64-bit kernel, a struct bpf_program32) whose
+ * instructions are copied in and validated before replacing any filter
+ * already attached. Returns 0 on success, EINVAL for a malformed request
+ * or a program that fails validation, EFAULT if the instructions cannot
+ * be copied in and ENOPROTOOPT for options to be handled by sockfs.
+ */
+static int
+pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
+    const void *optval, socklen_t optlen)
+{
+	struct bpf_program prog;
+	struct bpf_insn *fcode;
+	struct pfpsock *ps;
+	int error = 0;
+	int size;
+
+	ps = (struct pfpsock *)handle;
+
+	switch (option_name) {
+	case SO_ATTACH_FILTER :
+#ifdef _LP64
+		if (optlen == sizeof (struct bpf_program32)) {
+			struct bpf_program32 prog32;
+
+			bcopy(optval, &prog32, sizeof (prog32));
+			prog.bf_len = prog32.bf_len;
+			prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
+		} else
+#endif
+		if (optlen == sizeof (struct bpf_program)) {
+			bcopy(optval, &prog, sizeof (prog));
+		} else {
+			return (EINVAL);
+		}
+
+		/*
+		 * bf_len is supplied by the caller. Bound it before it is
+		 * turned into an allocation size so that the multiplication
+		 * cannot overflow and a sleeping allocation of an arbitrary
+		 * size is never attempted.
+		 */
+		if (prog.bf_len == 0 || prog.bf_len > BPF_MAXINSNS)
+			return (EINVAL);
+
+		size = prog.bf_len * sizeof (*prog.bf_insns);
+		fcode = kmem_alloc(size, KM_SLEEP);
+		if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
+			kmem_free(fcode, size);
+			return (EFAULT);
+		}
+
+		if (bpf_validate(fcode, (int)prog.bf_len)) {
+			/*
+			 * Swap the filter while holding the lock as writer
+			 * so that pfp_packet() never runs a program that is
+			 * being replaced. Note that bf_len records the size
+			 * of the program in bytes.
+			 */
+			rw_enter(&ps->ps_bpflock, RW_WRITER);
+			pfp_release_bpf(ps);
+			ps->ps_bpf.bf_insns = fcode;
+			ps->ps_bpf.bf_len = size;
+			rw_exit(&ps->ps_bpflock);
+
+			return (0);
+		}
+		kmem_free(fcode, size);
+		error = EINVAL;
+		break;
+
+	case SO_DETACH_FILTER :
+		/*
+		 * As with attaching, the filter may be in use by
+		 * pfp_packet(), so it can only be released while the lock
+		 * is held as writer.
+		 */
+		rw_enter(&ps->ps_bpflock, RW_WRITER);
+		pfp_release_bpf(ps);
+		rw_exit(&ps->ps_bpflock);
+		break;
+	default :
+		/*
+		 * If sockfs code receives this error in return from the
+		 * setsockopt downcall it handles the option locally, if
+		 * it can. This implements SO_RCVBUF, etc.
+		 */
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * pfp_open_index is an internal function used to open a MAC device by
+ * its index. Both a mac_handle_t and mac_client_handle_t are acquired
+ * because some of the interfaces provided by the mac layer require either
+ * only the mac_handle_t or both it and the mac_client_handle_t.
+ *
+ * Whilst inside the kernel we can access data structures supporting any
+ * zone, access to interfaces from non-global zones is restricted to those
+ * interfaces (if any) that are exclusively assigned to a zone.
+ *
+ * On success 0 is returned and both handles are stored through mhp and
+ * mcip. On failure an errno value is returned, any handle opened along the
+ * way is closed again and neither output is written.
+ */
+static int
+pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
+    cred_t *cred)
+{
+	mac_client_handle_t client = 0;
+	mac_handle_t mac = 0;
+	zoneid_t caller_zone;
+	zoneid_t link_zone;
+	int error;
+
+	error = mac_open_by_linkid(index, &mac);
+	if (error == 0) {
+		error = mac_client_open(mac, &client, NULL,
+		    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
+	}
+
+	if (error == 0) {
+		caller_zone = crgetzoneid(cred);
+		if (caller_zone != GLOBAL_ZONEID) {
+			mac_perim_handle_t perim;
+
+			/*
+			 * A non-global zone may only open a link that is
+			 * assigned to that same zone.
+			 */
+			mac_perim_enter_by_mh(mac, &perim);
+			error = dls_link_getzid(mac_client_name(client),
+			    &link_zone);
+			mac_perim_exit(perim);
+			if (error == 0 && link_zone != caller_zone)
+				error = EACCES;
+		}
+	}
+
+	if (error == 0) {
+		*mcip = client;
+		*mhp = mac;
+		return (0);
+	}
+
+	/* Undo whatever was opened before the failure. */
+	if (client != 0)
+		mac_client_close(client, 0);
+	if (mac != 0)
+		mac_close(mac);
+	return (error);
+}
+
+/*
+ * Release a MAC client handle and the MAC handle it was opened on
+ * (presumably the pair returned by pfp_open_index). The client is closed
+ * first, then the underlying mac handle.
+ */
+static void
+pfp_close(mac_handle_t mh, mac_client_handle_t mch)
+{
+	mac_client_close(mch, 0);
+	mac_close(mh);
+}
+
+/*
+ * Single place where a loaded BPF program is freed and the fields that
+ * describe it are reset. ps_bpf.bf_len is used both as the "a program is
+ * loaded" indicator and as the size handed to kmem_free(); when it is
+ * zero there is nothing to do.
+ */
+static void
+pfp_release_bpf(struct pfpsock *ps)
+{
+	if (ps->ps_bpf.bf_len == 0)
+		return;
+
+	kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
+	ps->ps_bpf.bf_insns = NULL;
+	ps->ps_bpf.bf_len = 0;
+}
+
+/*
+ * Set the promiscuous mode of a network interface.
+ * This function only calls the mac layer when there is a change to the
+ * status of a network interface's promiscuous mode. Tracking of how many
+ * sockets have the network interface in promiscuous mode, and thus the
+ * control over the physical device's status, is left to the mac layer.
+ *
+ * Returns 0 if the socket is already in the requested mode or the change
+ * succeeded, otherwise the error from mac_promisc_add().
+ */
+static int
+pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
+{
+	int error = 0;
+	/*
+	 * VLAN tag stripping is always requested. Starting from it also
+	 * gives flags a defined value when no promiscuous handle is
+	 * registered (ps_phd == 0), a path on which it was previously
+	 * passed to mac_promisc_add() uninitialised.
+	 */
+	int flags = MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
+
+	/*
+	 * Nothing to do if the socket is already in the requested mode.
+	 */
+	if (turnon == ps->ps_promisc)
+		return (error);
+
+	if (ps->ps_phd != 0) {
+		mac_promisc_remove(ps->ps_phd);
+		ps->ps_phd = 0;
+
+		/*
+		 * ps_promisc is set here in case the call to mac_promisc_add
+		 * fails: leaving it to indicate that the interface is still
+		 * in some sort of promiscuous mode is false.
+		 */
+		if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
+			ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
+			flags |= MAC_PROMISC_FLAGS_NO_PHYS;
+		}
+	}
+
+	error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
+	    &ps->ps_phd, flags);
+	if (error == 0)
+		ps->ps_promisc = turnon;
+
+	return (error);
+}
+
+/*
+ * This table maps the MAC types in Solaris to the ARPHRD_* values used
+ * on Linux. This is used with the SIOCGIFHWADDR ioctl.
+ * Each row is { ARPHRD_* value, DL_* value }; a row whose first column
+ * is 0 terminates the table.
+ */
+static uint_t arphrd_to_dl[][2] = {
+	{ ARPHRD_ETHER,		DL_ETHER },
+	{ ARPHRD_IEEE80211,	DL_WIFI },
+	{ 0,			0 }
+};
+
+/*
+ * Translate a DL_* MAC type into the matching ARPHRD_* value from
+ * arphrd_to_dl. Returns 0 when the type has no entry in the table.
+ */
+static int
+pfp_dl_to_arphrd(int dltype)
+{
+	int row = 0;
+
+	while (arphrd_to_dl[row][0] != 0) {
+		if (arphrd_to_dl[row][1] == dltype)
+			return (arphrd_to_dl[row][0]);
+		row++;
+	}
+	return (0);
+}
--- a/usr/src/uts/common/inet/tcp/tcp.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Thu Sep 24 07:28:12 2009 -0700
@@ -16897,11 +16897,13 @@
 	mutex_exit(&tcp->tcp_non_sq_lock);

 	/* Check to see if this connection wants to be re-fused. */
-	if (tcp->tcp_refuse && !ipst->ips_ipobs_enabled) {
-		if (tcp->tcp_ipversion == IPV4_VERSION) {
+	if (tcp->tcp_refuse) {
+		if (tcp->tcp_ipversion == IPV4_VERSION &&
+		    !ipst->ips_ip4_observe.he_interested) {
 			tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ipha,
 			    &tcp->tcp_saved_tcph);
-		} else {
+		} else if (tcp->tcp_ipversion == IPV6_VERSION &&
+		    !ipst->ips_ip6_observe.he_interested) {
 			tcp_fuse(tcp, (uchar_t *)&tcp->tcp_saved_ip6h,
 			    &tcp->tcp_saved_tcph);
 		}
@@ -18639,13 +18641,21 @@
 	DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL);

 	if (mp != NULL) {
-		if (ipst->ips_ipobs_enabled) {
+		if (ipst->ips_ip4_observe.he_interested) {
 			zoneid_t szone;

 			szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
 			    ipst, ALL_ZONES);
+
+			/*
+			 * The IP observability hook expects b_rptr to be
+			 * where the IP header starts, so advance past the
+			 * link layer header.
+			 */
+			mp->b_rptr += ire_fp_mp_len;
 			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
-			    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+			    ALL_ZONES, ill, ipst);
+			mp->b_rptr -= ire_fp_mp_len;
 		}

 		ILL_SEND_TX(ill, ire, connp, mp, 0, NULL);
@@ -20440,7 +20450,10 @@
 		atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, obsegs);
 	ire->ire_last_used_time = lbolt;

-	if (ipst->ips_ipobs_enabled) {
+	if ((tcp->tcp_ipversion == IPV4_VERSION &&
+	    ipst->ips_ip4_observe.he_interested) ||
+	    (tcp->tcp_ipversion == IPV6_VERSION &&
+	    ipst->ips_ip6_observe.he_interested)) {
 		multidata_t *dlmdp = mmd_getmultidata(md_mp_head);
 		pdesc_t *dl_pkt;
 		pdescinfo_t pinfo;
@@ -20453,7 +20466,7 @@
 			if ((nmp = mmd_transform_link(dl_pkt)) == NULL)
 				continue;
 			ipobs_hook(nmp, IPOBS_HOOK_OUTBOUND, szone,
-			    ALL_ZONES, ill, tcp->tcp_ipversion, 0, ipst);
+			    ALL_ZONES, ill, ipst);
 			freemsg(nmp);
 		}
 	}
@@ -20634,13 +20647,17 @@
 	DTRACE_IP_FASTPATH(mp, ipha, ill, ipha, NULL);

 	if (mp != NULL) {
-		if (ipst->ips_ipobs_enabled) {
+		if (ipst->ips_ip4_observe.he_interested) {
 			zoneid_t szone;

 			szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
 			    ipst, ALL_ZONES);
+			if (ire_fp_mp_len != 0)
+				mp->b_rptr += ire_fp_mp_len;
 			ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
-			    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+			    ALL_ZONES, ill, ipst);
+			if (ire_fp_mp_len != 0)
+				mp->b_rptr -= ire_fp_mp_len;
 		}

 		ILL_SEND_TX(ill, ire, tcp->tcp_connp, mp, 0, NULL);
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c	Thu Sep 24 07:28:12 2009 -0700
@@ -536,7 +536,10 @@
 	if (tcp_loopback_needs_ip(tcp, ns) ||
 	    tcp_loopback_needs_ip(peer_tcp, ns) ||
 	    IPP_ENABLED(IPP_LOCAL_OUT|IPP_LOCAL_IN, ipst) ||
-	    list_head(&ipst->ips_ipobs_cb_list) != NULL) {
+	    (tcp->tcp_ipversion == IPV4_VERSION &&
+	    ipst->ips_ip4_observe.he_interested) ||
+	    (tcp->tcp_ipversion == IPV6_VERSION &&
+	    ipst->ips_ip6_observe.he_interested)) {
 		TCP_STAT(tcps, tcp_fusion_aborted);
 		tcp->tcp_refuse = B_TRUE;
 		peer_tcp->tcp_refuse = B_TRUE;
--- a/usr/src/uts/common/inet/udp/udp.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/inet/udp/udp.c	Thu Sep 24 07:28:12 2009 -0700
@@ -5454,13 +5454,21 @@
 	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
 	    ll_multicast, ipst);
 	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
-	if (ipst->ips_ipobs_enabled && mp != NULL) {
+	if (ipst->ips_ip4_observe.he_interested && mp != NULL) {
 		zoneid_t szone;

 		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
 		    ipst, ALL_ZONES);
+
+		/*
+		 * The IP observability hook expects b_rptr to be
+		 * where the IP header starts, so advance past the
+		 * link layer header.
+		 */
+		mp->b_rptr += ire_fp_mp_len;
 		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
-		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
+		    ALL_ZONES, ill, ipst);
+		mp->b_rptr -= ire_fp_mp_len;
 	}

 	if (mp == NULL)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/BPF.LICENCE	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,2047 @@
+/*	$NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $	*/
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
+ * static char rcsid[] =
+ * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * The BPF implements the following access controls for zones attempting
+ * to read and write data. Writing of data requires that the net_rawaccess
+ * privilege is held whilst reading data requires either net_rawaccess or
+ * net_observability.
+ *
+ *                              | Shared |  Exclusive |   Global
+ * -----------------------------+--------+------------+------------+
+ * DLT_IPNET in local zone      |  Read  |    Read    |    Read    |
+ * -----------------------------+--------+------------+------------+
+ * Raw access to local zone NIC |  None  | Read/Write | Read/Write |
+ * -----------------------------+--------+------------+------------+
+ * Raw access to all NICs       |  None  |    None    | Read/Write |
+ * -----------------------------+--------+------------+------------+
+ *
+ * The BPF driver is written as a cloning driver: each call to bpfopen()
+ * allocates a new minor number. This provides BPF with a 1:1 relationship
+ * between open's and close's. There is some amount of "descriptor state"
+ * that is kept per open. Pointers to this data are stored in a hash table
+ * (bpf_hash) that is index'd by the minor device number for each open file.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+#include <sys/filio.h>
+#include <sys/policy.h>
+#include <sys/cmn_err.h>
+#include <sys/uio.h>
+#include <sys/file.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+
+#include <sys/socket.h>
+#include <sys/errno.h>
+#include <sys/poll.h>
+#include <sys/dlpi.h>
+#include <sys/neti.h>
+
+#include <net/if.h>
+
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+#include <net/dlt.h>
+
+#include <netinet/in.h>
+#include <sys/mac.h>
+#include <sys/mac_client.h>
+#include <sys/mac_impl.h>
+#include <sys/time_std_impl.h>
+#include <sys/hook.h>
+#include <sys/hook_event.h>
+
+
+#define	mtod(_v, _t)	(_t)((_v)->b_rptr)
+#define	M_LEN(_m)	((_m)->b_wptr - (_m)->b_rptr)
+
+/*
+ * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
+ * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
+ */
+#define	BPF_BUFSIZE (32 * 1024)
+
+typedef void *(*cp_fn_t)(void *, const void *, size_t);
+
+/*
+ * The default read buffer size, and limit for BIOCSBLEN.
+ */
+int bpf_bufsize = BPF_BUFSIZE;
+int bpf_maxbufsize = (16 * 1024 * 1024);
+int bpf_debug = 0;
+mod_hash_t *bpf_hash = NULL;
+
+/*
+ * Use a mutex to avoid a race condition between gathering the stats/peers
+ * and opening/closing the device.
+ */
+static kcondvar_t bpf_dlt_waiter;
+static kmutex_t bpf_mtx;
+static bpf_kstats_t ks_stats;
+static bpf_kstats_t bpf_kstats = {
+	{ "readWait",		KSTAT_DATA_UINT64 },
+	{ "writeOk",		KSTAT_DATA_UINT64 },
+	{ "writeError",		KSTAT_DATA_UINT64 },
+	{ "receive",		KSTAT_DATA_UINT64 },
+	{ "captured",		KSTAT_DATA_UINT64 },
+	{ "dropped",		KSTAT_DATA_UINT64 },
+};
+static kstat_t *bpf_ksp;
+
+/*
+ *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
+ *  bpf_list holds the descriptor of every open file; lookup of a
+ *  descriptor by minor device # goes through bpf_hash
+ */
+TAILQ_HEAD(, bpf_if) bpf_iflist;
+LIST_HEAD(, bpf_d) bpf_list;
+
+static int	bpf_allocbufs(struct bpf_d *);
+static void	bpf_clear_timeout(struct bpf_d *);
+static void	bpf_debug_nic_action(char *, struct bpf_if *);
+static void	bpf_deliver(struct bpf_d *, cp_fn_t,
+		    void *, uint_t, uint_t, boolean_t);
+static struct bpf_if *
+		bpf_findif(struct bpf_d *, char *, int);
+static void	bpf_freed(struct bpf_d *);
+static int	bpf_ifname(struct bpf_d *d, char *, int);
+static void	*bpf_mcpy(void *, const void *, size_t);
+static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
+static void	bpf_detachd(struct bpf_d *);
+static int	bpf_setif(struct bpf_d *, char *, int);
+static void	bpf_timed_out(void *);
+static inline void
+		bpf_wakeup(struct bpf_d *);
+static void	catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t,
+		    cp_fn_t, struct timeval *);
+static void	reset_d(struct bpf_d *);
+static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
+static int	bpf_setdlt(struct bpf_d *, void *);
+static void	bpf_dev_add(struct bpf_d *);
+static struct bpf_d *bpf_dev_find(minor_t);
+static struct bpf_d *bpf_dev_get(minor_t);
+static void	bpf_dev_remove(struct bpf_d *);
+
+/*
+ * Copy the packet described by uio into a freshly allocated mblk.
+ *
+ * linktype selects the size of the link level header that the data is
+ * expected to start with; link types not listed below are rejected with
+ * EIO. The data is placed in the mblk at an offset chosen so that
+ * whatever follows the link level header starts on a 4 byte boundary.
+ *
+ * Returns 0 and stores the mblk in *mp on success. Returns EMSGSIZE if
+ * the data is shorter than the link level header or the remainder exceeds
+ * mtu, ENOBUFS if no mblk could be allocated, or the error from
+ * uiomove(). *mp is not written on failure.
+ */
+static int
+bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp)
+{
+	mblk_t *pkt;
+	int hdrlen;
+	int pktlen;
+	int pad;
+	int err;
+
+	switch (linktype) {
+	case DLT_EN10MB:
+		hdrlen = sizeof (struct ether_header);
+		break;
+	case DLT_FDDI:
+		hdrlen = 16;
+		break;
+	case DLT_NULL:
+		hdrlen = 0;
+		break;
+	case DLT_IPOIB:
+		hdrlen = 44;
+		break;
+	default:
+		return (EIO);
+	}
+
+	/* Always in the range 1..4, so the data is always shifted. */
+	pad = 4 - (hdrlen & 3);
+
+	/*
+	 * Reject data with no room for a link level header and data whose
+	 * payload is larger than the interface mtu.
+	 */
+	pktlen = uio->uio_resid;
+	if (pktlen < hdrlen || pktlen - hdrlen > mtu)
+		return (EMSGSIZE);
+
+	pkt = allocb(pktlen + pad, BPRI_MED);
+	if (pkt == NULL)
+		return (ENOBUFS);
+
+	/* Skip the padding so that the payload ends up aligned. */
+	pkt->b_rptr += pad;
+	pkt->b_wptr = pkt->b_rptr + pktlen;
+
+	err = uiomove(mtod(pkt, void *), pktlen, UIO_WRITE, uio);
+	if (err) {
+		freemsg(pkt);
+		return (err);
+	}
+
+	*mp = pkt;
+	return (0);
+}
+
+
+/*
+ * Attach file to the bpf interface, i.e. make d listen on bp.
+ *
+ * d must not currently be attached to any interface. Nothing is reported
+ * to the caller: if the mac client cannot be opened, d is still left on
+ * bp's list of listeners but no MBPF_PROMISC_ADD is issued for it.
+ */
+static void
+bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
+{
+	uintptr_t mh;
+
+	ASSERT(bp != NULL);
+	ASSERT(d->bd_bif == NULL);
+
+	/* Only dereference bp once the assertion above has checked it. */
+	mh = bp->bif_ifp;
+
+	/*
+	 * Point d at bp, and add d to the interface's list of listeners.
+	 * Finally, point the driver's bpf cookie at the interface so
+	 * it will divert packets to bpf.
+	 *
+	 * Note: Although this results in what looks like a lock order
+	 * reversal (bd_lock is held), the deadlock threat is not present
+	 * because the descriptor is not attached to any interface and
+	 * therefore there cannot be a packet waiting on bd_lock in
+	 * catchpacket.
+	 */
+	mutex_enter(&bp->bif_lock);
+	d->bd_bif = bp;
+	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+	mutex_exit(&bp->bif_lock);
+
+	if (MBPF_CLIENT_OPEN(&bp->bif_mac, mh, &d->bd_mcip) == 0)
+		(void) MBPF_PROMISC_ADD(&bp->bif_mac, d->bd_mcip, 0, d,
+		    &d->bd_promisc_handle, d->bd_promisc_flags);
+}
+
+/*
+ * Detach a file from its interface.
+ *
+ * Must be called with d->bd_lock held and d attached (bd_bif != NULL).
+ * bd_lock is dropped while the mac layer is called and while bif_lock is
+ * taken, then re-acquired, so it is held again on return; callers must
+ * not assume that state protected by bd_lock is unchanged across the
+ * call.
+ */
+static void
+bpf_detachd(struct bpf_d *d)
+{
+	struct bpf_if *bp;
+	uintptr_t mph;
+	uintptr_t mch;
+
+	/* Take private copies of the handles and clear them in d. */
+	mch = d->bd_mcip;
+	d->bd_mcip = 0;
+	bp = d->bd_bif;
+	ASSERT(bp != NULL);
+
+	/*
+	 * Check if this descriptor had requested promiscuous mode.
+	 * If so, turn it off. There's no need to take any action
+	 * here, that is done when MBPF_PROMISC_REMOVE is used;
+	 * bd_promisc is just a local flag to stop promiscuous mode
+	 * from being set more than once.
+	 */
+	if (d->bd_promisc)
+		d->bd_promisc = 0;
+
+	/*
+	 * Take device out of "promiscuous" mode.  Since we were able to
+	 * enter "promiscuous" mode, we should be able to turn it off.
+	 * Note, this field stores a pointer used to support both
+	 * promiscuous and non-promiscuous callbacks for packets.
+	 */
+	mph = d->bd_promisc_handle;
+	d->bd_promisc_handle = 0;
+
+	/*
+	 * The lock has to be dropped here because mac_promisc_remove may
+	 * need to wait for mac_promisc_dispatch, which has called into
+	 * bpf and catchpacket is waiting for bd_lock...
+	 * i.e mac_promisc_remove() needs to be called with none of the
+	 * locks held that are part of the bpf_mtap() call path.
+	 */
+	mutex_exit(&d->bd_lock);
+	if (mph != 0)
+		MBPF_PROMISC_REMOVE(&bp->bif_mac, mph);
+
+	if (mch != 0)
+		MBPF_CLIENT_CLOSE(&bp->bif_mac, mch);
+
+	/*
+	 * bd_lock needs to stay not held by this function until after
+	 * it has finished with bif_lock, otherwise there's a lock order
+	 * reversal with bpf_deliver and the system can deadlock.
+	 *
+	 * Remove d from the interface's descriptor list.
+	 */
+	mutex_enter(&bp->bif_lock);
+	LIST_REMOVE(d, bd_next);
+	mutex_exit(&bp->bif_lock);
+
+	/*
+	 * Because this function is called with bd_lock held, so it must
+	 * exit with it held.
+	 */
+	mutex_enter(&d->bd_lock);
+	/*
+	 * bd_bif cannot be cleared until after the promisc callback has been
+	 * removed.
+	 */
+	d->bd_bif = 0;
+}
+
+
+/*
+ * bpfilterattach() is called at load time.
+ *
+ * It creates the hash used to find descriptors by minor number,
+ * publishes the "bpf" named kstat backed by ks_stats and initialises the
+ * global lock, condition variable and lists.
+ *
+ * Returns 0 on success, ENOMEM if the hash cannot be created, or EEXIST
+ * if the kstat cannot be created (the hash is destroyed again first).
+ */
+int
+bpfilterattach(void)
+{
+
+	bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31,
+	    mod_hash_null_keydtor);
+	if (bpf_hash == NULL)
+		return (ENOMEM);
+
+	/* Start the live counters from the named template. */
+	(void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats));
+
+	bpf_ksp = kstat_create("bpf", 0, "global", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (bpf_ksp == NULL) {
+		mod_hash_destroy_idhash(bpf_hash);
+		bpf_hash = NULL;
+		return (EEXIST);
+	}
+	bpf_ksp->ks_data = &ks_stats;
+	kstat_install(bpf_ksp);
+
+	mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL);
+
+	TAILQ_INIT(&bpf_iflist);
+	LIST_INIT(&bpf_list);
+
+	return (0);
+}
+
+
+/*
+ * bpfilterdetach() is called at unload time.
+ *
+ * Returns EBUSY, leaving everything in place, while any descriptor is
+ * still open. Otherwise the kstat is removed, every interface is
+ * detached, the global hash, condition variable and mutex are destroyed
+ * and 0 is returned.
+ */
+int
+bpfilterdetach(void)
+{
+	struct bpf_if *bp;
+
+	/*
+	 * Refuse to unload before anything has been torn down. Doing this
+	 * check after removing the kstat and detaching the interfaces
+	 * would leave a still loaded module without either of them.
+	 */
+	mutex_enter(&bpf_mtx);
+	if (!LIST_EMPTY(&bpf_list)) {
+		mutex_exit(&bpf_mtx);
+		return (EBUSY);
+	}
+	mutex_exit(&bpf_mtx);
+
+	if (bpf_ksp != NULL) {
+		kstat_delete(bpf_ksp);
+		bpf_ksp = NULL;
+	}
+
+	/*
+	 * When no attach/detach callbacks can arrive from mac,
+	 * this is now safe without a lock.
+	 */
+	while ((bp = TAILQ_FIRST(&bpf_iflist)) != NULL)
+		bpfdetach(bp->bif_ifp);
+
+	mod_hash_destroy_idhash(bpf_hash);
+	bpf_hash = NULL;
+
+	cv_destroy(&bpf_dlt_waiter);
+	mutex_destroy(&bpf_mtx);
+
+	return (0);
+}
+
+/*
+ * Open ethernet device. Clones.
+ *
+ * Every successful open allocates its own descriptor and its own minor
+ * number, which is handed back to the caller through *devp.
+ *
+ * Returns 0 on success, EACCES if the credential lacks the privilege
+ * required for the requested access mode, or ENXIO if neither FREAD nor
+ * FWRITE was requested or no minor number is free.
+ */
+/* ARGSUSED */
+int
+bpfopen(dev_t *devp, int flag, int mode, cred_t *cred)
+{
+	struct bpf_d *d;
+	zoneid_t zoneid;
+	uint_t dmin;
+
+	/*
+	 * The security policy described at the top of this file is
+	 * enforced here.
+	 */
+	if ((flag & FWRITE) != 0) {
+		if (secpolicy_net_rawaccess(cred) != 0)
+			return (EACCES);
+	}
+
+	if ((flag & FREAD) != 0) {
+		if ((secpolicy_net_observability(cred) != 0) &&
+		    (secpolicy_net_rawaccess(cred) != 0))
+			return (EACCES);
+	}
+
+	if ((flag & (FWRITE|FREAD)) == 0)
+		return (ENXIO);
+
+	/*
+	 * If BPF is being opened from a non-global zone, trigger a call
+	 * back into the driver to see if it needs to initialise local
+	 * state in a zone.
+	 */
+	zoneid = crgetzoneid(cred);
+	if (zoneid != GLOBAL_ZONEID)
+		bpf_open_zone(zoneid);
+
+	/*
+	 * A structure is allocated per open file in BPF to store settings
+	 * such as buffer capture size, provide private buffers, etc.
+	 */
+	d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP);
+	d->bd_bufsize = bpf_bufsize;
+	d->bd_fmode = flag;
+	d->bd_zone = zoneid;
+	d->bd_seesent = 1;
+	d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS|
+	    MAC_PROMISC_FLAGS_NO_COPY;
+	mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL);
+
+	mutex_enter(&bpf_mtx);
+	/*
+	 * Find an unused minor number. Obviously this is an O(n) algorithm
+	 * and doesn't scale particularly well, so if there are large numbers
+	 * of open file descriptors happening in real use, this design may
+	 * need to be revisited.
+	 */
+	for (dmin = 0; dmin < L_MAXMIN; dmin++)
+		if (bpf_dev_find(dmin) == NULL)
+			break;
+	if (dmin == L_MAXMIN) {
+		mutex_exit(&bpf_mtx);
+		/*
+		 * Undo the initialisation above: the lock and condition
+		 * variable must be destroyed before their memory is freed.
+		 */
+		cv_destroy(&d->bd_wait);
+		mutex_destroy(&d->bd_lock);
+		kmem_free(d, sizeof (*d));
+		return (ENXIO);
+	}
+	d->bd_dev = dmin;
+	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
+	bpf_dev_add(d);
+	mutex_exit(&bpf_mtx);
+
+	*devp = makedevice(getmajor(*devp), dmin);
+
+	return (0);
+}
+
+/*
+ * Close the descriptor by detaching it from its interface,
+ * deallocating its buffers, and marking it free.
+ *
+ * Because we only allow a device to be opened once, there is always a
+ * 1 to 1 relationship between opens and closes supporting this function.
+ *
+ * The descriptor is looked up from the minor number of dev; always
+ * returns 0.
+ */
+/* ARGSUSED */
+int
+bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+
+	/* Cancel any pending read timeout and detach from the interface. */
+	mutex_enter(&d->bd_lock);
+	if (d->bd_state == BPF_WAITING)
+		bpf_clear_timeout(d);
+	d->bd_state = BPF_IDLE;
+	if (d->bd_bif)
+		bpf_detachd(d);
+	mutex_exit(&d->bd_lock);
+
+	/* Make the descriptor unreachable via the global list and hash. */
+	mutex_enter(&bpf_mtx);
+	LIST_REMOVE(d, bd_list);
+	bpf_dev_remove(d);
+	mutex_exit(&bpf_mtx);
+
+	/*
+	 * NOTE(review): bd_lock is acquired immediately before it is
+	 * destroyed, presumably to wait out any other thread that still
+	 * holds it; confirm that mutex_destroy() permits destroying a
+	 * mutex held by the calling thread.
+	 */
+	mutex_enter(&d->bd_lock);
+	mutex_destroy(&d->bd_lock);
+	cv_destroy(&d->bd_wait);
+
+	bpf_freed(d);
+	kmem_free(d, sizeof (*d));
+
+	return (0);
+}
+
+/*
+ * Rotate the packet buffers in descriptor d.  Move the store buffer
+ * into the hold slot, and the free buffer into the store slot.
+ * Zero the length of the new store buffer.
+ *
+ * The body is wrapped in do { } while (0) so that the macro expands to a
+ * single statement and stays correct as the unbraced body of an if/else
+ * or loop. Invoke it as "ROTATE_BUFFERS(d);". d is evaluated more than
+ * once, so it must not have side effects.
+ */
+#define	ROTATE_BUFFERS(d) \
+	do { \
+		(d)->bd_hbuf = (d)->bd_sbuf; \
+		(d)->bd_hlen = (d)->bd_slen; \
+		(d)->bd_sbuf = (d)->bd_fbuf; \
+		(d)->bd_slen = 0; \
+		(d)->bd_fbuf = 0; \
+	} while (0)
+/*
+ *  bpfread - read next chunk of packets from buffers
+ *
+ * The caller's buffer must be exactly bd_bufsize bytes. Unless the
+ * descriptor is non-blocking, the call sleeps until the hold buffer is
+ * filled, the read timeout expires, or a signal arrives.
+ *
+ * Returns 0 on success (including a timeout with nothing captured),
+ * EBADF if the descriptor was not opened for reading, EINVAL for a wrong
+ * sized buffer, EWOULDBLOCK for a non-blocking read with nothing
+ * captured, EINTR if interrupted by a signal, or the error from
+ * uiomove().
+ */
+/* ARGSUSED */
+int
+bpfread(dev_t dev, struct uio *uio, cred_t *cred)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	int timed_out;
+	ulong_t delay;
+	int error;
+
+	if ((d->bd_fmode & FREAD) == 0)
+		return (EBADF);
+
+	/*
+	 * Restrict application to use a buffer the same size as
+	 * the kernel buffers.
+	 */
+	if (uio->uio_resid != d->bd_bufsize)
+		return (EINVAL);
+
+	mutex_enter(&d->bd_lock);
+	if (d->bd_state == BPF_WAITING)
+		bpf_clear_timeout(d);
+	timed_out = (d->bd_state == BPF_TIMED_OUT);
+	d->bd_state = BPF_IDLE;
+	/*
+	 * If the hold buffer is empty, then do a timed sleep, which
+	 * ends when the timeout expires or when enough packets
+	 * have arrived to fill the store buffer.
+	 */
+	while (d->bd_hbuf == 0) {
+		if (d->bd_nonblock) {
+			if (d->bd_slen == 0) {
+				mutex_exit(&d->bd_lock);
+				return (EWOULDBLOCK);
+			}
+			ROTATE_BUFFERS(d);
+			break;
+		}
+
+		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
+			/*
+			 * A packet(s) either arrived since the previous
+			 * read or arrived while we were asleep.
+			 * Rotate the buffers and return what's here.
+			 */
+			ROTATE_BUFFERS(d);
+			break;
+		}
+		ks_stats.kp_read_wait.value.ui64++;
+		delay = ddi_get_lbolt() + d->bd_rtout;
+		error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay);
+		if (error == 0) {
+			/* Interrupted by a signal. */
+			mutex_exit(&d->bd_lock);
+			return (EINTR);
+		}
+		if (error == -1) {
+			/*
+			 * On a timeout, return what's in the buffer,
+			 * which may be nothing.  If there is something
+			 * in the store buffer, we can rotate the buffers.
+			 */
+			if (d->bd_hbuf)
+				/*
+				 * We filled up the buffer in between
+				 * getting the timeout and arriving
+				 * here, so we don't need to rotate.
+				 */
+				break;
+
+			if (d->bd_slen == 0) {
+				mutex_exit(&d->bd_lock);
+				return (0);
+			}
+			ROTATE_BUFFERS(d);
+		}
+	}
+	/*
+	 * At this point, we know we have something in the hold slot.
+	 */
+	mutex_exit(&d->bd_lock);
+
+	/*
+	 * Move data from hold buffer into user space.
+	 * We know the entire buffer is transferred since
+	 * we checked above that the read buffer is bd_bufsize bytes.
+	 */
+	error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
+
+	/* The hold buffer has been consumed: recycle it as the free one. */
+	mutex_enter(&d->bd_lock);
+	d->bd_fbuf = d->bd_hbuf;
+	d->bd_hbuf = 0;
+	d->bd_hlen = 0;
+	mutex_exit(&d->bd_lock);
+	return (error);
+}
+
+
+/*
+ * If there are processes sleeping on this descriptor, wake them up.
+ * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver,
+ * so there is no code here grabbing it.
+ *
+ * This issues a single cv_signal() on bd_wait.
+ */
+static inline void
+bpf_wakeup(struct bpf_d *d)
+{
+	cv_signal(&d->bd_wait);
+}
+
+/*
+ * Timeout handler; arg is the descriptor the timeout belongs to.
+ * If the descriptor is still in the BPF_WAITING state it is moved to
+ * BPF_TIMED_OUT and, when the store buffer holds data, a sleeper on
+ * bd_wait is signalled. In any other state nothing is changed.
+ */
+static void
+bpf_timed_out(void *arg)
+{
+	struct bpf_d *bd = arg;
+
+	mutex_enter(&bd->bd_lock);
+	if (bd->bd_state != BPF_WAITING) {
+		mutex_exit(&bd->bd_lock);
+		return;
+	}
+
+	bd->bd_state = BPF_TIMED_OUT;
+	if (bd->bd_slen != 0)
+		cv_signal(&bd->bd_wait);
+	mutex_exit(&bd->bd_lock);
+}
+
+
+/*
+ * bpfwrite - transmit the data described by uio on the interface that
+ * the descriptor is attached to.
+ *
+ * Returns 0 on success (a zero length write is a successful no-op),
+ * EBADF if the descriptor was not opened for writing, EINTR if it is not
+ * attached to an interface or the wait for the descriptor was
+ * interrupted, EIO for a DLT_IPNET descriptor, EMSGSIZE if the packet is
+ * larger than the interface allows, or the error from bpf_movein() or
+ * MBPF_TX().
+ */
+/* ARGSUSED */
+int
+bpfwrite(dev_t dev, struct uio *uio, cred_t *cred)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	struct bpf_if *bp;
+	uintptr_t mch;
+	uintptr_t ifp;
+	uint_t mtu;
+	mblk_t *m;
+	int error;
+	int dlt;
+
+	if ((d->bd_fmode & FWRITE) == 0)
+		return (EBADF);
+
+	mutex_enter(&d->bd_lock);
+	if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif->bif_ifp == 0) {
+		mutex_exit(&d->bd_lock);
+		return (EINTR);
+	}
+
+	if (uio->uio_resid == 0) {
+		mutex_exit(&d->bd_lock);
+		return (0);
+	}
+
+	while (d->bd_inuse < 0) {
+		d->bd_waiting++;
+		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
+			d->bd_waiting--;
+			mutex_exit(&d->bd_lock);
+			return (EINTR);
+		}
+		d->bd_waiting--;
+	}
+
+	/*
+	 * Snapshot the descriptor state and mark the descriptor as in use
+	 * while bd_lock is still held. bd_inuse is decremented under
+	 * bd_lock below, so incrementing it without the lock would race
+	 * with other writers.
+	 */
+	bp = d->bd_bif;
+	dlt = bp->bif_dlt;
+	mch = d->bd_mcip;
+	ifp = bp->bif_ifp;
+	d->bd_inuse++;
+	mutex_exit(&d->bd_lock);
+
+	MBPF_SDU_GET(&bp->bif_mac, ifp, &mtu);
+
+	m = NULL;
+	if (dlt == DLT_IPNET) {
+		error = EIO;
+		goto done;
+	}
+
+	error = bpf_movein(uio, dlt, mtu, &m);
+	if (error)
+		goto done;
+
+	DTRACE_PROBE5(bpf__tx, struct bpf_d *, d, struct bpf_if *, bp,
+	    int, dlt, uint_t, mtu, mblk_t *, m);
+
+	if (M_LEN(m) > mtu) {
+		error = EMSGSIZE;
+		goto done;
+	}
+
+	error = MBPF_TX(&bp->bif_mac, mch, m);
+	/*
+	 * The "tx" action here is required to consume the mblk_t.
+	 */
+	m = NULL;
+
+done:
+	if (error == 0)
+		ks_stats.kp_write_ok.value.ui64++;
+	else
+		ks_stats.kp_write_error.value.ui64++;
+	if (m != NULL)
+		freemsg(m);
+
+	/* Drop the in-use hold and wake a thread waiting for it. */
+	mutex_enter(&d->bd_lock);
+	d->bd_inuse--;
+	if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
+		cv_signal(&d->bd_wait);
+	mutex_exit(&d->bd_lock);
+
+	/*
+	 * The driver frees the mbuf.
+	 */
+	return (error);
+}
+
+
+/*
+ * Flush a descriptor: discard any captured data and zero the receive,
+ * drop and capture counters. No lock is taken here; presumably callers
+ * hold bd_lock (verify against the call sites).
+ */
+static void
+reset_d(struct bpf_d *d)
+{
+	/* A pending hold buffer is recycled as the free buffer. */
+	if (d->bd_hbuf != 0) {
+		d->bd_fbuf = d->bd_hbuf;
+		d->bd_hbuf = 0;
+	}
+
+	/* Nothing is stored or held any more. */
+	d->bd_hlen = 0;
+	d->bd_slen = 0;
+
+	/* Restart the statistics. */
+	d->bd_ccount = 0;
+	d->bd_dcount = 0;
+	d->bd_rcount = 0;
+}
+
+/*
+ * ioctl entry point for a BPF descriptor.
+ *
+ *  FIONREAD		Check for read packet available.
+ *  FIONBIO		Set non-blocking I/O flag.
+ *  BIOCGBLEN		Get buffer len [for read()].
+ *  BIOCSBLEN		Set buffer len (only while no interface is set).
+ *  BIOCSETF		Set link layer read filter.
+ *  BIOCFLUSH		Flush read packet buffer.
+ *  BIOCPROMISC		Put interface into promiscuous mode.
+ *  BIOCGDLT		Get link layer type.
+ *  BIOCGDLTLIST	Get list of supported link layer types.
+ *  BIOCSDLT		Set link layer type.
+ *  BIOCGETIF		Get interface name (struct ifreq).
+ *  BIOCSETIF		Set interface (struct ifreq).
+ *  BIOCGETLIF		Get interface name (struct lifreq).
+ *  BIOCSETLIF		Set interface (struct lifreq).
+ *  BIOCSRTIMEOUT	Set read timeout.
+ *  BIOCGRTIMEOUT	Get read timeout.
+ *  BIOCGSTATS		Get packet stats.
+ *  BIOCIMMEDIATE	Set immediate mode.
+ *  BIOCVERSION		Get filter language version.
+ *  BIOCGHDRCMPLT	Get "header already complete" flag.
+ *  BIOCSHDRCMPLT	Set "header already complete" flag.
+ *  BIOCGSEESENT	Get "see sent packets" flag.
+ *  BIOCSSEESENT	Set "see sent packets" flag.
+ *
+ * When _SYSCALL32_IMPL is defined, BIOCSRTIMEOUT32, BIOCGRTIMEOUT32,
+ * BIOCGDLTLIST32 and BIOCSETF32 handle the 32-bit layouts of the
+ * corresponding requests.
+ *
+ * Returns 0 on success or an errno value: EINVAL for an unknown command
+ * or a request that is not valid in the descriptor's current state, and
+ * EFAULT whenever user memory cannot be read or written.
+ */
+/* ARGSUSED */
+int
+bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	struct bpf_program prog;
+	struct lifreq lifreq;
+	struct ifreq ifreq;
+	int error = 0;
+	uint_t size;
+
+	/*
+	 * Any ioctl cancels a pending read timeout and puts the descriptor
+	 * back into the idle state.
+	 */
+	mutex_enter(&d->bd_lock);
+	if (d->bd_state == BPF_WAITING)
+		bpf_clear_timeout(d);
+	d->bd_state = BPF_IDLE;
+	mutex_exit(&d->bd_lock);
+
+	switch (cmd) {
+
+	/*
+	 * Check for read packet available.
+	 */
+	case FIONREAD:
+		{
+			int n;
+
+			mutex_enter(&d->bd_lock);
+			n = d->bd_slen;
+			if (d->bd_hbuf)
+				n += d->bd_hlen;
+			mutex_exit(&d->bd_lock);
+
+			/*
+			 * addr is a user address; it must be written with
+			 * copyout rather than dereferenced in the kernel.
+			 */
+			if (copyout(&n, (void *)addr, sizeof (n)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get buffer len [for read()].
+	 */
+	case BIOCGBLEN:
+		if (copyout(&d->bd_bufsize, (void *)addr,
+		    sizeof (d->bd_bufsize)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set buffer length.  Only allowed while no interface is attached;
+	 * the request is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize] and
+	 * the value actually used is copied back to the caller.
+	 */
+	case BIOCSBLEN:
+		if (copyin((void *)addr, &size, sizeof (size)) != 0) {
+			error = EFAULT;
+			break;
+		}
+
+		mutex_enter(&d->bd_lock);
+		if (d->bd_bif != 0) {
+			error = EINVAL;
+		} else {
+			if (size > bpf_maxbufsize)
+				size = bpf_maxbufsize;
+			else if (size < BPF_MINBUFSIZE)
+				size = BPF_MINBUFSIZE;
+
+			d->bd_bufsize = size;
+		}
+		mutex_exit(&d->bd_lock);
+
+		if (error == 0 &&
+		    copyout(&size, (void *)addr, sizeof (size)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF:
+		if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setf(d, &prog);
+		break;
+
+	/*
+	 * Flush read packet buffer.
+	 */
+	case BIOCFLUSH:
+		mutex_enter(&d->bd_lock);
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		break;
+
+	/*
+	 * Put interface into promiscuous mode.
+	 * This is a one-way ioctl, it is not used to turn promiscuous
+	 * mode off.
+	 */
+	case BIOCPROMISC:
+		if (d->bd_bif == 0) {
+			/*
+			 * No interface attached yet.
+			 */
+			error = EINVAL;
+			break;
+		}
+		mutex_enter(&d->bd_lock);
+		if (d->bd_promisc == 0) {
+
+			/*
+			 * Replace any existing (non-promiscuous) callback
+			 * registration.  bd_lock is dropped around the
+			 * removal call.
+			 */
+			if (d->bd_promisc_handle) {
+				uintptr_t mph;
+
+				mph = d->bd_promisc_handle;
+				d->bd_promisc_handle = 0;
+
+				mutex_exit(&d->bd_lock);
+				MBPF_PROMISC_REMOVE(&d->bd_bif->bif_mac, mph);
+				mutex_enter(&d->bd_lock);
+			}
+
+			d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY;
+			error = MBPF_PROMISC_ADD(&d->bd_bif->bif_mac,
+			    d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d,
+			    &d->bd_promisc_handle, d->bd_promisc_flags);
+			if (error == 0)
+				d->bd_promisc = 1;
+		}
+		mutex_exit(&d->bd_lock);
+		break;
+
+	/*
+	 * Get device parameters.
+	 */
+	case BIOCGDLT:
+		if (d->bd_bif == 0)
+			error = EINVAL;
+		else if (copyout(&d->bd_bif->bif_dlt, (void *)addr,
+		    sizeof (d->bd_bif->bif_dlt)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get a list of supported device parameters.
+	 */
+	case BIOCGDLTLIST:
+		if (d->bd_bif == 0) {
+			error = EINVAL;
+		} else {
+			struct bpf_dltlist list;
+
+			if (copyin((void *)addr, &list, sizeof (list)) != 0) {
+				error = EFAULT;
+				break;
+			}
+			error = bpf_getdltlist(d, &list);
+			if ((error == 0) &&
+			    copyout(&list, (void *)addr, sizeof (list)) != 0)
+				error = EFAULT;
+		}
+		break;
+
+	/*
+	 * Set device parameters.
+	 */
+	case BIOCSDLT:
+		error = bpf_setdlt(d, (void *)addr);
+		break;
+
+	/*
+	 * Get interface name.
+	 */
+	case BIOCGETIF:
+		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
+		if ((error == 0) &&
+		    copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set interface.
+	 */
+	case BIOCSETIF:
+		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
+		break;
+
+	/*
+	 * Get interface name (lifreq flavour).
+	 */
+	case BIOCGETLIF:
+		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_ifname(d, lifreq.lifr_name,
+		    sizeof (lifreq.lifr_name));
+		if ((error == 0) &&
+		    copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set interface (lifreq flavour).
+	 */
+	case BIOCSETLIF:
+		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = bpf_setif(d, lifreq.lifr_name,
+		    sizeof (lifreq.lifr_name));
+		break;
+
+#ifdef _SYSCALL32_IMPL
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT32:
+		{
+			struct timeval32 tv;
+
+			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Convert the timeout in microseconds to ticks */
+			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
+			    tv.tv_usec);
+			/* A non-zero request must not round down to "none" */
+			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
+				d->bd_rtout = 1;
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT32:
+		{
+			struct timeval32 tv;
+			clock_t ticks;
+
+			ticks = drv_hztousec(d->bd_rtout);
+			tv.tv_sec = ticks / 1000000;
+			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
+			if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get a list of supported device parameters.
+	 */
+	case BIOCGDLTLIST32:
+		if (d->bd_bif == 0) {
+			error = EINVAL;
+		} else {
+			struct bpf_dltlist32 lst32;
+			struct bpf_dltlist list;
+
+			if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			list.bfl_len = lst32.bfl_len;
+			list.bfl_list = (void *)(uint64_t)lst32.bfl_list;
+			error = bpf_getdltlist(d, &list);
+			if (error == 0) {
+				lst32.bfl_len = list.bfl_len;
+
+				if (copyout(&lst32, (void *)addr,
+				    sizeof (lst32)) != 0)
+					error = EFAULT;
+			}
+		}
+		break;
+
+	/*
+	 * Set link layer read filter.
+	 */
+	case BIOCSETF32: {
+		struct bpf_program32 prog32;
+
+		/*
+		 * Copy in exactly the size of the 32-bit structure; using
+		 * the native structure's size would write past prog32.
+		 */
+		if (ddi_copyin((void *)addr, &prog32, sizeof (prog32), mode)) {
+			error = EFAULT;
+			break;
+		}
+		prog.bf_len = prog32.bf_len;
+		prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
+		error = bpf_setf(d, &prog);
+		break;
+	}
+#endif
+
+	/*
+	 * Set read timeout.
+	 */
+	case BIOCSRTIMEOUT:
+		{
+			struct timeval tv;
+
+			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
+				error = EFAULT;
+				break;
+			}
+
+			/* Convert the timeout in microseconds to ticks */
+			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
+			    tv.tv_usec);
+			/* A non-zero request must not round down to "none" */
+			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
+				d->bd_rtout = 1;
+			break;
+		}
+
+	/*
+	 * Get read timeout.
+	 */
+	case BIOCGRTIMEOUT:
+		{
+			struct timeval tv;
+			clock_t ticks;
+
+			ticks = drv_hztousec(d->bd_rtout);
+			tv.tv_sec = ticks / 1000000;
+			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
+			if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Get packet stats.
+	 */
+	case BIOCGSTATS:
+		{
+			struct bpf_stat bs;
+
+			bs.bs_recv = d->bd_rcount;
+			bs.bs_drop = d->bd_dcount;
+			bs.bs_capt = d->bd_ccount;
+			if (copyout(&bs, (void *)addr, sizeof (bs)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	/*
+	 * Set immediate mode.
+	 */
+	case BIOCIMMEDIATE:
+		if (copyin((void *)addr, &d->bd_immediate,
+		    sizeof (d->bd_immediate)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get filter language version.
+	 */
+	case BIOCVERSION:
+		{
+			struct bpf_version bv;
+
+			bv.bv_major = BPF_MAJOR_VERSION;
+			bv.bv_minor = BPF_MINOR_VERSION;
+			if (copyout(&bv, (void *)addr, sizeof (bv)) != 0)
+				error = EFAULT;
+			break;
+		}
+
+	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
+		if (copyout(&d->bd_hdrcmplt, (void *)addr,
+		    sizeof (d->bd_hdrcmplt)) != 0)
+			error = EFAULT;
+		break;
+
+	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
+		if (copyin((void *)addr, &d->bd_hdrcmplt,
+		    sizeof (d->bd_hdrcmplt)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Get "see sent packets" flag
+	 */
+	case BIOCGSEESENT:
+		if (copyout(&d->bd_seesent, (void *)addr,
+		    sizeof (d->bd_seesent)) != 0)
+			error = EFAULT;
+		break;
+
+	/*
+	 * Set "see sent" packets flag
+	 */
+	case BIOCSSEESENT:
+		if (copyin((void *)addr, &d->bd_seesent,
+		    sizeof (d->bd_seesent)) != 0)
+			error = EFAULT;
+		break;
+
+	case FIONBIO:		/* Non-blocking I/O */
+		if (copyin((void *)addr, &d->bd_nonblock,
+		    sizeof (d->bd_nonblock)) != 0)
+			error = EFAULT;
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Set d's packet filter program to fp.  If this file already has a filter,
+ * free it and replace it. If the new filter is "empty" (a null instruction
+ * pointer with a 0 length), then the result is to just remove and free the
+ * existing filter.  Installing or removing a filter also flushes the
+ * descriptor via reset_d().
+ *
+ * Returns 0 on success, EINVAL for bogus requests (null instructions with a
+ * non-zero length, more than BPF_MAXINSNS instructions, or a program that
+ * bpf_validate() rejects) and EFAULT if the instructions cannot be copied
+ * in from fp->bf_insns.
+ */
+int
+bpf_setf(struct bpf_d *d, struct bpf_program *fp)
+{
+	struct bpf_insn *fcode, *old;
+	uint_t flen, size;
+	size_t oldsize;
+
+	if (fp->bf_insns == 0) {
+		if (fp->bf_len != 0)
+			return (EINVAL);
+		mutex_enter(&d->bd_lock);
+		old = d->bd_filter;
+		oldsize = d->bd_filter_size;
+		d->bd_filter = 0;
+		d->bd_filter_size = 0;
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		/* Free the old program only after dropping bd_lock. */
+		if (old != 0)
+			kmem_free(old, oldsize);
+		return (0);
+	}
+	flen = fp->bf_len;
+	if (flen > BPF_MAXINSNS)
+		return (EINVAL);
+
+	size = flen * sizeof (*fp->bf_insns);
+	fcode = kmem_alloc(size, KM_SLEEP);
+	if (copyin(fp->bf_insns, fcode, size) != 0) {
+		/* Do not leak the staging buffer on a bad user pointer. */
+		kmem_free(fcode, size);
+		return (EFAULT);
+	}
+
+	if (bpf_validate(fcode, (int)flen)) {
+		mutex_enter(&d->bd_lock);
+		old = d->bd_filter;
+		oldsize = d->bd_filter_size;
+		d->bd_filter = fcode;
+		d->bd_filter_size = size;
+		reset_d(d);
+		mutex_exit(&d->bd_lock);
+		if (old != 0)
+			kmem_free(old, oldsize);
+
+		return (0);
+	}
+	kmem_free(fcode, size);
+	return (EINVAL);
+}
+
+/*
+ * Detach a file from its current interface (if attached at all) and attach
+ * to the interface named by ifname.
+ *
+ * ifname is a caller-owned buffer of namesize bytes; it is forcibly
+ * NUL-terminated and, if it carries no unit number, has a unit of '0'
+ * appended in place when there is room for it.
+ *
+ * Return an errno or 0.  EINTR is returned if the wait for the descriptor
+ * to become idle is interrupted.  If no matching interface is known, the
+ * result of bpf_provider_tickle() is returned instead.
+ */
+static int
+bpf_setif(struct bpf_d *d, char *ifname, int namesize)
+{
+	struct bpf_if *bp;
+	int unit_seen;
+	char *cp;
+	int i;
+
+	/*
+	 * Make sure the provided name has a unit number, and default
+	 * it to '0' if not specified.  Note that the scan below never
+	 * examines the first character of the name.
+	 * XXX This is ugly ... do this differently?
+	 */
+	unit_seen = 0;
+	cp = ifname;
+	cp[namesize - 1] = '\0';	/* sanity */
+	while (*cp++)
+		if (*cp >= '0' && *cp <= '9')
+			unit_seen = 1;
+	if (!unit_seen) {
+		/*
+		 * The scan leaves cp one past the terminating NUL, so the
+		 * NUL itself is at index (cp - ifname) - 1.  Append a single
+		 * '0' there, provided there is still room for the '\0'.
+		 */
+		i = (int)(cp - ifname) - 1;
+		if (i < (namesize - 1)) {
+			ifname[i] = '0';
+			ifname[i + 1] = '\0';
+		}
+	}
+
+	/*
+	 * Make sure that only one call to this function happens at a time
+	 * and that we're not interleaving a read/write.  bd_inuse is set
+	 * to -1 to mark the descriptor as being rebound.
+	 */
+	mutex_enter(&d->bd_lock);
+	while (d->bd_inuse != 0) {
+		d->bd_waiting++;
+		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
+			d->bd_waiting--;
+			mutex_exit(&d->bd_lock);
+			return (EINTR);
+		}
+		d->bd_waiting--;
+	}
+	d->bd_inuse = -1;
+	mutex_exit(&d->bd_lock);
+
+	/*
+	 * Look through attached interfaces for the named one, accepting
+	 * any DLT (-1).
+	 */
+	mutex_enter(&bpf_mtx);
+
+	bp = bpf_findif(d, ifname, -1);
+
+	if (bp != NULL) {
+		int error = 0;
+
+		if (d->bd_sbuf == 0)
+			error = bpf_allocbufs(d);
+
+		/*
+		 * We found the requested interface.
+		 * If we're already attached to requested interface,
+		 * just flush the buffer.
+		 */
+		mutex_enter(&d->bd_lock);
+		if (error == 0 && bp != d->bd_bif) {
+			if (d->bd_bif)
+				/*
+				 * Detach if attached to something else.
+				 */
+				bpf_detachd(d);
+
+			bpf_attachd(d, bp);
+		}
+		reset_d(d);
+		d->bd_inuse = 0;
+		if (d->bd_waiting != 0)
+			cv_signal(&d->bd_wait);
+		mutex_exit(&d->bd_lock);
+		mutex_exit(&bpf_mtx);
+		return (error);
+	}
+
+	/* Not found: release the descriptor before asking for help. */
+	mutex_enter(&d->bd_lock);
+	d->bd_inuse = 0;
+	if (d->bd_waiting != 0)
+		cv_signal(&d->bd_wait);
+	mutex_exit(&d->bd_lock);
+	mutex_exit(&bpf_mtx);
+
+	/*
+	 * Try tickle the mac layer into attaching the device...
+	 */
+	return (bpf_provider_tickle(ifname, d->bd_zone));
+}
+
+/*
+ * Copy the name of the interface d is bound to into buffer (at most
+ * bufsize bytes, via strlcpy).  Returns 0 on success or EINVAL if the
+ * descriptor has no interface.
+ */
+static int
+bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
+{
+	int error = EINVAL;
+
+	/* bd_lock keeps bd_bif stable for the duration of the copy. */
+	mutex_enter(&d->bd_lock);
+	if (d->bd_bif != NULL) {
+		(void) strlcpy(buffer, d->bd_bif->bif_ifname, bufsize);
+		error = 0;
+	}
+	mutex_exit(&d->bd_lock);
+
+	return (error);
+}
+
+/*
+ * Support for poll() system call
+ *
+ * Only readability (POLLIN | POLLRDNORM) is evaluated.  Data is considered
+ * readable when the hold buffer is non-empty, or when the store buffer is
+ * non-empty and the descriptor is in immediate mode or its read timeout
+ * has already expired.  Otherwise no event is reported, the pollhead is
+ * handed back (unless anyyet is set) and the read timeout is started if one
+ * is configured and the descriptor is idle.
+ *
+ * *reventsp is always written, so that the caller never sees a stale or
+ * uninitialised event mask.  Always returns 0.
+ */
+int
+bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	struct bpf_d *d = bpf_dev_get(getminor(dev));
+	short revents = 0;
+
+	if (events & (POLLIN | POLLRDNORM)) {
+		/*
+		 * An imitation of the FIONREAD ioctl code.
+		 */
+		mutex_enter(&d->bd_lock);
+		if (d->bd_hlen != 0 ||
+		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
+		    d->bd_slen != 0)) {
+			revents = events & (POLLIN | POLLRDNORM);
+		} else {
+			if (!anyyet)
+				*phpp = &d->bd_poll;
+			/* Start the read timeout if necessary */
+			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
+				bpf_clear_timeout(d);
+				/*
+				 * Only allow the timeout to be set once.
+				 */
+				if (d->bd_callout == 0)
+					d->bd_callout = timeout(bpf_timed_out,
+					    d, d->bd_rtout);
+				d->bd_state = BPF_WAITING;
+			}
+		}
+		mutex_exit(&d->bd_lock);
+	}
+
+	*reventsp = revents;
+	return (0);
+}
+
+/*
+ * memcpy-shaped helper whose source is an mblk_t chain rather than a flat
+ * buffer: len bytes are gathered from successive blocks of the chain
+ * starting at src_arg and stored contiguously at dst_arg.  Panics if the
+ * chain ends before len bytes have been copied.  Returns dst_arg.
+ *
+ * This works for ipnet because the dl_ipnetinfo_t is placed in an mblk_t
+ * that leads the packet itself.
+ */
+static void *
+bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
+{
+	const mblk_t *mp = src_arg;
+	uchar_t *out = dst_arg;
+	uint_t chunk;
+
+	for (; len > 0; len -= chunk) {
+		if (mp == NULL)
+			panic("bpf_mcpy");
+
+		/* Take what this block holds, capped at what is still owed. */
+		chunk = (uint_t)min(M_LEN(mp), len);
+		(void) memcpy(out, mtod(mp, const void *), chunk);
+
+		out += chunk;
+		mp = mp->b_cont;
+	}
+
+	return (dst_arg);
+}
+
+/*
+ * Run one packet through a descriptor's filter and capture it on a match.
+ *
+ * d       the listening descriptor
+ * cpfn    a function that can copy marg into the listener's buffer
+ * marg    pointer to the packet, either a data buffer or an mblk_t chain
+ * pktlen  length of the packet
+ * buflen  buffer length, if marg is a data buffer (callers pass 0 for a
+ *         chain)
+ * issent  boolean indicating whether the packet was sent or received
+ */
+static inline void
+bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen,
+    uint_t buflen, boolean_t issent)
+{
+	struct timeval now;
+	uint_t snap;
+
+	/* Outbound packets are only of interest if "see sent" is on. */
+	if (issent && !d->bd_seesent)
+		return;
+
+	/*
+	 * The counters are updated under bd_lock, together with the filter
+	 * run and the capture, so that the statistics stay accurate.
+	 */
+	mutex_enter(&d->bd_lock);
+	snap = bpf_filter(d->bd_filter, marg, pktlen, buflen);
+	DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif,
+	    struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, snap);
+	d->bd_rcount++;
+	ks_stats.kp_receive.value.ui64++;
+	if (snap > 0) {
+		/* The filter accepted the packet: timestamp and store it. */
+		uniqtime(&now);
+		catchpacket(d, marg, pktlen, snap, cpfn, &now);
+	}
+	mutex_exit(&d->bd_lock);
+}
+
+/*
+ * Incoming linkage from device drivers.
+ *
+ * arg is the struct bpf_d to deliver to and m the packet.  A message whose
+ * data lives entirely in its first block is passed on as a flat buffer
+ * copied with memcpy; anything else is passed as the mblk_t chain itself,
+ * copied with bpf_mcpy and with a buffer length of 0.
+ */
+/* ARGSUSED */
+void
+bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
+{
+	struct bpf_d *d = arg;
+	uint_t pktlen = msgdsize(m);
+
+	if (pktlen != M_LEN(m)) {
+		/* Data is spread over several blocks. */
+		bpf_deliver(d, bpf_mcpy, m, pktlen, 0, issent);
+	} else {
+		/* Single contiguous block. */
+		bpf_deliver(d, (cp_fn_t)memcpy, mtod(m, void *), pktlen,
+		    pktlen, issent);
+	}
+}
+
+/*
+ * Incoming linkage from ipnet.
+ * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
+ * from all network interfaces. Thus the tap function needs to apply a
+ * filter using the interface index/id to imitate snoop'ing on just the
+ * specified interface: packets whose hpo_ifindex (converted with ntohl)
+ * does not equal the bound interface's link id are ignored.
+ */
+/* ARGSUSED */
+void
+bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
+{
+	struct bpf_d *d = arg;
+	hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)m->b_rptr;
+
+	if (ntohl(hdr->hpo_ifindex) == d->bd_bif->bif_linkid)
+		bpf_deliver(d, bpf_mcpy, m, length, 0, issent);
+}
+
+/*
+ * Move the packet data from interface memory (pkt) into the
+ * store buffer, preceded by a struct bpf_hdr, and wake up any listener
+ * when the buffers were rotated, when immediate mode is set or when the
+ * read timeout has already expired.  "cpfn" is the routine called to do
+ * the actual data transfer.  memcpy is passed in to copy contiguous
+ * chunks, while bpf_mcpy is passed in to copy mblk_t chains.  In the
+ * latter case, pkt is really an mblk_t.
+ *
+ * d        descriptor receiving the packet (bpf_deliver calls this with
+ *          bd_lock held)
+ * pkt      packet data or mblk_t chain, as understood by cpfn
+ * pktlen   original length of the packet
+ * snaplen  number of bytes the filter asked to keep
+ * tv       timestamp recorded in the bpf header
+ *
+ * If the packet does not fit in the store buffer and no free buffer is
+ * available to rotate in, the packet is counted as dropped and discarded.
+ */
+static void
+catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
+    cp_fn_t cpfn, struct timeval *tv)
+{
+	struct bpf_hdr *hp;
+	int totlen, curlen;
+	int hdrlen = d->bd_bif->bif_hdrlen;
+	int do_wakeup = 0;
+
+	/* Counted as captured even if it ends up being dropped below. */
+	++d->bd_ccount;
+	ks_stats.kp_capture.value.ui64++;
+	/*
+	 * Figure out how many bytes to move.  If the packet is
+	 * greater or equal to the snapshot length, transfer that
+	 * much.  Otherwise, transfer the whole packet (unless
+	 * we hit the buffer size limit).
+	 */
+	totlen = hdrlen + min(snaplen, pktlen);
+	if (totlen > d->bd_bufsize)
+		totlen = d->bd_bufsize;
+
+	/*
+	 * Round up the end of the previous packet to the next longword.
+	 */
+	curlen = BPF_WORDALIGN(d->bd_slen);
+	if (curlen + totlen > d->bd_bufsize) {
+		/*
+		 * This packet will overflow the storage buffer.
+		 * Rotate the buffers if we can, then wakeup any
+		 * pending reads.
+		 */
+		if (d->bd_fbuf == 0) {
+			/*
+			 * We haven't completed the previous read yet,
+			 * so drop the packet.
+			 */
+			++d->bd_dcount;
+			ks_stats.kp_dropped.value.ui64++;
+			return;
+		}
+		ROTATE_BUFFERS(d);
+		do_wakeup = 1;
+		/* The packet goes at the start of the new store buffer. */
+		curlen = 0;
+	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
+		/*
+		 * Immediate mode is set, or the read timeout has
+		 * already expired during a select call.  A packet
+		 * arrived, so the reader should be woken up.
+		 */
+		do_wakeup = 1;
+	}
+
+	/*
+	 * Append the bpf header to the existing buffer before we add
+	 * on the actual packet data.
+	 */
+	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
+	hp->bh_tstamp.tv_sec = tv->tv_sec;
+	hp->bh_tstamp.tv_usec = tv->tv_usec;
+	hp->bh_datalen = pktlen;
+	hp->bh_hdrlen = (uint16_t)hdrlen;
+	/*
+	 * Copy the packet data into the store buffer and update its length.
+	 * bh_caplen is set, as part of the call, to the number of packet
+	 * bytes actually stored (totlen without the header).
+	 */
+	(*cpfn)((uchar_t *)hp + hdrlen, pkt,
+	    (hp->bh_caplen = totlen - hdrlen));
+	d->bd_slen = curlen + totlen;
+
+	/*
+	 * Call bpf_wakeup after bd_slen has been updated.
+	 */
+	if (do_wakeup)
+		bpf_wakeup(d);
+}
+
+/*
+ * Allocate the free and store buffers of a descriptor, each bd_bufsize
+ * bytes and zero-filled, and mark both the store and hold buffers as
+ * empty.  The allocations do not sleep.
+ *
+ * Returns 0 on success or ENOBUFS if either allocation fails, in which
+ * case no buffer is left allocated.
+ */
+static int
+bpf_allocbufs(struct bpf_d *d)
+{
+
+	d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
+	if (!d->bd_fbuf)
+		return (ENOBUFS);
+	d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
+	if (!d->bd_sbuf) {
+		kmem_free(d->bd_fbuf, d->bd_bufsize);
+		/* Do not leave a pointer to freed memory behind. */
+		d->bd_fbuf = 0;
+		return (ENOBUFS);
+	}
+	d->bd_slen = 0;
+	d->bd_hlen = 0;
+	return (0);
+}
+
+/*
+ * Release the memory owned by a descriptor: its packet buffers and its
+ * filter program.  Called on close.
+ */
+static void
+bpf_freed(struct bpf_d *d)
+{
+	/*
+	 * At this point the descriptor has been detached from its
+	 * interface and it yet hasn't been marked free.
+	 *
+	 * The hold and free buffers are only looked at when a store
+	 * buffer exists.
+	 */
+	if (d->bd_sbuf != 0) {
+		if (d->bd_fbuf != 0)
+			kmem_free(d->bd_fbuf, d->bd_bufsize);
+		if (d->bd_hbuf != 0)
+			kmem_free(d->bd_hbuf, d->bd_bufsize);
+		kmem_free(d->bd_sbuf, d->bd_bufsize);
+	}
+
+	if (d->bd_filter != 0)
+		kmem_free(d->bd_filter, d->bd_filter_size);
+}
+
+/*
+ * Attach additional dlt for a interface to bpf.
+ *
+ * ifp       provider handle for the interface
+ * dlt       the link layer type, in the form accepted by bpf_dl_to_dlt()
+ *           and bpf_dl_hdrsize()
+ * zoneid    zone the interface belongs to
+ * provider  id of the registered bpf provider backing the interface
+ *
+ * The zoneid is passed in explicitly to prevent the need to
+ * do a lookup in dls using the linkid. Such a lookup would need
+ * to use the same hash table that gets used for walking when
+ * dls_set_bpfattach() is called.
+ *
+ * On success a new struct bpf_if is appended to bpf_iflist.  Every failure
+ * (unknown provider, no memory, client open failure, linkid resolution
+ * failure) is silent apart from a warning when bpf_debug is set.
+ */
+void
+bpfattach(uintptr_t ifp, int dlt, zoneid_t zoneid, int provider)
+{
+	bpf_provider_t *bpr;
+	struct bpf_if *bp;
+	uintptr_t client;
+	int hdrlen;
+
+	bpr = bpf_find_provider_by_id(provider);
+	if (bpr == NULL) {
+		if (bpf_debug)
+			cmn_err(CE_WARN, "bpfattach: unknown provider %d",
+			    provider);
+		return;
+	}
+
+	bp = kmem_zalloc(sizeof (*bp), KM_NOSLEEP);
+	if (bp == NULL) {
+		if (bpf_debug)
+			cmn_err(CE_WARN, "bpfattach: no memory for bpf_if");
+		return;
+	}
+	/* Keep a private copy of the provider's entry points. */
+	bp->bif_mac = *bpr;
+
+	/*
+	 * To get the user-visible name, it is necessary to get the mac
+	 * client name of an interface and for this, we need to do the
+	 * mac_client_open. Leaving it open is undesirable because it
+	 * creates an open reference that is hard to see from outside
+	 * of bpf, potentially leading to data structures not being
+	 * cleaned up when they should.
+	 */
+	if (MBPF_CLIENT_OPEN(&bp->bif_mac, ifp, &client) != 0) {
+		if (bpf_debug)
+			cmn_err(CE_WARN,
+			    "bpfattach: mac_client_open fail for %s",
+			    MBPF_NAME(&bp->bif_mac, ifp));
+		kmem_free(bp, sizeof (*bp));
+		return;
+	}
+	(void) strlcpy(bp->bif_ifname, MBPF_CLIENT_NAME(&bp->bif_mac, client),
+	    sizeof (bp->bif_ifname));
+	MBPF_CLIENT_CLOSE(&bp->bif_mac, client);
+
+	bp->bif_ifp = ifp;
+	bp->bif_dlt = bpf_dl_to_dlt(dlt);
+	bp->bif_zoneid = zoneid;
+	LIST_INIT(&bp->bif_dlist);
+
+	/*
+	 * Compute the length of the bpf header.  This is not necessarily
+	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
+	 * that the network layer header begins on a longword boundary (for
+	 * performance reasons and to alleviate alignment restrictions).
+	 */
+	hdrlen = bpf_dl_hdrsize(dlt);
+	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
+
+	if (MBPF_GET_LINKID(&bp->bif_mac, MBPF_NAME(&bp->bif_mac, ifp),
+	    &bp->bif_linkid, zoneid) != 0) {
+		if (bpf_debug) {
+			cmn_err(CE_WARN,
+			    "bpfattach: linkid resolution fail for %s/%s",
+			    MBPF_NAME(&bp->bif_mac, ifp), bp->bif_ifname);
+		}
+		kmem_free(bp, sizeof (*bp));
+		return;
+	}
+	mutex_init(&bp->bif_lock, NULL, MUTEX_DRIVER, NULL);
+
+	bpf_debug_nic_action("attached to", bp);
+
+	/* Publish the fully initialised interface. */
+	mutex_enter(&bpf_mtx);
+	TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next);
+	mutex_exit(&bpf_mtx);
+}
+
+/*
+ * Remove an interface from bpf.
+ *
+ * Every descriptor bound to a bpf_if with the given ifp is detached, then
+ * every bpf_if with that ifp is unlinked from bpf_iflist and freed once its
+ * bif_inuse count has dropped to zero.  The whole operation runs under
+ * bpf_mtx.  At least one matching bpf_if is expected to exist (ASSERT).
+ */
+void
+bpfdetach(uintptr_t ifp)
+{
+	struct bpf_if *bp;
+	struct bpf_d *d;
+	int removed = 0;
+
+	mutex_enter(&bpf_mtx);
+	/*
+	 * Loop through all of the known descriptors to find any that are
+	 * using the interface that wants to be detached.
+	 */
+	LIST_FOREACH(d, &bpf_list, bd_list) {
+		mutex_enter(&d->bd_lock);
+		bp = d->bd_bif;
+		if (bp != NULL && bp->bif_ifp == ifp) {
+			/*
+			 * Detach the descriptor from an interface now.
+			 * It will be free'ed later by close routine.
+			 */
+			bpf_detachd(d);
+		}
+		mutex_exit(&d->bd_lock);
+	}
+
+	/*
+	 * An entry is removed from the list inside the walk, so the walk is
+	 * restarted from the head after every removal.
+	 */
+again:
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (bp->bif_ifp == ifp) {
+			TAILQ_REMOVE(&bpf_iflist, bp, bif_next);
+			bpf_debug_nic_action("detached from", bp);
+			/*
+			 * Wait for users that bumped bif_inuse (see
+			 * bpf_getdltlist) to let go before freeing.
+			 */
+			while (bp->bif_inuse != 0)
+				cv_wait(&bpf_dlt_waiter, &bpf_mtx);
+			kmem_free(bp, sizeof (*bp));
+			removed++;
+			goto again;
+		}
+	}
+	mutex_exit(&bpf_mtx);
+
+	ASSERT(removed > 0);
+}
+
+/*
+ * Get a list of available data link type of the interface.
+ *
+ * Walks bpf_iflist for entries that match the bound interface and are
+ * visible from the descriptor's zone.  If listp->bfl_list is non-NULL,
+ * each matching DLT is copied out to that user array, which has room for
+ * listp->bfl_len entries.  On return listp->bfl_len holds the number of
+ * matching entries.
+ *
+ * Returns 0 on success, ENOMEM if the user array is too small (bfl_len is
+ * then left untouched) or EFAULT if a copyout failed.
+ */
+static int
+bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp)
+{
+	char ifname[LIFNAMSIZ+1];
+	struct bpf_if *bp;
+	uintptr_t ifp;
+	int n, error;
+
+	mutex_enter(&bpf_mtx);
+	ifp = d->bd_bif->bif_ifp;
+	(void) strlcpy(ifname, MBPF_NAME(&d->bd_bif->bif_mac, ifp),
+	    sizeof (ifname));
+	n = 0;
+	error = 0;
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if (strcmp(bp->bif_ifname, ifname) != 0)
+			continue;
+		if (d->bd_zone != GLOBAL_ZONEID &&
+		    d->bd_zone != bp->bif_zoneid)
+			continue;
+		if (listp->bfl_list != NULL) {
+			if (n >= listp->bfl_len) {
+				/*
+				 * The caller's array is full.  bpf_mtx must
+				 * not stay held across the return, and any
+				 * waiter in bpfdetach() still needs its
+				 * wakeup.
+				 */
+				cv_signal(&bpf_dlt_waiter);
+				mutex_exit(&bpf_mtx);
+				return (ENOMEM);
+			}
+			/*
+			 * Bumping of bif_inuse ensures the structure does not
+			 * disappear while the copyout runs and allows the for
+			 * loop to be continued.
+			 */
+			bp->bif_inuse++;
+			mutex_exit(&bpf_mtx);
+			if (copyout(&bp->bif_dlt,
+			    listp->bfl_list + n, sizeof (uint_t)) != 0)
+				error = EFAULT;
+			mutex_enter(&bpf_mtx);
+			bp->bif_inuse--;
+		}
+		n++;
+	}
+	cv_signal(&bpf_dlt_waiter);
+	mutex_exit(&bpf_mtx);
+	listp->bfl_len = n;
+	return (error);
+}
+
+/*
+ * Set the data link type of a BPF instance.
+ *
+ * addr is the user address of an int holding the requested DLT.  The
+ * descriptor is rebound to the bpf_if that has the same name as the
+ * current one and the requested DLT, and its buffers are flushed.
+ *
+ * Returns 0 on success (or if the DLT is already in effect), EFAULT if
+ * the DLT cannot be copied in and EINVAL if no interface is set, if the
+ * zone checks refuse the request or if no interface with that DLT exists.
+ */
+static int
+bpf_setdlt(struct bpf_d *d, void *addr)
+{
+	char ifname[LIFNAMSIZ+1];
+	struct bpf_if *bp;
+	int error = 0;
+	int dlt;
+
+	if (copyin(addr, &dlt, sizeof (dlt)) != 0)
+		return (EFAULT);
+
+	/*
+	 * The established order is get bpf_mtx before bd_lock, even
+	 * though bpf_mtx is not needed until the loop...
+	 */
+	mutex_enter(&bpf_mtx);
+	mutex_enter(&d->bd_lock);
+
+	/* Interface not set */
+	if (d->bd_bif == 0) {
+		error = EINVAL;
+		goto unlock_both;
+	}
+
+	/* NULL-op */
+	if (d->bd_bif->bif_dlt == dlt)
+		goto unlock_both;
+
+	/*
+	 * See the matrix at the top of the file for the permissions table
+	 * enforced by this driver.
+	 */
+	if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) &&
+	    (d->bd_bif->bif_zoneid != d->bd_zone)) {
+		error = EINVAL;
+		goto unlock_both;
+	}
+
+	(void) strlcpy(ifname,
+	    MBPF_NAME(&d->bd_bif->bif_mac, d->bd_bif->bif_ifp),
+	    sizeof (ifname));
+
+	bp = bpf_findif(d, ifname, dlt);
+
+	/*
+	 * From here on only bd_lock is held.
+	 */
+	mutex_exit(&bpf_mtx);
+
+	if (bp == NULL) {
+		/*
+		 * No matching interface supports the requested DLT:
+		 * report an error and leave the current binding alone.
+		 */
+		error = EINVAL;
+	} else {
+		bpf_detachd(d);
+		bpf_attachd(d, bp);
+		reset_d(d);
+	}
+
+	mutex_exit(&d->bd_lock);
+	return (error);
+
+unlock_both:
+	mutex_exit(&d->bd_lock);
+	mutex_exit(&bpf_mtx);
+	return (error);
+}
+
+/*
+ * Cancel the read timeout of a descriptor, if one is pending.
+ *
+ * bpf_clear_timeout is called with the bd_lock mutex held, providing it
+ * with the necessary protection to retrieve and modify bd_callout but it
+ * does not hold the lock for its entire duration... see below...
+ * bd_lock is held again when the function returns.
+ */
+static void
+bpf_clear_timeout(struct bpf_d *d)
+{
+	timeout_id_t tid = d->bd_callout;
+	d->bd_callout = 0;
+	/* Keep the descriptor marked busy while bd_lock may be dropped. */
+	d->bd_inuse++;
+
+	/*
+	 * If the timeout has fired and bpf_timed_out is waiting on bd_lock,
+	 * calling untimeout with bd_lock held could deadlock: untimeout
+	 * would wait for bpf_timed_out to finish and it never would.  So
+	 * the lock is dropped around the untimeout call.
+	 */
+	if (tid != 0) {
+		mutex_exit(&d->bd_lock);
+		(void) untimeout(tid);
+		mutex_enter(&d->bd_lock);
+	}
+
+	d->bd_inuse--;
+}
+
+/*
+ * As a cloning device driver, BPF needs to keep track of which device
+ * numbers are in use and which ones are not. A hash table, indexed by
+ * the minor device number, is used to store the pointers to the
+ * individual descriptors that are allocated in bpfopen().
+ * The functions below present the interface for that hash table to
+ * the rest of the driver.
+ */
+
+/*
+ * Look up the descriptor registered for a minor number.  Returns NULL if
+ * the hash table has no entry for it.
+ */
+static struct bpf_d *
+bpf_dev_find(minor_t minor)
+{
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)minor;
+	struct bpf_d *found = NULL;
+
+	/* On a miss, found is left at its initial NULL. */
+	(void) mod_hash_find(bpf_hash, key, (mod_hash_val_t *)&found);
+
+	return (found);
+}
+
+/*
+ * Register a descriptor in the hash table under its minor number
+ * (d->bd_dev).  The result of the insertion is ignored.
+ */
+static void
+bpf_dev_add(struct bpf_d *d)
+{
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)d->bd_dev;
+
+	(void) mod_hash_insert(bpf_hash, key, (mod_hash_val_t)d);
+}
+
+/*
+ * Remove a descriptor from the hash table.  The entry stored under
+ * d->bd_dev is expected to be d itself (ASSERT).
+ */
+static void
+bpf_dev_remove(struct bpf_d *d)
+{
+	/*
+	 * Start from NULL so that the ASSERT below tests a defined value
+	 * even if the removal fails and never stores a result.
+	 */
+	struct bpf_d *stor = NULL;
+
+	(void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev,
+	    (mod_hash_val_t *)&stor);
+	ASSERT(stor == d);
+}
+
+/*
+ * bpf_dev_get should only ever be called for a minor number that exists,
+ * thus there should always be a pointer in the hash table that corresponds
+ * to it.  This is asserted rather than handled.
+ */
+static struct bpf_d *
+bpf_dev_get(minor_t minor)
+{
+	struct bpf_d *d = bpf_dev_find(minor);
+
+	ASSERT(d != NULL);
+
+	return (d);
+}
+
+/*
+ * When bpf_debug is set, log one line describing an action (txt) taken on
+ * interface bp: its name, the provider's name for it, and its link id,
+ * zone id and DLT.  Does nothing otherwise.
+ */
+static void
+bpf_debug_nic_action(char *txt, struct bpf_if *bp)
+{
+	if (!bpf_debug)
+		return;
+
+	cmn_err(CE_CONT, "%s %s %s/%d/%d/%d\n", bp->bif_ifname, txt,
+	    MBPF_NAME(&bp->bif_mac, bp->bif_ifp), bp->bif_linkid,
+	    bp->bif_zoneid, bp->bif_dlt);
+}
+
+/*
+ * Finding a BPF network interface is a two pass job.
+ * In the first pass, the best possible match is made on zone, DLT and
+ * interface name.
+ * In the second pass, we allow global zone snoopers to attach to interfaces
+ * that are reserved for other zones.
+ * This ensures that the global zone will always see its own interfaces first
+ * before attaching to those that belong to a shared IP instance zone.
+ *
+ * A dlt of -1 matches any DLT.  Returns the first matching entry of
+ * bpf_iflist, or NULL if there is none.
+ */
+static struct bpf_if *
+bpf_findif(struct bpf_d *d, char *ifname, int dlt)
+{
+	struct bpf_if *bp;
+
+	/* Pass 1: the interface must belong to the descriptor's zone. */
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if ((bp->bif_ifp != 0) &&
+		    (strcmp(ifname, bp->bif_ifname) == 0) &&
+		    (bp->bif_zoneid == d->bd_zone) &&
+		    ((dlt == -1) || (dlt == bp->bif_dlt)))
+			return (bp);
+	}
+
+	/* Only the global zone gets a second, zone-agnostic, pass. */
+	if (d->bd_zone != GLOBAL_ZONEID)
+		return (NULL);
+
+	/* Pass 2: any zone will do. */
+	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
+		if ((bp->bif_ifp != 0) &&
+		    (strcmp(ifname, bp->bif_ifname) == 0) &&
+		    ((dlt == -1) || (dlt == bp->bif_dlt)))
+			return (bp);
+	}
+
+	return (NULL);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf.conf	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,26 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#
+name="bpf" parent="pseudo" instance=0;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf_dlt.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/dlpi.h>
+#include <net/if.h>
+#include <net/dlt.h>
+
+/*
+ * This table provides a mapping of the DLPI data link types used in
+ * Solaris to the BPF data link types. Providing this translation in
+ * the kernel allows libpcap to be downloaded and used without any
+ * need for change.
+ *
+ * Note that this table is not necessarily sorted.
+ */
+static uint_t dl_to_dlt[][3] = {
+	{ DL_CSMACD,	DLT_EN10MB,	14 },	/* IEEE 802.3 CSMA/CD */
+	{ DL_TPB,	DLT_NULL,	0 },	/* IEEE 802.4 Token Bus */
+	{ DL_TPR,	DLT_IEEE802,	0 },	/* IEEE 802.5 Token Ring */
+	{ DL_METRO,	DLT_NULL,	0 },	/* IEEE 802.6 Metro Net */
+	{ DL_ETHER,	DLT_EN10MB,	14 },	/* Ethernet Bus */
+	{ DL_HDLC,	DLT_C_HDLC,	0 },	/* Cisco HDLC protocol */
+	{ DL_CHAR,	DLT_NULL,	0 },	/* Character Synchr. proto */
+	{ DL_CTCA,	DLT_NULL,	0 },	/* IBM Channel-to-Channel */
+	{ DL_FDDI,	DLT_FDDI,	24 },	/* Fiber Distributed data */
+	{ DL_FC,	DLT_NULL,	0 },	/* Fibre Channel interface */
+	{ DL_ATM,	DLT_SUNATM,	0 },	/* ATM */
+	{ DL_IPATM,	DLT_ATM_CLIP,	0 },	/* ATM CLIP */
+	{ DL_X25,	DLT_NULL,	0 },	/* X.25 LAPB interface */
+	{ DL_ISDN,	DLT_NULL,	0 },	/* ISDN interface */
+	{ DL_HIPPI,	DLT_HIPPI,	0 },	/* HIPPI interface */
+	{ DL_100VG,	DLT_EN10MB,	14 },	/* 100 Based VG Ethernet */
+	{ DL_100VGTPR,	DLT_IEEE802,	0 },	/* 100 Based VG Token Ring */
+	{ DL_ETH_CSMA,	DLT_EN10MB,	14 },	/* ISO 8802/3 and Ethernet */
+	{ DL_100BT,	DLT_EN10MB,	14 },	/* 100 Base T */
+	{ DL_IB,	DLT_IPOIB,	44 },	/* Solaris IPoIB (infini.) */
+	{ DL_FRAME,	DLT_FRELAY,	0 },	/* Frame Relay LAPF */
+	{ DL_MPFRAME,	DLT_NULL,	0 },	/* Multi-protocol Frame Relay */
+	{ DL_ASYNC,	DLT_NULL,	0 },	/* Character Asynch. Protocol */
+	{ DL_IPX25,	DLT_NULL,	0 },	/* X.25 Classical IP */
+	{ DL_LOOP,	DLT_NULL,	0 },	/* software loopback */
+	{ DL_IPV4,	DLT_RAW,	0 },	/* IPv4 Tunnel Link */
+	{ DL_IPV6,	DLT_RAW,	0 },	/* IPv6 Tunnel Link */
+	{ SUNW_DL_VNI,	DLT_NULL,	0 },	/* Virtual network interface */
+	{ DL_WIFI,	DLT_IEEE802_11,	0 },	/* IEEE 802.11 */
+	{ DL_IPNET,	DLT_IPNET,	24 },	/* Solaris IP Observability */
+	{ DL_OTHER,	DLT_NULL,	0 },	/* Mediums not listed above */
+	{ 0,		0 }
+};
+
+/*
+ * Map a Solaris DLPI data link type to the BPF data link type recorded
+ * for it in dl_to_dlt[].  A type that is not in the table maps to 0.
+ */
+int
+bpf_dl_to_dlt(int dl)
+{
+	const size_t nrows = sizeof (dl_to_dlt) / sizeof (dl_to_dlt[0]);
+	size_t row = 0;
+
+	while (row < nrows && dl_to_dlt[row][0] != dl)
+		row++;
+	return (row < nrows ? dl_to_dlt[row][1] : 0);
+}
+
+/*
+ * Return the link layer header size recorded in dl_to_dlt[] for a Solaris
+ * DLPI data link type.  A type that is not in the table yields 0.
+ */
+int
+bpf_dl_hdrsize(int dl)
+{
+	const size_t nrows = sizeof (dl_to_dlt) / sizeof (dl_to_dlt[0]);
+	size_t row = 0;
+
+	while (row < nrows && dl_to_dlt[row][0] != dl)
+		row++;
+	return (row < nrows ? dl_to_dlt[row][2] : 0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf_filter.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,576 @@
+/*	$NetBSD: bpf_filter.c,v 1.35 2008/08/20 13:01:54 joerg Exp $	*/
+
+/*
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpf_filter.c	8.1 (Berkeley) 6/10/93
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/stream.h>
+#include <sys/byteorder.h>
+#include <sys/sdt.h>
+
+#define	EXTRACT_SHORT(p)	BE_IN16(p)
+#define	EXTRACT_LONG(p)		BE_IN32(p)
+
+#ifdef _KERNEL
+#define	M_LEN(_m)	((_m)->b_wptr - (_m)->b_rptr)
+#define	mtod(_a, _t)	((_t)((_a)->b_rptr))
+#define	MINDEX(len, m, k) 		\
+{ 					\
+	len = M_LEN(m); 		\
+	while (k >= len) { 		\
+		k -= len; 		\
+		m = m->b_cont; 		\
+		if (m == 0) 		\
+			return (0); 	\
+		len = M_LEN(m); 	\
+	} 				\
+}
+
+/*
+ * m_xword() and m_xhalf() fetch the big-endian 32-bit or 16-bit value at
+ * byte offset k of the mblk chain m.  The value may straddle one mblk and
+ * its b_cont.  *err is set to 0 on success and left at 1 on failure.
+ */
+static int
+m_xword(mblk_t *m, uint32_t k, int *err)
+{
+	int len;
+	uchar_t *cp, *np;
+	mblk_t *m0;
+
+	*err = 1;
+	MINDEX(len, m, k);
+	cp = mtod(m, uchar_t *) + k;
+	if (len >= k + 4) {
+		*err = 0;
+		return (EXTRACT_LONG(cp));
+	}
+	m0 = m->b_cont;
+	if (m0 == 0 || M_LEN(m0) + len - k < 4) {
+		DTRACE_PROBE3(mblk_xword_fail, mblk_t *, m0, int, len, int, k);
+		return (0);
+	}
+	*err = 0;
+	np = mtod(m0, uchar_t *);
+	switch (len - k) {
+	case 1:
+		return ((cp[0] << 24) | (np[0] << 16) | (np[1] << 8) | np[2]);
+	case 2:
+		return ((cp[0] << 24) | (cp[1] << 16) | (np[0] << 8) | np[1]);
+	default:
+		return ((cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | np[0]);
+	}
+}
+
+static int
+m_xhalf(mblk_t *m, uint32_t k, int *err)
+{
+	int len;
+	uchar_t *cp;
+	mblk_t *m0;
+
+	*err = 1;
+	MINDEX(len, m, k);
+	cp = mtod(m, uchar_t *) + k;
+	if (len >= k + 2) {
+		*err = 0;
+		return (EXTRACT_SHORT(cp));
+	}
+	m0 = m->b_cont;
+	/* The low byte comes from m0, so m0 must hold at least one byte. */
+	if (m0 == 0 || M_LEN(m0) < 1) {
+		DTRACE_PROBE3(mblk_xhalf_fail, mblk_t *, m0, int, len, int, k);
+		return (0);
+	}
+	*err = 0;
+	return ((cp[0] << 8) | mtod(m0, uchar_t *)[0]);
+}
+#else /* _KERNEL */
+#include <stdlib.h>
+#endif /* !_KERNEL */
+
+#include <net/bpf.h>
+
+/*
+ * Execute the filter program starting at pc on the packet p and return its
+ * verdict; a failed packet load or a division by zero returns 0.
+ * wirelen is the length of the original packet, buflen the amount of data
+ * present.  When buflen is non-0, p points to the start of the packet data,
+ * held in one mblk_t.  When buflen is 0, p is an mblk_t pointer.
+ * Load offsets are checked without computing k + size, which could wrap.
+ */
+uint_t
+bpf_filter(struct bpf_insn *pc, uchar_t *p, uint_t wirelen, uint_t buflen)
+{
+	uint32_t A, X, k;
+	uint32_t mem[BPF_MEMWORDS];
+
+	if (pc == 0)
+		/*
+		 * No filter means accept all.
+		 */
+		return ((uint_t)-1);
+	A = 0;
+	X = 0;
+	--pc;
+	/* CONSTCOND */
+	while (1) {
+		++pc;
+		switch (pc->code) {
+
+		default:
+#ifdef _KERNEL
+			DTRACE_PROBE1(bpf_insn_unknown,
+			    struct bpf_insn *, pc);
+			return (0);
+#else
+			abort();
+#endif
+		case BPF_RET|BPF_K:
+			return ((uint_t)pc->k);
+
+		case BPF_RET|BPF_A:
+			return ((uint_t)A);
+
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof (int32_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xword((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_LONG(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof (int16_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xhalf((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				MINDEX(len, m, k);
+				A = mtod(m, uchar_t *)[k];
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (k > buflen || sizeof (int32_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xword((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_LONG(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (k > buflen || sizeof (int16_t) > buflen - k) {
+#ifdef _KERNEL
+				int merr = 0;
+
+				if (buflen != 0)
+					return (0);
+				A = m_xhalf((mblk_t *)p, k, &merr);
+				if (merr != 0)
+					return (0);
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = EXTRACT_SHORT(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				MINDEX(len, m, k);
+				A = mtod(m, uchar_t *)[k];
+				continue;
+#else
+				return (0);
+#endif
+			}
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen) {
+#ifdef _KERNEL
+				mblk_t *m;
+				int len;
+
+				if (buflen != 0)
+					return (0);
+				m = (mblk_t *)p;
+				MINDEX(len, m, k);
+				X = (mtod(m, char *)[k] & 0xf) << 2;
+				continue;
+#else
+				return (0);
+#endif
+			}
+			X = (p[pc->k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			pc += pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (A > pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (A >= pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (A == pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return (0);
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			A <<= X;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			A >>= X;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			A = -A;
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+	/* NOTREACHED */
+}
+
+#ifdef _KERNEL
+/*
+ * Return true if the 'fcode' is a valid filter program.
+ * The constraints are that each jump be forward and to a valid
+ * code, that memory accesses are within valid ranges (to the
+ * extent that this can be checked statically; loads of packet
+ * data have to be, and are, also checked at run time), and that
+ * the code terminates with either an accept or reject.
+ *
+ * The kernel needs to be able to verify an application's filter code.
+ * Otherwise, a bogus program could easily crash the system.
+ */
+int
+bpf_validate(struct bpf_insn *f, int len)
+{
+	uint_t i, from;
+	struct bpf_insn *p;
+
+	if (len < 1 || len > BPF_MAXINSNS)
+		return (0);
+
+	for (i = 0; i < len; ++i) {
+		p = &f[i];
+		DTRACE_PROBE1(bpf_valid_insn, struct bpf_insn *, p);
+		switch (BPF_CLASS(p->code)) {
+		/*
+		 * Check that memory operations use valid addresses.
+		 */
+		case BPF_LD:
+		case BPF_LDX:
+			switch (BPF_MODE(p->code)) {
+			case BPF_MEM:
+				if (p->k >= BPF_MEMWORDS)
+					return (0);
+				break;
+			case BPF_ABS:
+			case BPF_IND:
+			case BPF_MSH:
+			case BPF_IMM:
+			case BPF_LEN:
+				break;
+			default:
+				return (0);
+			}
+			break;
+		case BPF_ST:
+		case BPF_STX:
+			if (p->k >= BPF_MEMWORDS)
+				return (0);
+			break;
+		case BPF_ALU:
+			switch (BPF_OP(p->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_OR:
+			case BPF_AND:
+			case BPF_LSH:
+			case BPF_RSH:
+			case BPF_NEG:
+				break;
+			case BPF_DIV:
+				/*
+				 * Check for constant division by 0.
+				 */
+				if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
+					return (0);
+				break;
+			default:
+				return (0);
+			}
+			break;
+		case BPF_JMP:
+			/*
+			 * Check that jumps are within the code block,
+			 * and that unconditional branches don't go
+			 * backwards as a result of an overflow.
+			 * Unconditional branches have a 32-bit offset,
+			 * so they could overflow; we check to make
+			 * sure they don't.  Conditional branches have
+			 * an 8-bit offset, and the from address is <=
+			 * BPF_MAXINSNS, and we assume that BPF_MAXINSNS
+			 * is sufficiently small that adding 255 to it
+			 * won't overflow.
+			 *
+			 * We know that len is <= BPF_MAXINSNS, and we
+			 * assume that BPF_MAXINSNS is < the maximum size
+			 * of a uint_t, so that i + 1 doesn't overflow.
+			 */
+			from = i + 1;
+			switch (BPF_OP(p->code)) {
+			case BPF_JA:
+				if (from + p->k < from || from + p->k >= len)
+					return (0);
+				break;
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+				if (from + p->jt >= len || from + p->jf >= len)
+					return (0);
+				break;
+			default:
+				return (0);
+			}
+			break;
+		case BPF_RET:
+			break;
+		case BPF_MISC:
+			break;
+		default:
+			return (0);
+		}
+	}
+
+	return (BPF_CLASS(f[len - 1].code) == BPF_RET);
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf_mac.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stream.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
+
+/*
+ * This file provides the link to the functions required from the mac
+ * module. It is currently in bpf, rather than mac (like ipnet_bpf)
+ * because of the mac/dls split. The bpf driver needs to know when
+ * interfaces appear and disappear and the best place for that is in
+ * dls. Unfortunately all of the other functions used here are found
+ * in the mac module, making it seem ill suited to being at home in
+ * dls. Similarly it has even less purpose being in mac as it is
+ * today.
+ */
+static int	mac_bpf_open(const char *, uintptr_t *, zoneid_t);
+static void	mac_bpf_close(uintptr_t);
+static const char *mac_bpf_name(uintptr_t);
+static int	mac_bpf_type(uintptr_t);
+static void	mac_bpf_sdu_get(uintptr_t, uint_t *);
+static int	mac_bpf_tx(uintptr_t, mblk_t *);
+static uintptr_t mac_bpf_promisc_add(uintptr_t, int, void *, uintptr_t *, int);
+static void	mac_bpf_promisc_remove(uintptr_t);
+static int	mac_bpf_client_open(uintptr_t, uintptr_t *);
+static void	mac_bpf_client_close(uintptr_t);
+static const char *mac_bpf_client_name(uintptr_t);
+static int	mac_bpf_getlinkid(const char *, datalink_id_t *, zoneid_t);
+
+bpf_provider_t bpf_mac = {
+	BPR_MAC,
+	mac_bpf_open,
+	mac_bpf_close,
+	mac_bpf_name,
+	mac_bpf_type,
+	mac_bpf_sdu_get,
+	mac_bpf_tx,
+	mac_bpf_promisc_add,
+	mac_bpf_promisc_remove,
+	mac_bpf_getlinkid,
+	mac_bpf_client_close,
+	mac_bpf_client_name,
+	mac_bpf_client_open
+};
+
+/*ARGSUSED*/
+static int
+mac_bpf_open(const char *name, uintptr_t *mhandlep, zoneid_t zoneid)
+{
+	return (mac_open(name, (mac_handle_t *)mhandlep));
+}
+
+static void
+mac_bpf_close(uintptr_t mhandle)
+{
+	mac_close((mac_handle_t)mhandle);
+}
+
+static const char *
+mac_bpf_name(uintptr_t mhandle)
+{
+	return (mac_name((mac_handle_t)mhandle));
+}
+
+static int
+mac_bpf_type(uintptr_t mhandle)
+{
+	return (mac_type((mac_handle_t)mhandle));
+}
+
+static void
+mac_bpf_sdu_get(uintptr_t mhandle, uint_t *mtup)
+{
+	mac_sdu_get((mac_handle_t)mhandle, NULL, mtup);
+}
+
+static int
+mac_bpf_tx(uintptr_t chandle, mblk_t *pkt)
+{
+	/*
+	 * If the mac layer cannot deliver a packet as requested by BPF then
+	 * simply have the mac layer drop it. BPF isn't interested in doing
+	 * any amount of retry - that's left to the application.
+	 */
+	return (mac_tx((mac_client_handle_t)chandle, pkt, 0,
+	    MAC_DROP_ON_NO_DESC, NULL));
+}
+
+static uintptr_t
+mac_bpf_promisc_add(uintptr_t chandle, int how, void *arg, uintptr_t *promisc,
+    int flags)
+{
+	return (mac_promisc_add((mac_client_handle_t)chandle, how, bpf_mtap,
+	    arg, (mac_promisc_handle_t *)promisc, flags));
+}
+
+static void
+mac_bpf_promisc_remove(uintptr_t phandle)
+{
+	mac_promisc_remove((mac_promisc_handle_t)phandle);
+}
+
+static int
+mac_bpf_client_open(uintptr_t mhandle, uintptr_t *chandlep)
+{
+	return (mac_client_open((mac_handle_t)mhandle,
+	    (mac_client_handle_t *)chandlep,  NULL,
+	    MAC_OPEN_FLAGS_USE_DATALINK_NAME));
+}
+
+static void
+mac_bpf_client_close(uintptr_t chandle)
+{
+	mac_client_close((mac_client_handle_t)chandle, 0);
+}
+
+static const char *
+mac_bpf_client_name(uintptr_t chandle)
+{
+	return (mac_client_name((mac_client_handle_t)chandle));
+}
+
+/*ARGSUSED*/
+static int
+mac_bpf_getlinkid(const char *name, datalink_id_t *idp, zoneid_t zoneid)
+{
+	int error;
+
+	/*
+	 * If at first we don't succeed, try again, just in case it is in
+	 * hiding. The first call requires the datalink management daemon
+	 * (the authoritative source of information about name to id mapping)
+	 * to be present and answering upcalls, the second does not.
+	 */
+	error = dls_mgmt_get_linkid(name, idp);
+	if (error != 0)
+		error = dls_devnet_macname2linkid(name, idp);
+
+	return (error);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/bpf_mod.c	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,443 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/buf.h>
+#include <sys/modctl.h>
+#include <sys/open.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/sunddi.h>
+#include <sys/mac_provider.h>
+#include <sys/dls_impl.h>
+#include <inet/ipnet.h>
+
+extern	int	bpfopen(dev_t *devp, int flag, int otyp, cred_t *cred);
+extern	int	bpfclose(dev_t dev, int flag, int otyp, cred_t *cred);
+extern	int	bpfread(dev_t dev, struct uio *uio_p, cred_t *cred_p);
+extern	int	bpfwrite(dev_t dev, struct uio *uio, cred_t *cred);
+extern	int	bpfchpoll(dev_t, short, int, short *, struct pollhead **);
+extern	int	bpfioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+extern	int	bpfilterattach(void);
+extern	int	bpfilterdetach(void);
+
+extern	bpf_provider_t	bpf_mac;
+extern	bpf_provider_t	bpf_ipnet;
+
+static	int	bpf_attach(dev_info_t *, ddi_attach_cmd_t);
+static	void	*bpf_create_inst(const netid_t);
+static	void	bpf_destroy_inst(const netid_t, void *);
+static	int	bpf_detach(dev_info_t *, ddi_detach_cmd_t);
+static	int	bpf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static	int	bpf_provider_add(bpf_provider_t *);
+static	int	bpf_provider_remove(bpf_provider_t *);
+static	void	bpf_shutdown_inst(const netid_t, void *);
+
+extern	void	bpfdetach(uintptr_t);
+extern	int	bpf_bufsize;
+extern	int	bpf_maxbufsize;
+
+static LIST_HEAD(, bpf_provider_list) bpf_providers;
+
+static struct cb_ops bpf_cb_ops = {
+	bpfopen,
+	bpfclose,
+	nodev,		/* strategy */
+	nodev,		/* print */
+	nodev,		/* dump */
+	bpfread,
+	bpfwrite,	/* write */
+	bpfioctl,	/* ioctl */
+	nodev,		/* devmap */
+	nodev,		/* mmap */
+	nodev,		/* segmap */
+	bpfchpoll,	/* poll */
+	ddi_prop_op,
+	NULL,
+	D_MTSAFE,
+	CB_REV,
+	nodev,		/* aread */
+	nodev,		/* awrite */
+};
+
+static struct dev_ops bpf_ops = {
+	DEVO_REV,
+	0,
+	bpf_getinfo,
+	nulldev,
+	nulldev,
+	bpf_attach,
+	bpf_detach,
+	nodev,		/* reset */
+	&bpf_cb_ops,
+	(struct bus_ops *)0
+};
+
+extern struct mod_ops mod_driverops;
+static struct modldrv bpfmod = {
+	&mod_driverops, "Berkely Packet Filter", &bpf_ops
+};
+static struct modlinkage modlink1 = { MODREV_1, &bpfmod, NULL };
+
+static dev_info_t *bpf_dev_info = NULL;
+static net_instance_t *bpf_inst = NULL;
+
+int
+_init(void)
+{
+	/*
+	 * Only installs the module linkage; setup is done in bpf_attach().
+	 */
+	return (mod_install(&modlink1));
+}
+
+int
+_fini(void)
+{
+	/*
+	 * Only removes the module linkage; teardown is in bpf_detach().
+	 */
+	return (mod_remove(&modlink1));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	/*
+	 * Hands the query to mod_info() along with this module's linkage.
+	 */
+	return (mod_info(&modlink1, modinfop));
+}
+
+/*
+ * attach(9E): DDI_ATTACH sets up the driver, DDI_RESUME needs no work.
+ * A failed DDI_ATTACH is unwound through bpf_detach().
+ */
+static int
+bpf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_ATTACH:
+		/* Default buffer size from bpf's driver.conf file */
+		bpf_bufsize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+		    "buf_size", 32 * 1024);
+		/* Maximum buffer size from bpf's driver.conf file */
+		bpf_maxbufsize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+		    "max_buf_size", 16 * 1024 * 1024);
+
+		if (ddi_create_minor_node(dip, "bpf", S_IFCHR, 0,
+		    DDI_PSEUDO, 0) == DDI_FAILURE) {
+			ddi_remove_minor_node(dip, NULL);
+			goto attach_failed;
+		}
+		bpf_dev_info = dip;
+		ddi_report_dev(dip);
+
+		LIST_INIT(&bpf_providers);
+
+		if (bpfilterattach() != 0)
+			goto attach_failed;
+
+		/*
+		 * VERIFY, not ASSERT: the provider must also be added on
+		 * non-DEBUG kernels, where ASSERT() compiles to nothing.
+		 */
+		VERIFY(bpf_provider_add(&bpf_mac) == 0);
+		dls_set_bpfattach(bpfattach, bpfdetach);
+		ipnet_set_bpfattach(bpfattach, bpfdetach, GLOBAL_ZONEID,
+		    bpf_itap, bpf_provider_add);
+
+		/*
+		 * Set up to be notified about zones coming and going
+		 * so that proper interaction with ipnet is possible.
+		 */
+		bpf_inst = net_instance_alloc(NETINFO_VERSION);
+		if (bpf_inst == NULL)
+			goto attach_failed;
+		bpf_inst->nin_name = "bpf";
+		bpf_inst->nin_create = bpf_create_inst;
+		bpf_inst->nin_destroy = bpf_destroy_inst;
+		bpf_inst->nin_shutdown = bpf_shutdown_inst;
+		if (net_instance_register(bpf_inst) != 0) {
+			/* Clear it so bpf_detach() does not touch it. */
+			net_instance_free(bpf_inst);
+			bpf_inst = NULL;
+			goto attach_failed;
+		}
+
+		return (DDI_SUCCESS);
+	case DDI_RESUME:
+		return (DDI_SUCCESS);
+	default:
+		break;
+	}
+
+attach_failed:
+	/* Use our own detach routine to toss away what was set up above. */
+	(void) bpfilterdetach();
+	(void) bpf_detach(dip, DDI_DETACH);
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E).  Also used to unwind a failed attach, so bpf_inst may be
+ * NULL and the mac provider may never have been added.
+ */
+static int
+bpf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	switch (cmd) {
+	case DDI_DETACH:
+		if (bpf_inst != NULL) {
+			if (net_instance_unregister(bpf_inst) != 0)
+				return (DDI_FAILURE);
+			net_instance_free(bpf_inst);
+			bpf_inst = NULL;
+		}
+
+		ipnet_set_bpfattach(NULL, NULL, GLOBAL_ZONEID, NULL,
+		    bpf_provider_remove);
+		/*
+		 * Whilst we don't want to be notified about new devices that
+		 * are being detached, to set the bpf detach function to NULL
+		 * introduces a race condition between this kernel module
+		 * unloading and a network interface driver also unloading.
+		 */
+		dls_set_bpfattach(NULL, bpfdetach);
+		if (bpfilterdetach() != 0)
+			return (DDI_FAILURE);
+		/* Now everything is clean, set the detach to NULL too. */
+		dls_set_bpfattach(NULL, NULL);
+		/* Must run on non-DEBUG kernels too; ESRCH is tolerated. */
+		(void) bpf_provider_remove(&bpf_mac);
+
+		ASSERT(LIST_EMPTY(&bpf_providers));
+
+		ddi_prop_remove_all(dip);
+
+		return (DDI_SUCCESS);
+	case DDI_SUSPEND:
+	case DDI_PM_SUSPEND:
+		return (DDI_SUCCESS);
+	default:
+		break;
+	}
+	return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+bpf_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+	/*
+	 * There is a single bpf node: every dev_t maps to bpf_dev_info
+	 * and to instance 0.  Other queries are not supported.
+	 */
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		*result = bpf_dev_info;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*result = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * The two functions below work with and manage a list of providers that
+ * supply BPF with packets. Their addition and removal only happens
+ * when the bpf module is attaching/detaching, thus there is no race
+ * condition to guard against with using locks as the kernel module system
+ * takes care of this for us. Similarly, bpf_provider_tickle() is called
+ * from bpf_setif, which implies an open file descriptor that would get
+ * in the way of detach being active.
+ */
+static int
+bpf_provider_add(bpf_provider_t *provider)
+{
+	bpf_provider_list_t *cur, *ent;
+
+	/* A provider may only be on the list once. */
+	LIST_FOREACH(cur, &bpf_providers, bpl_next)
+		if (cur->bpl_what == provider)
+			return (EEXIST);
+
+	/* KM_SLEEP: the allocation waits rather than failing. */
+	ent = kmem_alloc(sizeof (*ent), KM_SLEEP);
+	ent->bpl_what = provider;
+	LIST_INSERT_HEAD(&bpf_providers, ent, bpl_next);
+
+	return (0);
+}
+
+static int
+bpf_provider_remove(bpf_provider_t *provider)
+{
+	bpf_provider_list_t *ent;
+
+	/*
+	 * Unlink and free the list entry that wraps this provider.  ESRCH
+	 * tells the caller that the provider was not on the list.
+	 */
+	LIST_FOREACH(ent, &bpf_providers, bpl_next) {
+		if (ent->bpl_what != provider)
+			continue;
+		LIST_REMOVE(ent, bpl_next);
+		kmem_free(ent, sizeof (*ent));
+		return (0);
+	}
+
+	return (ESRCH);
+}
+
+/*
+ * Return the structure holding the functions of the registered packet
+ * provider whose bpr_unit is 'who', or NULL if no provider on the list
+ * has that unit.
+ */
+bpf_provider_t *
+bpf_find_provider_by_id(int who)
+{
+	bpf_provider_list_t *ent;
+
+	LIST_FOREACH(ent, &bpf_providers, bpl_next)
+		if (ent->bpl_what->bpr_unit == who)
+			break;
+
+	return (ent != NULL ? ent->bpl_what : NULL);
+}
+
+/*
+ * Called from bpf_setif() to force an open() of the named device.  If the
+ * device has been unloaded by the kernel but is still recognised, this
+ * will hopefully cause it to be loaded back in.  The packet provider that
+ * owns the name is not known here, so every registered provider is tried.
+ * Returns EWOULDBLOCK if at least one open succeeded and ENXIO if none
+ * did.
+ */
+int
+bpf_provider_tickle(char *name, zoneid_t zone)
+{
+	bpf_provider_list_t *ent;
+	bpf_provider_t *prov;
+	datalink_id_t linkid;
+	mac_handle_t mh;
+	uintptr_t handle;
+	int opened = 0;
+
+	LIST_FOREACH(ent, &bpf_providers, bpl_next) {
+		prov = ent->bpl_what;
+		handle = 0;
+
+		/* The plain case: the provider can open the name as is. */
+		if (prov->bpr_open(name, &handle, zone) == 0) {
+			prov->bpr_close(handle);
+			opened++;
+			continue;
+		}
+
+		/*
+		 * For mac devices, sometimes the open/close is not
+		 * enough. In that case, further provocation is
+		 * attempted by fetching the linkid, retrying the open
+		 * by name and, if that also fails, opening the mac by
+		 * linkid rather than by name.
+		 */
+		if (prov->bpr_unit != BPR_MAC)
+			continue;
+
+		if (prov->bpr_getlinkid(name, &linkid, zone) != 0)
+			continue;
+
+		if (prov->bpr_open(name, &handle, zone) == 0) {
+			prov->bpr_close(handle);
+			opened++;
+		} else if (mac_open_by_linkid(linkid, &mh) == 0) {
+			mac_close(mh);
+			opened++;
+		}
+	}
+
+	return (opened != 0 ? EWOULDBLOCK : ENXIO);
+}
+
+/*
+ * The following three functions provide the necessary callbacks into
+ * the netinfo API. This API is primarily used to trigger awareness of
+ * when a zone is being torn down, allowing BPF to drive IPNET to
+ * tell it which interfaces need to go away.
+ */
+/*ARGSUSED*/
+static void *
+bpf_create_inst(const netid_t netid)
+{
+	/*
+	 * BPF does not keep any per-instance state, its list of
+	 * interfaces is global, as is its device hash table.
+	 */
+	return ((void *)bpf_itap);
+}
+
+/*ARGSUSED*/
+static void
+bpf_shutdown_inst(const netid_t netid, void *arg)
+{
+	zoneid_t zoneid;
+
+	zoneid = net_getzoneidbynetid(netid);
+	if (zoneid != GLOBAL_ZONEID) {
+		ipnet_set_bpfattach(NULL, NULL, zoneid, NULL, NULL);
+	}
+}
+
+/*ARGSUSED*/
+static void
+bpf_destroy_inst(const netid_t netid, void *arg)
+{
+}
+
+/*
+ * This function is required, and is called from bpfopen, rather than
+ * bpf_create_inst() for the simple reason that when bpf_create_inst()
+ * is called, the zone is not fully initialised yet. This leads to
+ * functions that map the zoneid to pointers failing (when they should
+ * not be failing) and thus the system panic'ing.
+ */
+void
+bpf_open_zone(const zoneid_t zoneid)
+{
+	ipnet_set_bpfattach(bpfattach, bpfdetach,
+	    zoneid, bpf_itap, bpf_provider_add);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/net/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# uts/common/io/bpf/net/Makefile
+#
+# include global definitions
+include ../../../../../Makefile.master
+
+HDRS=	bpf.h bpfdesc.h dlt.h
+
+ROOTDIRS=	$(ROOT)/usr/include/net
+
+ROOTHDRS=	$(HDRS:%=$(ROOT)/usr/include/net/%)
+
+$(ROOTDIRS)/%:	%
+	$(INS.file)
+
+.KEEP_STATE:
+
+install_h:	$(ROOTDIRS) $(ROOTHDRS)
+
+$(ROOTDIRS):
+	$(INS.dir)
+
+check:	$(CHECKHDRS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/net/bpf.h	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,298 @@
+/*	$NetBSD: bpf.h,v 1.50 2009/01/13 19:10:52 christos Exp $	*/
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpf.h	8.2 (Berkeley) 1/9/95
+ * @(#) Header: bpf.h,v 1.36 97/06/12 14:29:53 leres Exp  (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_BPF_H_
+#define	_NET_BPF_H_
+
+#include <sys/time.h>
+#include <sys/types32.h>
+#include <sys/ioccom.h>
+
+/* BSD style release date */
+#define	BPF_RELEASE 199606
+
+typedef	int bpf_int32;
+typedef	uint_t bpf_uint_t32;
+typedef	uint_t bpf_u_int32;
+
+/*
+ * Alignment macros.  BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define	BPF_ALIGNMENT sizeof (uint32_t)
+#define	BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+#define	BPF_MAXINSNS 512
+#define	BPF_DFLTBUFSIZE (1024*1024)	/* default static upper limit */
+#define	BPF_MAXBUFSIZE (1024*1024*16)	/* hard limit on sysctl'able value */
+#define	BPF_MINBUFSIZE 32
+
+/*
+ *  Structure for BIOCSETF.
+ */
+struct bpf_program {
+	uint_t bf_len;
+	struct bpf_insn *bf_insns;
+};
+struct bpf_program32 {
+	uint_t bf_len;
+	caddr32_t bf_insns;
+};
+
+/*
+ * Struct returned by BIOCGSTATS and net.bpf.stats sysctl.
+ */
+struct bpf_stat {
+	uint64_t bs_recv;	/* number of packets received */
+	uint64_t bs_drop;	/* number of packets dropped */
+	uint64_t bs_capt;	/* number of packets captured */
+	uint64_t bs_padding[13];
+};
+
+/*
+ * Struct returned by BIOCGSTATSOLD.
+ */
+struct bpf_stat_old {
+	uint_t bs_recv;		/* number of packets received */
+	uint_t bs_drop;		/* number of packets dropped */
+};
+
+/*
+ * Struct returned by BIOCVERSION.  This represents the version number of
+ * the filter language described by the instruction encodings below.
+ * bpf understands a program iff kernel_major == filter_major &&
+ * kernel_minor >= filter_minor, that is, if the value returned by the
+ * running kernel has the same major number and a minor number equal to
+ * or greater than that of the filter being downloaded.  Otherwise, the
+ * results are undefined, meaning an error may be returned or packets
+ * may be accepted haphazardly.
+ * It has nothing to do with the source code version.
+ */
+struct bpf_version {
+	ushort_t bv_major;
+	ushort_t bv_minor;
+};
+/* Current version number of filter architecture. */
+#define	BPF_MAJOR_VERSION 1
+#define	BPF_MINOR_VERSION 1
+
+/*
+ * BPF ioctls
+ *
+ * The first set is for compatibility with Sun's pcc style
+ * header files.  If you're using gcc, we assume that you
+ * have run fixincludes so the latter set should work.
+ */
+#define	BIOCGBLEN	 _IOR('B', 102, uint_t)
+#define	BIOCSBLEN	_IOWR('B', 102, uint_t)
+#define	BIOCSETF	 _IOW('B', 103, struct bpf_program)
+#define	BIOCFLUSH	  _IO('B', 104)
+#define	BIOCPROMISC	  _IO('B', 105)
+#define	BIOCGDLT	 _IOR('B', 106, uint_t)
+#define	BIOCGETIF	 _IOR('B', 107, struct ifreq)
+#define	BIOCGETLIF	 _IOR('B', 107, struct lifreq)
+#define	BIOCSETIF	 _IOW('B', 108, struct ifreq)
+#define	BIOCSETLIF	 _IOW('B', 108, struct lifreq)
+#define	BIOCGSTATS	 _IOR('B', 111, struct bpf_stat)
+#define	BIOCGSTATSOLD	 _IOR('B', 111, struct bpf_stat_old)
+#define	BIOCIMMEDIATE	 _IOW('B', 112, uint_t)
+#define	BIOCVERSION	 _IOR('B', 113, struct bpf_version)
+#define	BIOCSTCPF	 _IOW('B', 114, struct bpf_program)
+#define	BIOCSUDPF	 _IOW('B', 115, struct bpf_program)
+#define	BIOCGHDRCMPLT	 _IOR('B', 116, uint_t)
+#define	BIOCSHDRCMPLT	 _IOW('B', 117, uint_t)
+#define	BIOCSDLT	 _IOW('B', 118, uint_t)
+#define	BIOCGDLTLIST	_IOWR('B', 119, struct bpf_dltlist)
+#define	BIOCGSEESENT	 _IOR('B', 120, uint_t)
+#define	BIOCSSEESENT	 _IOW('B', 121, uint_t)
+#define	BIOCSRTIMEOUT	 _IOW('B', 122, struct timeval)
+#define	BIOCGRTIMEOUT	 _IOR('B', 123, struct timeval)
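+/* Variants of the ioctls above whose arguments are the 32-bit */
+/* forms of the corresponding structures (e.g. bpf_program32). */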
+#define	BIOCSETF32	 _IOW('B', 103, struct bpf_program32)
+#define	BIOCGDLTLIST32	_IOWR('B', 119, struct bpf_dltlist32)
+#define	BIOCSRTIMEOUT32	 _IOW('B', 122, struct timeval32)
+#define	BIOCGRTIMEOUT32	 _IOR('B', 123, struct timeval32)
+
+/*
+ * Structure prepended to each packet. This is "wire" format, so we
+ * cannot change it unfortunately to 64 bit times on 32 bit systems [yet].
+ */
+struct bpf_timeval {
+	int32_t tv_sec;
+	int32_t tv_usec;
+};
+
+struct bpf_hdr {
+	struct bpf_timeval bh_tstamp;	/* time stamp */
+	uint32_t	bh_caplen;	/* length of captured portion */
+	uint32_t	bh_datalen;	/* original length of packet */
+	uint16_t	bh_hdrlen;	/* length of bpf header (this struct */
+					/*  plus alignment padding) */
+};
+/*
+ * Because the structure above is not a multiple of 4 bytes, some compilers
+ * will insist on inserting padding; hence, sizeof(struct bpf_hdr) won't work.
+ * Only the kernel needs to know about it; applications use bh_hdrlen.
+ * XXX To save a few bytes on 32-bit machines, we avoid end-of-struct
+ * XXX padding by using the size of the header data elements.  This is
+ * XXX fail-safe: on new machines, we just use the 'safe' sizeof.
+ */
+#ifdef _KERNEL
+#if defined(__arm32__) || defined(__i386__) || defined(__m68k__) || \
+    defined(__mips__) || defined(__ns32k__) || defined(__vax__) || \
+    defined(__sh__) || (defined(__sparc__) && !defined(__sparc64__))
+#define	SIZEOF_BPF_HDR 18
+#else
+#define	SIZEOF_BPF_HDR sizeof (struct bpf_hdr)
+#endif
+#endif
+
+/* Pull in data-link level type codes. */
+#include <net/dlt.h>
+
+/*
+ * The instruction encodings.
+ */
+/* instruction classes */
+#define	BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC	0x07
+
+/* ld/ldx fields */
+#define	BPF_SIZE(code)	((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+#define	BPF_MODE(code)	((code) & 0xe0)
+#define		BPF_IMM 	0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+
+/* alu/jmp fields */
+#define	BPF_OP(code)	((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET	0x40
+#define	BPF_SRC(code)	((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define	BPF_RVAL(code)	((code) & 0x18)
+#define		BPF_A		0x10
+
+/* misc */
+#define	BPF_MISCOP(code) ((code) & 0xf8)
+#define		BPF_TAX		0x00
+#define		BPF_TXA		0x80
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+	uint16_t  code;
+	uint8_t   jt;
+	uint8_t   jf;
+	uint32_t  k;
+};
+
+/*
+ * Macros for insn array initializers.
+ */
+#define	BPF_STMT(code, k) { (uint16_t)(code), 0, 0, k }
+#define	BPF_JUMP(code, k, jt, jf) { (uint16_t)(code), jt, jf, k }
+
+/*
+ * Structure to retrieve available DLTs for the interface.
+ */
+struct bpf_dltlist {
+	uint_t	bfl_len;	/* number of elements in bfl_list */
+	uint_t	*bfl_list;	/* array of DLTs */
+};
+struct bpf_dltlist32 {
+	uint_t	bfl_len;
+	caddr32_t bfl_list;
+};
+
+#ifdef _KERNEL
+#include <sys/mac.h>
+#include <sys/dls_impl.h>
+
+typedef void (*bpf_itap_fn_t)(void *, mblk_t *, boolean_t, uint_t);
+
+extern void	bpfattach(uintptr_t, int, zoneid_t, int);
+extern void	bpfdetach(uintptr_t);
+extern uint_t	bpf_filter(struct bpf_insn *, uchar_t *, uint_t, uint_t);
+extern void	bpf_itap(void *, mblk_t *, boolean_t, uint_t);
+extern void	bpf_mtap(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+extern int	bpf_validate(struct bpf_insn *, int);
+
+#endif /* _KERNEL */
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define	BPF_MEMWORDS 16
+
+#endif /* !_NET_BPF_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/net/bpfdesc.h	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,235 @@
+/*	$NetBSD: bpfdesc.h,v 1.29 2009/03/14 14:46:10 dsl Exp $	*/
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpfdesc.h	8.1 (Berkeley) 6/10/93
+ *
+ * @(#) Header: bpfdesc.h,v 1.14 96/06/16 22:28:07 leres Exp  (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_BPFDESC_H_
+#define	_NET_BPFDESC_H_
+
+#include <net/if.h>			/* for IFNAMSIZ */
+#include <sys/mutex.h>
+#include <sys/condvar.h>
+#include <sys/queue.h>
+
+/*
+ * Descriptor associated with each open bpf file.
+ */
+struct bpf_d {
+	LIST_ENTRY(bpf_d) bd_list;	/* List of bpf_d */
+	LIST_ENTRY(bpf_d) bd_next;	/* List attached to bpf_if */
+	/*
+	 * Buffer slots: two mbuf clusters buffer the incoming packets.
+	 *   The model has three slots.  Sbuf is always occupied.
+	 *   sbuf (store) - Receive interrupt puts packets here.
+	 *   hbuf (hold) - When sbuf is full, put cluster here and
+	 *		   wakeup read (replace sbuf with fbuf).
+	 *   fbuf (free) - When read is done, put cluster here.
+	 * On receiving, if sbuf is full and fbuf is 0, packet is dropped.
+	 */
+	void *		bd_sbuf;	/* store slot */
+	void *		bd_hbuf;	/* hold slot */
+	void *		bd_fbuf;	/* free slot */
+	int 		bd_slen;	/* current length of store buffer */
+	int 		bd_hlen;	/* current length of hold buffer */
+
+	int		bd_bufsize;	/* absolute length of buffers */
+
+	struct bpf_if 	*bd_bif;	/* interface descriptor */
+	ulong_t		bd_rtout;	/* Read timeout in 'ticks' */
+	struct bpf_insn *bd_filter; 	/* filter code */
+	size_t		bd_filter_size;
+	ulong_t		bd_rcount;	/* number of packets received */
+	ulong_t		bd_dcount;	/* number of packets dropped */
+	ulong_t		bd_ccount;	/* number of packets captured */
+
+	uchar_t		bd_promisc;	/* true if listening promiscuously */
+	uchar_t		bd_state;	/* idle, waiting, or timed out */
+	uchar_t		bd_immediate;	/* true to return on packet arrival */
+	int		bd_hdrcmplt;	/* false to fill in src lladdr */
+	int		bd_seesent;	/* true if bpf should see sent pkts */
+	int		bd_async;	/* non-zero if packet reception .. */
+					/* .. should generate signal */
+	int		bd_nonblock;	/* non-zero for non-blocking read */
+	pid_t		bd_pgid;	/* process or group id for signal */
+	int		bd_timedout;
+	struct pollhead	bd_poll;
+	timeout_id_t	bd_callout;	/* for BPF timeouts with select */
+	pid_t		bd_pid;		/* corresponding PID */
+	void		*bd_sih;	/* soft interrupt handle */
+	/*
+	 * Solaris specific bits after this.
+	 */
+	kmutex_t	bd_lock;
+	kcondvar_t	bd_wait;
+	uintptr_t	bd_mcip;	/* Where mac_client_handle_t gets put */
+	uintptr_t	bd_promisc_handle;
+	minor_t		bd_dev;		/* device number for this handle */
+	int		bd_fmode;	/* flags from bpfopen */
+	zoneid_t	bd_zone;	/* zoneid of the opening process */
+	int		bd_inuse;
+	int		bd_waiting;
+	/*
+	 * bd_promisc_flags is used to store the promiscuous state of the
+	 * interface in BPF so that the correct mode of operation can
+	 * be kept across changing DLT or network interface.
+	 */
+	int		bd_promisc_flags;
+};
+
+
+/* Values for bd_state */
+#define	BPF_IDLE	0		/* no select in progress */
+#define	BPF_WAITING	1		/* waiting for read timeout in select */
+#define	BPF_TIMED_OUT	2		/* read timeout has expired in select */
+
+/*
+ * Description associated with the external representation of each
+ * open bpf file.
+ */
+struct bpf_d_ext {
+	int32_t		bde_bufsize;
+	uint8_t		bde_promisc;
+	uint8_t		bde_state;
+	uint8_t		bde_immediate;
+	int32_t		bde_hdrcmplt;
+	int32_t		bde_seesent;
+	pid_t		bde_pid;
+	uint64_t	bde_rcount;		/* number of packets received */
+	uint64_t	bde_dcount;		/* number of packets dropped */
+	uint64_t	bde_ccount;		/* number of packets captured */
+	char		bde_ifname[IFNAMSIZ];
+};
+
+/*
+ * Access to "layer 2" networking is provided through each such provider
+ * declaring a set of functions to use in the structure below. It has been
+ * modeled around what's required to use the mac layer. All of the functions
+ * below must be declared, even if only filled by a stub function.
+ */
+typedef struct bpf_provider_s {
+	int		bpr_unit;
+	int		(*bpr_open)(const char *, uintptr_t *, zoneid_t);
+	void		(*bpr_close)(uintptr_t);
+	const char 	*(*bpr_name)(uintptr_t);
+	int		(*bpr_type)(uintptr_t);
+	void		(*bpr_sdu_get)(uintptr_t, uint_t *);
+	int		(*bpr_tx)(uintptr_t, mblk_t *);
+	uintptr_t	(*bpr_promisc_add)(uintptr_t, int, void *, uintptr_t *,
+			    int);
+	void		(*bpr_promisc_remove)(uintptr_t);
+	int		(*bpr_getlinkid)(const char *, datalink_id_t *,
+			    zoneid_t);
+	void		(*bpr_client_close)(uintptr_t);
+	const char 	*(*bpr_client_name)(uintptr_t);
+	int		(*bpr_client_open)(uintptr_t, uintptr_t *);
+} bpf_provider_t;
+
+typedef struct bpf_provider_list {
+	LIST_ENTRY(bpf_provider_list)	bpl_next;
+	bpf_provider_t			*bpl_what;
+} bpf_provider_list_t;
+
+/*
+ * The bpr_unit field of bpf_provider_t expects an integer that comes from
+ * the list of defines below.
+ */
+#define	BPR_MAC		1
+#define	BPR_IPNET	2
+
+#define	MBPF_OPEN(_m, _n, _p, _z)	(_m)->bpr_open(_n, (uintptr_t *)_p, _z)
+#define	MBPF_CLOSE(_m, _h)		(_m)->bpr_close(_h)
+#define	MBPF_NAME(_m, _h)		(_m)->bpr_name(_h)
+#define	MBPF_TYPE(_m, _h)		(_m)->bpr_type(_h)
+#define	MBPF_SDU_GET(_m, _h, _p)	(_m)->bpr_sdu_get(_h, _p)
+#define	MBPF_TX(_m, _h, _pkt)		(_m)->bpr_tx(_h, _pkt)
+#define	MBPF_PROMISC_ADD(_m, _h, _o, _d, _p, _f) \
+				(_m)->bpr_promisc_add(_h, _o, _d, _p, _f)
+#define	MBPF_PROMISC_REMOVE(_m, _h)	(_m)->bpr_promisc_remove(_h)
+#define	MBPF_GET_LINKID(_m, _n, _ip, _z) \
+					(_m)->bpr_getlinkid(_n, _ip, _z)
+#define	MBPF_CLIENT_CLOSE(_m, _h)	(_m)->bpr_client_close(_h)
+#define	MBPF_CLIENT_NAME(_m, _h)	(_m)->bpr_client_name(_h)
+#define	MBPF_CLIENT_OPEN(_m, _h, _p)	(_m)->bpr_client_open((uintptr_t)_h, \
+					    (uintptr_t *)_p)
+
+/*
+ * Descriptor associated with each attached hardware interface.
+ */
+struct bpf_if {
+	TAILQ_ENTRY(bpf_if) bif_next;	/* list of all interfaces */
+	LIST_HEAD(, bpf_d) bif_dlist;	/* list of all descriptors att'd */
+	uint_t		bif_dlt;	/* link layer type */
+	uint_t		bif_hdrlen;	/* length of header (with padding) */
+	/*
+	 * Solaris specific bits after this.
+	 */
+	uintptr_t	bif_ifp;	/* corresponding interface */
+	datalink_id_t	bif_linkid;
+	kmutex_t	bif_lock;
+	zoneid_t	bif_zoneid;	/* zone that the interface is in */
+	int		bif_inuse;
+	bpf_provider_t	bif_mac;
+	char		bif_ifname[LIFNAMSIZ+1];
+};
+
+#ifdef _KERNEL
+typedef struct bpf_kstats_s {
+	kstat_named_t	kp_read_wait;
+	kstat_named_t	kp_write_ok;
+	kstat_named_t	kp_write_error;
+	kstat_named_t	kp_receive;
+	kstat_named_t	kp_capture;
+	kstat_named_t	kp_dropped;
+} bpf_kstats_t;
+
+int	 bpf_setf(struct bpf_d *, struct bpf_program *);
+#endif
+
+typedef void	(*bpf_attach_fn_t)(uintptr_t, int, zoneid_t, int);
+typedef void	(*bpf_detach_fn_t)(uintptr_t);
+typedef int	(*bpf_provider_reg_fn_t)(bpf_provider_t *);
+
+extern bpf_provider_t	*bpf_find_provider_by_id(int);
+extern void		bpf_open_zone(const zoneid_t);
+extern int		bpf_provider_tickle(char *, zoneid_t);
+
+#endif /* !_NET_BPFDESC_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/bpf/net/dlt.h	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,170 @@
+/*	$NetBSD: dlt.h,v 1.11 2006/02/27 14:22:26 drochner Exp $	*/
+
+/*
+ * Copyright (c) 1990, 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)bpf.h	8.2 (Berkeley) 1/9/95
+ * @(#) Header: bpf.h,v 1.36 97/06/12 14:29:53 leres Exp  (LBL)
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _NET_DLT_H_
+#define	_NET_DLT_H_
+
+/*
+ * Data-link level type codes.
+ */
+#define	DLT_NULL	0	/* no link-layer encapsulation */
+#define	DLT_EN10MB	1	/* Ethernet (10Mb) */
+#define	DLT_EN3MB	2	/* Experimental Ethernet (3Mb) */
+#define	DLT_AX25	3	/* Amateur Radio AX.25 */
+#define	DLT_PRONET	4	/* Proteon ProNET Token Ring */
+#define	DLT_CHAOS	5	/* Chaos */
+#define	DLT_IEEE802	6	/* IEEE 802 Networks */
+#define	DLT_ARCNET	7	/* ARCNET */
+#define	DLT_SLIP	8	/* Serial Line IP */
+#define	DLT_PPP		9	/* Point-to-point Protocol */
+#define	DLT_FDDI	10	/* FDDI */
+#define	DLT_ATM_RFC1483	11	/* LLC/SNAP encapsulated atm */
+#define	DLT_RAW		12	/* raw IP */
+#define	DLT_SLIP_BSDOS	13	/* BSD/OS Serial Line IP */
+#define	DLT_PPP_BSDOS	14	/* BSD/OS Point-to-point Protocol */
+#define	DLT_HIPPI	15	/* HIPPI */
+#define	DLT_HDLC	16	/* HDLC framing */
+
+#define	DLT_PFSYNC	18	/* Packet filter state syncing */
+#define	DLT_ATM_CLIP	19	/* Linux Classical-IP over ATM */
+#define	DLT_ENC		109	/* Encapsulated packets for IPsec */
+#define	DLT_LINUX_SLL	113	/* Linux cooked sockets */
+#define	DLT_LTALK	114	/* Apple LocalTalk hardware */
+#define	DLT_PFLOG	117	/* Packet filter logging, by pcap people */
+#define	DLT_CISCO_IOS	118	/* Registered for Cisco-internal use */
+
+/* Axent Raptor / Symantec Enterprise Firewall */
+#define	DLT_SYMANTEC_FIREWALL	99
+
+#define	DLT_C_HDLC		104	/* Cisco HDLC */
+#define	DLT_IEEE802_11		105	/* IEEE 802.11 wireless */
+#define	DLT_FRELAY		107	/* Frame Relay */
+#define	DLT_LOOP		108	/* OpenBSD DLT_LOOP */
+#define	DLT_ECONET		115	/* Acorn Econet */
+#define	DLT_PRISM_HEADER	119	/* 802.11 header plus Prism II info. */
+#define	DLT_AIRONET_HEADER 	120	/* 802.11 header plus Aironet info. */
+#define	DLT_HHDLC		121	/* Reserved for Siemens HiPath HDLC */
+#define	DLT_IP_OVER_FC		122	/* RFC 2625 IP-over-Fibre Channel */
+#define	DLT_SUNATM		123	/* Solaris+SunATM */
+#define	DLT_RIO			124	/* RapidIO */
+#define	DLT_PCI_EXP		125	/* PCI Express */
+#define	DLT_AURORA		126	/* Xilinx Aurora link layer */
+#define	DLT_IEEE802_11_RADIO 	127	/* 802.11 header plus radio info. */
+#define	DLT_TZSP		128	/* Tazmen Sniffer Protocol */
+#define	DLT_ARCNET_LINUX	129	/* ARCNET */
+#define	DLT_JUNIPER_MLPPP	130	/* Juniper-private data link types. */
+#define	DLT_JUNIPER_MLFR	131
+#define	DLT_JUNIPER_ES		132
+#define	DLT_JUNIPER_GGSN	133
+#define	DLT_JUNIPER_MFR		134
+#define	DLT_JUNIPER_ATM2	135
+#define	DLT_JUNIPER_SERVICES	136
+#define	DLT_JUNIPER_ATM1	137
+#define	DLT_APPLE_IP_OVER_IEEE1394	138	/* Apple IP-over-IEEE 1394 */
+
+/* Various SS7 encapsulations */
+#define	DLT_MTP2_WITH_PHDR	139	/* pseudo-header with various info, */
+					/* followed by MTP2 */
+#define	DLT_MTP2		140	/* MTP2, no pseudo-header */
+#define	DLT_MTP3		141	/* MTP3, no pseudo-header or MTP2 */
+#define	DLT_SCCP		142	/* SCCP, no pseudo-header or MTP2 */
+					/* or MTP3 */
+
+#define	DLT_DOCSIS		143	/* Reserved for DOCSIS MAC frames. */
+#define	DLT_LINUX_IRDA		144	/* Linux-IrDA packets */
+
+/* Reserved for IBM SP switch and IBM Next Federation switch. */
+#define	DLT_IBM_SP		145
+#define	DLT_IBM_SN		146
+
+#define	DLT_IEEE802_11_RADIO_AVS	163	/* 802.11 plus AVS header */
+#define	DLT_JUNIPER_MONITOR	164	/* Juniper-private data link type */
+#define	DLT_BACNET_MS_TP	165
+#define	DLT_PPP_PPPD		166	/* Another PPP variant (Linux?) */
+
+#define	DLT_JUNIPER_PPPOE	167
+#define	DLT_JUNIPER_PPPOE_ATM	168
+#define	DLT_JUNIPER_PIC_PEER	174
+#define	DLT_JUNIPER_ETHER	178
+#define	DLT_JUNIPER_PPP		179
+#define	DLT_JUNIPER_FRELAY	180
+#define	DLT_JUNIPER_CHDLC	181
+
+#define	DLT_GPRS_LLC		169	/* GPRS LLC */
+#define	DLT_GPF_T		170	/* GPF-T (ITU-T G.7041/Y.1303) */
+#define	DLT_GPF_F		171	/* GPF-F (ITU-T G.7041/Y.1303) */
+
+#define	DLT_GCOM_T1E1		172
+#define	DLT_GCOM_SERIAL		173
+
+/* "EndaceRecordFormat" */
+#define	DLT_ERF_ETH		175	/* Ethernet */
+#define	DLT_ERF_POS		176	/* Packet-over-SONET */
+
+#define	DLT_LINUX_LAPD		177	/* Raw LAPD for vISDN */
+
+#define	DLT_IPNET		226	/* MAC client view on Solaris */
+/*
+ * A number reserved for private user use is currently assigned, pending
+ * a real one from tcpdump.org. A description of the link layer frame
+ * is a requisite for this.
+ */
+#define	DLT_IPOIB		162	/* Infiniband (IPoIB) on Solaris */
+
+/*
+ * NetBSD-specific generic "raw" link type.  The upper 16-bits indicate
+ * that this is the generic raw type, and the lower 16-bits are the
+ * address family we're dealing with.
+ */
+#define	DLT_RAWAF_MASK		0x02240000
+#define	DLT_RAWAF(af)		(DLT_RAWAF_MASK | (af))
+#define	DLT_RAWAF_AF(x)		((x) & 0x0000ffff)
+#define	DLT_IS_RAWAF(x)		(((x) & 0xffff0000) == DLT_RAWAF_MASK)
+
+/*
+ * Solaris specific function to map DLPI DL_ data link types to BPF DLT_
+ */
+extern int bpf_dl_to_dlt(int);
+extern int bpf_dl_hdrsize(int);
+
+#endif /* !_NET_DLT_H_ */
--- a/usr/src/uts/common/io/dls/dls_link.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/io/dls/dls_link.c	Thu Sep 24 07:28:12 2009 -0700
@@ -35,6 +35,8 @@
 #include	<sys/sdt.h>
 #include	<sys/atomic.h>

+static void		dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid);
+
 static kmem_cache_t	*i_dls_link_cachep;
 mod_hash_t		*i_dls_link_hash;
 static uint_t		i_dls_link_count;
@@ -866,6 +868,7 @@
 		goto done;
 	}

+	dls_bpf_newzone(dlp, zid);
 	dlp->dl_zid = zid;

 	if (zid == GLOBAL_ZONEID) {
@@ -888,6 +891,41 @@
 	return (err);
 }

+
+/*
+ * When a NIC changes zone, that change needs to be communicated to BPF
+ * so that it can correctly enforce access rights on it via BPF. In the
+ * absence of a function from BPF to just change the zoneid, this is
+ * done with a detach followed by an attach.
+ */
+static void
+dls_bpf_newzone(dls_link_t *dlp, zoneid_t zid)
+{
+	if (dls_bpfdetach_fn != NULL)
+		dls_bpfdetach_fn((uintptr_t)dlp->dl_mh);
+
+	if (dls_bpfattach_fn != NULL)
+		dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
+		    zid, BPR_MAC);
+}
+
+int
+dls_link_getzid(const char *name, zoneid_t *zidp)
+{
+	dls_link_t	*dlp;
+	int		err = 0;
+
+	if ((err = dls_link_hold(name, &dlp)) != 0)
+		return (err);
+
+	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
+
+	*zidp = dlp->dl_zid;
+
+	dls_link_rele(dlp);
+	return (0);
+}
+
 void
 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp)
 {
--- a/usr/src/uts/common/io/dls/dls_mgmt.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/io/dls/dls_mgmt.c	Thu Sep 24 07:28:12 2009 -0700
@@ -60,6 +60,8 @@
 static mod_hash_t	*i_dls_devnet_id_hash;
 static mod_hash_t	*i_dls_devnet_hash;

+bpf_attach_fn_t		dls_bpfattach_fn = NULL;
+bpf_detach_fn_t		dls_bpfdetach_fn = NULL;
 boolean_t		devnet_need_rebuild;

 #define	VLAN_HASHSZ	67	/* prime */
@@ -1217,7 +1219,6 @@
 	return (0);
 }

-
 /*
  * Get linkid for the given dev.
  */
@@ -1656,6 +1657,19 @@
 			return (err);
 		}
 	}
+	/*
+	 * Tell BPF it is here, if BPF is there
+	 */
+	if (dls_bpfattach_fn != NULL) {
+		/*
+		 * The zoneid is passed in explicitly to prevent the need to
+		 * do a lookup in dls using the linkid. Such a lookup would need
+		 * to use the same hash table that gets used for walking when
+		 * dls_set_bpfattach() is called.
+		 */
+		dls_bpfattach_fn((uintptr_t)mh, mac_type(mh),
+		    dlp->dl_zid, BPR_MAC);
+	}
 	mac_perim_exit(mph);
 	return (err);
 }
@@ -1684,6 +1698,12 @@
 	if (err != 0 && err != ENOENT)
 		return (err);

+	/*
+	 * Tell BPF that the link is going away, if BPF is there.
+	 */
+	if (dls_bpfdetach_fn != NULL)
+		dls_bpfdetach_fn((uintptr_t)mh);
+
 	mac_perim_enter_by_mh(mh, &mph);
 	err = dls_link_rele_by_name(mac_name(mh));
 	mac_perim_exit(mph);
@@ -1781,3 +1801,36 @@
 {
 	return (ddh->dd_linkid);
 }
+
+/*ARGSUSED*/
+static uint_t
+i_dls_bpfattach_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+	dls_link_t		*dlp = (dls_link_t *)val;
+
+	dls_bpfattach_fn((uintptr_t)dlp->dl_mh, mac_type(dlp->dl_mh),
+	    dlp->dl_zid, BPR_MAC);
+
+	return (MH_WALK_CONTINUE);
+}
+
+/*
+ * Set the functions to call back to when adding or removing a mac so that
+ * BPF can keep its internal list of these up to date.
+ */
+void
+dls_set_bpfattach(bpf_attach_fn_t attach, bpf_detach_fn_t detach)
+{
+	bpf_attach_fn_t		old = dls_bpfattach_fn;
+
+	dls_bpfattach_fn = attach;
+	dls_bpfdetach_fn = detach;
+
+	/*
+	 * If we're setting a new attach function, call it for every
+	 * mac that has already been attached.
+	 */
+	if (attach != NULL && old == NULL) {
+		mod_hash_walk(i_dls_link_hash, i_dls_bpfattach_walker, NULL);
+	}
+}
--- a/usr/src/uts/common/io/mac/mac_client.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/io/mac/mac_client.c	Thu Sep 24 07:28:12 2009 -0700
@@ -422,6 +422,12 @@
 	return (((mac_impl_t *)mh)->mi_name);
 }

+int
+mac_type(mac_handle_t mh)
+{
+	return (((mac_impl_t *)mh)->mi_type->mt_type);
+}
+
 char *
 mac_client_name(mac_client_handle_t mch)
 {
@@ -2647,6 +2653,7 @@
 	mpip->mpi_no_phys = ((flags & MAC_PROMISC_FLAGS_NO_PHYS) != 0);
 	mpip->mpi_strip_vlan_tag =
 	    ((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
+	mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);

 	mcbi = &mip->mi_promisc_cb_info;
 	mutex_enter(mcbi->mcbi_lockp);
@@ -2823,6 +2830,17 @@
 	}

 	srs = flent->fe_tx_srs;
+	/*
+	 * This is to avoid panics with PF_PACKET that can call mac_tx()
+	 * against an interface that is not capable of sending. A rewrite
+	 * of the mac datapath is required to remove this limitation.
+	 */
+	if (srs == NULL) {
+		if (!(flag & MAC_TX_NO_HOLD))
+			MAC_TX_RELE(mcip, mytx);
+		freemsgchain(mp_chain);
+		return (NULL);
+	}
 	srs_tx = &srs->srs_tx;
 	if (srs_tx->st_mode == SRS_TX_DEFAULT &&
 	    (srs->srs_state & SRS_ENQUEUED) == 0 &&
@@ -3254,18 +3272,28 @@
 mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
     boolean_t loopback)
 {
-	mblk_t *mp_copy;
-
-	mp_copy = copymsg(mp);
-	if (mp_copy == NULL)
-		return;
+	mblk_t *mp_copy, *mp_next;
+
+	if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
+		mp_copy = copymsg(mp);	/* VLAN stripping forces a copy too */
+		if (mp_copy == NULL)
+			return;
+
+		if (mpip->mpi_strip_vlan_tag) {
+			mp_copy = mac_strip_vlan_tag_chain(mp_copy);
+			if (mp_copy == NULL)
+				return;
+		}
+		mp_next = NULL;
+	} else {
+		mp_copy = mp;		/* no-copy: hand over mp itself */
+		mp_next = mp->b_next;	/* remembered for relinking below */
+	}
 	mp_copy->b_next = NULL;

-	if (mpip->mpi_strip_vlan_tag) {
-		if ((mp_copy = mac_strip_vlan_tag_chain(mp_copy)) == NULL)
-			return;
-	}
 	mpip->mpi_fn(mpip->mpi_arg, NULL, mp_copy, loopback);
+	if (mp_copy == mp)
+		mp->b_next = mp_next;	/* put mp back on its chain */
 }

 /*
--- a/usr/src/uts/common/net/if.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/net/if.h	Thu Sep 24 07:28:12 2009 -0700
@@ -436,6 +436,7 @@
 		char	ifru_oname[IFNAMSIZ];	/* other if name */
 		struct	sockaddr ifru_broadaddr;
 		int	ifru_index;		/* interface index */
+		uint_t	ifru_mtu;
 		short	ifru_flags;
 		int	ifru_metric;
 		char	ifru_data[1];		/* interface dependent data */
@@ -487,6 +488,7 @@
 #define	ifr_data	ifr_ifru.ifru_data	/* for use by interface */
 #define	ifr_enaddr	ifr_ifru.ifru_enaddr	/* ethernet address */
 #define	ifr_index	ifr_ifru.ifru_index	/* interface index */
+#define	ifr_mtu		ifr_ifru.ifru_mtu	/* mtu */
 /* For setting ppa */
 #define	ifr_ppa		ifr_ifru.ifru_ppaflags.ifrup_ppa
--- a/usr/src/uts/common/os/netstack.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/os/netstack.c	Thu Sep 24 07:28:12 2009 -0700
@@ -20,7 +20,7 @@
  */

 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -1279,6 +1279,12 @@
 		return (zoneid);
 }

+zoneid_t
+netstack_get_zoneid(netstack_t *ns)
+{	/* returns the zoneid that corresponds to ns's stack id */
+	return (netstackid_to_zoneid(ns->netstack_stackid));
+}
+
 /*
  * Simplistic support for walking all the handles.
  * Example usage:
--- a/usr/src/uts/common/os/policy.c	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/os/policy.c	Thu Sep 24 07:28:12 2009 -0700
@@ -1679,6 +1679,12 @@
 	return (PRIV_POLICY(cr, PRIV_NET_RAWACCESS, B_FALSE, EACCES, NULL));
 }

+int
+secpolicy_net_observability(const cred_t *cr)
+{	/* returns PRIV_POLICY()'s verdict on PRIV_NET_OBSERVABILITY for cr */
+	return (PRIV_POLICY(cr, PRIV_NET_OBSERVABILITY, B_FALSE, EACCES, NULL));
+}
+
 /*
  * Need this privilege for accessing the ICMP device
  */
--- a/usr/src/uts/common/sys/dlpi.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/dlpi.h	Thu Sep 24 07:28:12 2009 -0700
@@ -57,11 +57,13 @@

 typedef struct dl_ipnetinfo {
 	uint8_t		dli_version;	/* DL_IPNETINFO_* version */
-	uint8_t		dli_ipver;	/* packet IP header version */
-	uint16_t	dli_len;	/* length of dl_ipnetinfo_t */
-	uint32_t	dli_pad;	/* alignment pad */
-	uint64_t	dli_srczone; 	/* packet source zone ID (if any) */
-	uint64_t	dli_dstzone;	/* packet dest zone ID (if any) */
+	uint8_t		dli_family;	/* packet address family (AF_*) */
+	uint16_t	dli_htype;	/* IPobs hook type */
+	uint32_t	dli_pktlen;	/* length of the attached packet */
+	uint32_t	dli_ifindex;	/* interface index of the packet */
+	uint32_t	dli_grifindex;	/* IPMP group interface index */
+	uint32_t	dli_zsrc; 	/* packet source zone ID (if any) */
+	uint32_t	dli_zdst;	/* packet dest zone ID (if any) */
 } dl_ipnetinfo_t;

 /*
--- a/usr/src/uts/common/sys/dls_impl.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/dls_impl.h	Thu Sep 24 07:28:12 2009 -0700
@@ -34,6 +34,8 @@
 #include <sys/modhash.h>
 #include <sys/kstat.h>
 #include <net/if.h>
+#include <net/bpf.h>
+#include <net/bpfdesc.h>
 #include <sys/dlpi.h>

 #ifdef	__cplusplus
@@ -84,6 +86,7 @@
 extern void		dls_link_remove(dls_link_t *, dld_str_t *);
 extern int		dls_link_header_info(dls_link_t *, mblk_t *,
 			    mac_header_info_t *);
+extern int		dls_link_getzid(const char *, zoneid_t *);
 extern int		dls_link_setzid(const char *, zoneid_t);
 extern dev_info_t	*dls_link_devinfo(dev_t);
 extern dev_t		dls_link_dev(dls_link_t *);
@@ -127,6 +130,10 @@

 extern int		dls_mgmt_get_phydev(datalink_id_t, dev_t *);

+extern bpf_attach_fn_t	dls_bpfattach_fn;
+extern bpf_detach_fn_t	dls_bpfdetach_fn;
+extern void		dls_set_bpfattach(bpf_attach_fn_t, bpf_detach_fn_t);
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/hook_event.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/hook_event.h	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -110,6 +110,57 @@
 };
 typedef struct hook_nic_event_int hook_nic_event_int_t;

+/*
+ * This structure holds the data passed back from the ip module to
+ * observability consumers.
+ *
+ * Externally exposed fields, that must match the order and size of
+ * dl_ipnetinfo_t in <sys/dlpi.h> are:
+ * hpo_version    Version number for this header
+ * hpo_family     Address family of the attached packet
+ * hpo_htype      IPobs hook type
+ * hpo_pktlen     Length of the attached packet
+ * hpo_ifindex    Interface index that the packet was received/sent over.
+ *                For local packets, this is the index of the interface
+ *                associated with the local destination address.
+ * hpo_grifindex  IPMP group interface index (zero unless hpo_ifindex
+ *                is an IPMP underlying interface).
+ * hpo_zsrc       Source zoneid; set to ALL_ZONES when unknown.
+ * hpo_zdst       Destination zoneid; set to ALL_ZONES when unknown.
+ *
+ * Fields used internally are:
+ * hpo_pkt        Pointer to the mblk_t containing this structure with
+ *                the real packet found at b_cont
+ */
+typedef struct hook_pkt_observe_s {
+	uint8_t		hpo_version;
+	uint8_t		hpo_family;
+	uint16_t	hpo_htype;
+	uint32_t	hpo_pktlen;
+	uint32_t	hpo_ifindex;
+	uint32_t	hpo_grifindex;
+	uint32_t	hpo_zsrc;
+	uint32_t	hpo_zdst;
+	/*
+	 * Fields used internally are below.
+	 */
+	mblk_t		*hpo_pkt;
+	void		*hpo_ctx;	/* opaque context (TODO: document) */
+} hook_pkt_observe_t;
+
+/*
+ * ipobs_hook_type_t describes the hook types supported
+ * by the ip module. IPOBS_HOOK_LOCAL refers to packets
+ * which are looped back internally within the ip module.
+ */
+
+typedef enum ipobs_hook_type {
+	IPOBS_HOOK_INBOUND = 0,
+	IPOBS_HOOK_OUTBOUND = 1,
+	IPOBS_HOOK_LOCAL = 2
+} ipobs_hook_type_t;
+
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/mac.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/mac.h	Thu Sep 24 07:28:12 2009 -0700
@@ -591,6 +591,8 @@
 extern void			mac_minor_rele(minor_t);
 extern void			mac_sdu_get(mac_handle_t, uint_t *, uint_t *);
 extern int			mac_maxsdu_update(mac_handle_t, uint_t);
+extern uint_t			mac_addr_len(mac_handle_t);
+extern int			mac_type(mac_handle_t);

 extern void 			mac_unicst_update(mac_handle_t,
 				    const uint8_t *);
--- a/usr/src/uts/common/sys/mac_client.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/mac_client.h	Thu Sep 24 07:28:12 2009 -0700
@@ -102,6 +102,7 @@
 #define	MAC_PROMISC_FLAGS_NO_TX_LOOP		0x0001
 #define	MAC_PROMISC_FLAGS_NO_PHYS		0x0002
 #define	MAC_PROMISC_FLAGS_VLAN_TAG_STRIP	0x0004
+#define	MAC_PROMISC_FLAGS_NO_COPY		0x0008

 /* flags passed to mac_tx() */
 #define	MAC_DROP_ON_NO_DESC	0x01 /* freemsg() if no tx descs */
@@ -157,8 +158,6 @@
     char *, boolean_t *);
 extern uint_t mac_addr_factory_num(mac_handle_t);

-extern uint_t mac_addr_len(mac_handle_t);
-
 extern mac_tx_notify_handle_t mac_client_tx_notify(mac_client_handle_t,
     mac_tx_notify_t, void *);
--- a/usr/src/uts/common/sys/mac_client_impl.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/mac_client_impl.h	Thu Sep 24 07:28:12 2009 -0700
@@ -77,6 +77,7 @@
 	boolean_t			mpi_no_tx_loop;	/* WO */
 	boolean_t			mpi_no_phys;	/* WO */
 	boolean_t			mpi_strip_vlan_tag;	/* WO */
+	boolean_t			mpi_no_copy;	/* WO */
 } mac_promisc_impl_t;

 typedef union mac_tx_percpu_s {
--- a/usr/src/uts/common/sys/neti.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/neti.h	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -54,6 +54,7 @@
 #define	NH_LOOPBACK_IN	"LOOPBACK_IN"
 #define	NH_LOOPBACK_OUT	"LOOPBACK_OUT"
 #define	NH_NIC_EVENTS	"NIC_EVENTS"
+#define	NH_OBSERVE	"OBSERVING"

 /*
  * Network NIC hardware checksum capability
--- a/usr/src/uts/common/sys/netstack.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/netstack.h	Thu Sep 24 07:28:12 2009 -0700
@@ -232,6 +232,7 @@
 extern netstack_t *netstack_find_by_zoneid(zoneid_t);

 extern zoneid_t netstackid_to_zoneid(netstackid_t);
+extern zoneid_t netstack_get_zoneid(netstack_t *);
 extern netstackid_t zoneid_to_netstackid(zoneid_t);

 extern netstack_t *netstack_get_current(void);
--- a/usr/src/uts/common/sys/policy.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/policy.h	Thu Sep 24 07:28:12 2009 -0700
@@ -112,6 +112,7 @@
 int secpolicy_net_config(const cred_t *, boolean_t);
 int secpolicy_net_icmpaccess(const cred_t *);
 int secpolicy_net_mac_aware(const cred_t *);
+int secpolicy_net_observability(const cred_t *);
 int secpolicy_net_privaddr(const cred_t *, in_port_t, int proto);
 int secpolicy_net_rawaccess(const cred_t *);
 boolean_t secpolicy_net_reply_equal(const cred_t *);
--- a/usr/src/uts/common/sys/socket.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/socket.h	Thu Sep 24 07:28:12 2009 -0700
@@ -118,6 +118,20 @@
 #define	SO_DGRAM_ERRIND	0x0200		/* Application wants delayed error */
 #define	SO_RECVUCRED	0x0400		/* Application wants ucred of sender */

+/*
+ * Socket options are passed using a signed integer, but it is also rare
+ * for more than one to ever be passed at the same time with setsockopt
+ * and only one at a time can be retrieved with getsockopt.
+ *
+ * Since the lower numbers cannot be renumbered for compatibility reasons,
+ * it would seem that we need to start a new number space (0x40000000 -
+ * 0x7fffffff) for those that don't need to be stored as a bit flag
+ * somewhere. This limits the flag options to 30 but that seems to be
+ * plenty, anyway. 0x40000000 is reserved for future use.
+ */
+#define	SO_ATTACH_FILTER	0x40000001
+#define	SO_DETACH_FILTER	0x40000002
+
 #ifdef _KERNEL
 #define	SO_SND_COPYAVOID 0x0800		/* Internal: use zero-copy */
 #define	SO_SND_BUFINFO	0x1000		/* Internal: get buffer info */
@@ -207,6 +221,7 @@
 #if !defined(_XPG4_2) || defined(__EXTENSIONS__)
 #define	SOL_ROUTE	0xfffe		/* options for routing socket level */
 #endif
+#define	SOL_PACKET	0xfffd		/* options for packet level */

 /*
  * Address families.
@@ -249,8 +264,9 @@
 #define	AF_POLICY	29		/* Security Policy DB socket */
 #define	AF_INET_OFFLOAD	30		/* Sun private; do not use */
 #define	AF_TRILL	31		/* TRILL interface */
+#define	AF_PACKET	32		/* PF_PACKET Linux socket interface */

-#define	AF_MAX		31
+#define	AF_MAX		32

 /*
  * Protocol families, same as address families for now.
@@ -289,6 +305,7 @@
 #define	PF_POLICY	AF_POLICY
 #define	PF_INET_OFFLOAD	AF_INET_OFFLOAD	/* Sun private; do not use */
 #define	PF_TRILL	AF_TRILL
+#define	PF_PACKET	AF_PACKET

 #define	PF_MAX		AF_MAX
--- a/usr/src/uts/common/sys/socket_impl.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/socket_impl.h	Thu Sep 24 07:28:12 2009 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */

@@ -35,8 +34,6 @@
 #ifndef	_SYS_SOCKET_IMPL_H
 #define	_SYS_SOCKET_IMPL_H

-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #ifdef	__cplusplus
 extern "C" {
 #endif
@@ -105,6 +102,28 @@
 };
 #endif	/* !defined(_XPG4_2) || defined(_XPG6) || defined(__EXTENSIONS__) */

+/*
+ * To be compatible with the Linux interfaces used, this structure is
+ * placed in socket_impl.h so that an include for <sys/socket.h> will
+ * pickup this structure. This structure is for use with PF_PACKET
+ * sockets.
+ */
+struct sockaddr_ll {
+	uint16_t	sll_family;	/* address family (AF_PACKET) */
+	uint16_t	sll_protocol;	/* link-layer protocol, as on Linux */
+	int32_t		sll_ifindex;	/* interface index */
+	uint16_t	sll_hatype;	/* hardware type, as on Linux */
+	uint8_t		sll_pkttype;	/* presumably a LINUX_SLL_* value */
+	uint8_t		sll_halen;	/* length of address in sll_addr */
+	uint8_t		sll_addr[8];	/* link-layer address */
+};
+
+#define	LINUX_SLL_HOST		0
+#define	LINUX_SLL_BROADCAST	1
+#define	LINUX_SLL_MULTICAST	2
+#define	LINUX_SLL_OTHERHOST	3
+#define	LINUX_SLL_OUTGOING	4
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/common/sys/socket_proto.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/socket_proto.h	Thu Sep 24 07:28:12 2009 -0700
@@ -135,6 +135,38 @@
     boolean_t, so_proto_quiesced_cb_t);

 /*
+ * These functions return EOPNOTSUPP and are intended for the sockfs
+ * developer that doesn't wish to supply stubs for every function themselves.
+ */
+extern int sock_accept_notsupp(sock_lower_handle_t, sock_lower_handle_t,
+    sock_upper_handle_t, cred_t *);
+extern int sock_bind_notsupp(sock_lower_handle_t, struct sockaddr *,
+    socklen_t, cred_t *);
+extern int sock_listen_notsupp(sock_lower_handle_t, int, cred_t *);
+extern int sock_connect_notsupp(sock_lower_handle_t,
+    const struct sockaddr *, socklen_t, sock_connid_t *, cred_t *);
+extern int sock_getpeername_notsupp(sock_lower_handle_t, struct sockaddr *,
+    socklen_t *, cred_t *);
+extern int sock_getsockname_notsupp(sock_lower_handle_t, struct sockaddr *,
+    socklen_t *, cred_t *);
+extern int sock_getsockopt_notsupp(sock_lower_handle_t, int, int, void *,
+    socklen_t *, cred_t *);
+extern int sock_setsockopt_notsupp(sock_lower_handle_t, int, int,
+    const void *, socklen_t, cred_t *);
+extern int sock_send_notsupp(sock_lower_handle_t, mblk_t *,
+    struct nmsghdr *, cred_t *);
+extern int sock_send_uio_notsupp(sock_lower_handle_t, uio_t *,
+    struct nmsghdr *, cred_t *);
+extern int sock_recv_uio_notsupp(sock_lower_handle_t, uio_t *,
+    struct nmsghdr *, cred_t *);
+extern short sock_poll_notsupp(sock_lower_handle_t, short, int, cred_t *);
+extern int sock_shutdown_notsupp(sock_lower_handle_t, int, cred_t *);
+extern void sock_clr_flowctrl_notsupp(sock_lower_handle_t);
+extern int sock_ioctl_notsupp(sock_lower_handle_t, int, intptr_t, int,
+    int32_t *, cred_t *);
+extern int sock_close_notsupp(sock_lower_handle_t, int, cred_t *);
+
+/*
  * Upcalls and related information
  */
--- a/usr/src/uts/common/sys/sockio.h	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/common/sys/sockio.h	Thu Sep 24 07:28:12 2009 -0700
@@ -310,6 +310,9 @@

 #define	SIOCSQPTR	_IOWR('i', 184, int)    /* set q_ptr of stream */

+#define	SIOCGIFHWADDR	_IOWR('i', 185, int)	/* PF_PACKET */
+#define	SIOCGSTAMP	_IOWR('i', 186, struct timeval)	/* PF_PACKET */
+
 #ifdef	__cplusplus
 }
 #endif
--- a/usr/src/uts/intel/Makefile.intel.shared	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/Makefile.intel.shared	Thu Sep 24 07:28:12 2009 -0700
@@ -213,6 +213,7 @@
 DRV_KMODS	+= bl
 DRV_KMODS	+= bge
 DRV_KMODS	+= bofi
+DRV_KMODS	+= bpf
 DRV_KMODS	+= bridge
 DRV_KMODS	+= bscbus
 DRV_KMODS	+= bscv
@@ -729,6 +730,7 @@
 #
 # socketmod (kernel/socketmod)
 #
+SOCKET_KMODS	+= sockpfp
 SOCKET_KMODS	+= socksctp
 SOCKET_KMODS    += socksdp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/bpf/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# uts/intel/bpf/Makefile
+#
+#
+#	This makefile drives the production of the bpf driver
+#	kernel module.
+#
+#	intel architecture dependent
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= bpf
+OBJECTS		= $(BPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(BPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/common/io/bpf
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#
+CFLAGS		+= $(CCVERBOSE)
+LDFLAGS		+= -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/intel/dev/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/dev/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -61,6 +61,7 @@
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -Nfs/devfs -Nmisc/dls
 INC_PATH	+= -I$(UTSBASE)/common/fs/zfs
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/intel/dld/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/dld/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#

 #
 # Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -56,6 +54,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/dls -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 # For now, disable these lint checks; maintainers should endeavor
--- a/usr/src/uts/intel/dls/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/dls/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,10 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"

 #
 # Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -54,6 +53,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 # For now, disable these lint checks; maintainers should endeavor
--- a/usr/src/uts/intel/ip/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/ip/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #
@@ -58,7 +58,11 @@

 CINLINEFLAGS = -xinline=tcp_set_ws_value

-CFLAGS += $(CINLINEFLAGS)
+CFLAGS		+= $(CINLINEFLAGS)
+#
+# To get the BPF header files included by ipnet.h
+#
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 # Depends on md5 and swrand (for SCTP). SCTP needs to depend on
--- a/usr/src/uts/intel/ipnet/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/ipnet/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -20,7 +20,7 @@
 #

 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #   This makefile drives the production of the ipnet driver
@@ -77,6 +77,11 @@
 LDFLAGS		+= -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook

 #
+# To get the BPF header files
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
 #   Default build targets.
 #
--- a/usr/src/uts/intel/iptun/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/iptun/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -54,6 +54,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip
+INC_PATH        += -I$(UTSBASE)/common/io/bpf

 LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
 LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
--- a/usr/src/uts/intel/mac/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/mac/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #
@@ -56,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
 LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
--- a/usr/src/uts/intel/mac_ether/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/mac_ether/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_ether MAC-Type plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/intel/mac_ib/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/mac_ib/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_ib MAC-Type plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/intel/mac_wifi/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/mac_wifi/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_wifi plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -Nmisc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/intel/os/minor_perm	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/os/minor_perm	Thu Sep 24 07:28:12 2009 -0700
@@ -203,3 +203,4 @@
 amd_iommu:* 0644 root sys
 xpvtap:* 0666 root sys
 clone:bridge 0666 root sys
+bpf:bpf 0666 root sys
--- a/usr/src/uts/intel/os/name_to_major	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/os/name_to_major	Thu Sep 24 07:28:12 2009 -0700
@@ -159,3 +159,4 @@
 bridge 265
 iptun 266
 iptunq 267
+bpf 268
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/sockpfp/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,95 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#	This makefile drives the production of the sockpfp socket
+#	kernel module.
+#
+#	intel architecture dependent
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= sockpfp
+OBJECTS		= $(PFP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(PFP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_SOCK_DIR)/$(MODULE)
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/bpf
+INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/intel/spdsock/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/intel/spdsock/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -20,11 +20,9 @@
 #
 #
 # uts/intel/spdsock/Makefile
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
 #	This makefile drives the production of the spdsock driver
 #	kernel module.
 #
@@ -62,6 +60,8 @@
 #
 LDFLAGS += -dy -Ndrv/ip

+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
 #
 # For now, disable these lint checks; maintainers should endeavor
 # to investigate and remove these for maximum lint coverage.
--- a/usr/src/uts/sparc/Makefile.sparc.shared	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/Makefile.sparc.shared	Thu Sep 24 07:28:12 2009 -0700
@@ -227,6 +227,7 @@
 DRV_KMODS	+= fm
 DRV_KMODS	+= nulldriver
 DRV_KMODS	+= bridge trill
+DRV_KMODS	+= bpf

 #
 # Don't build some of these for OpenSolaris, since they will be
@@ -508,6 +509,7 @@
 #
 # socketmod (kernel/socketmod)
 #
+SOCKET_KMODS	+= sockpfp
 SOCKET_KMODS	+= socksctp
 SOCKET_KMODS	+= socksdp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/bpf/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# uts/sparc/bpf/Makefile
+#
+#
+#	This makefile drives the production of the bpf driver
+#	kernel module.
+#
+#	sparc architecture dependent
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= bpf
+OBJECTS		= $(BPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(BPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR	= $(UTSBASE)/common/io/bpf
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+#
+CFLAGS		+= $(CCVERBOSE)
+LDFLAGS		+= -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
--- a/usr/src/uts/sparc/dev/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/dev/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -62,6 +62,7 @@
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -Nfs/devfs -Nmisc/dls
 INC_PATH	+= -I$(UTSBASE)/common/fs/zfs
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/sparc/dld/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/dld/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,11 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#

 #
 # Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -58,6 +56,7 @@
 $(RELEASE_BUILD)CFLAGS		+= -xinline=auto -xcrossfile
 $(RELEASE_BUILD)COPTIMIZE	= -xO5
 LDFLAGS				+= -dy -N misc/dls -N misc/mac
+INC_PATH			+= -I$(UTSBASE)/common/io/bpf

 #
 # For now, disable these lint checks; maintainers should endeavor
--- a/usr/src/uts/sparc/dls/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/dls/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,10 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"

 #
 # Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -56,6 +55,7 @@
 $(RELEASE_BUILD)CFLAGS		+= -xinline=auto -xcrossfile
 $(RELEASE_BUILD)COPTIMIZE	= -xO5
 LDFLAGS				+= -dy -N misc/mac
+INC_PATH			+= -I$(UTSBASE)/common/io/bpf

 #
 # For now, disable these lint checks; maintainers should endeavor
--- a/usr/src/uts/sparc/ip/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/ip/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #
@@ -60,6 +60,10 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 CFLAGS 		+= -xinline=tcp_set_ws_value
+#
+# To get the BPF header files included by ipnet.h
+#
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 # For now, disable these lint checks; maintainers should endeavor
--- a/usr/src/uts/sparc/ipnet/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/ipnet/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -20,7 +20,7 @@
 #

 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #   This makefile drives the production of the ipnet driver
@@ -83,6 +83,11 @@
 LDFLAGS		+= -dy -Ndrv/ip -Nmisc/neti -Nmisc/hook

 #
+# To get the BPF header files
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
 #   Default build targets.
 #
--- a/usr/src/uts/sparc/iptun/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/iptun/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -54,6 +54,7 @@
 #
 CFLAGS 		+= $(CCVERBOSE)
 LDFLAGS 	+= -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
 LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
--- a/usr/src/uts/sparc/mac/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/mac/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #
@@ -60,6 +60,7 @@
 $(RELEASE_BUILD)CFLAGS		+= -xinline=auto -xcrossfile
 $(RELEASE_BUILD)COPTIMIZE	= -xO5
 LDFLAGS				+= -dy
+INC_PATH			+= -I$(UTSBASE)/common/io/bpf

 LINTTAGS			+= -erroff=E_PTRDIFF_OVERFLOW
 LINTTAGS			+= -erroff=E_BAD_PTR_CAST_ALIGN
--- a/usr/src/uts/sparc/mac_ether/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/mac_ether/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_ether MAC-Type plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/sparc/mac_ib/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/mac_ib/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_ib MAC-Type plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -N misc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/sparc/mac_wifi/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/mac_wifi/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -19,12 +19,9 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
-#
 #	This makefile drives the production of the mac_wifi plugin
 #	kernel module.
 #
@@ -59,6 +56,7 @@
 #
 CFLAGS		+= $(CCVERBOSE)
 LDFLAGS		+= -dy -Nmisc/mac
+INC_PATH	+= -I$(UTSBASE)/common/io/bpf

 #
 #	Default build targets.
--- a/usr/src/uts/sparc/os/minor_perm	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/os/minor_perm	Thu Sep 24 07:28:12 2009 -0700
@@ -192,3 +192,4 @@
 iptunq:* 0640 root sys
 fm:* 0644 root sys
 clone:bridge 0666 root sys
+bpf:bpf 0666 root sys
--- a/usr/src/uts/sparc/os/name_to_major	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/os/name_to_major	Thu Sep 24 07:28:12 2009 -0700
@@ -231,3 +231,4 @@
 bridge 284
 iptun 285
 iptunq 286
+bpf 287
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/sparc/sockpfp/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#	This makefile drives the production of the sockpfp socket
+#	kernel module.
+#
+#	sparc architecture dependent
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= sockpfp
+OBJECTS		= $(PFP_SOCK_MOD_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(PFP_SOCK_MOD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_SOCK_DIR)/$(MODULE)
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/bpf
+INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS	+= -erroff=E_PTRDIFF_OVERFLOW
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
--- a/usr/src/uts/sparc/spdsock/Makefile	Thu Sep 24 15:16:32 2009 +0200
+++ b/usr/src/uts/sparc/spdsock/Makefile	Thu Sep 24 07:28:12 2009 -0700
@@ -20,11 +20,9 @@
 #
 #
 # uts/sparc/keysock/Makefile
-# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
 #	This makefile drives the production of the spdsock driver
 #	kernel module.
 #
@@ -63,6 +61,11 @@
 LDFLAGS += -dy -Ndrv/ip

 #
+# Overrides
+#
+INC_PATH += -I$(UTSBASE)/common/io/bpf
+
+#
 # lint pass one enforcement
 #
 CFLAGS += $(CCVERBOSE)