changeset 6707:c3bc7e4da11b

6582335 TCP/IP receive-side zero CPU copy for support of Intel I/OAT hardware 6582330 sodirect - socket direct, for support of Intel I/OAT hardware 6582323 uioa - uio asynchronous, for support of Intel I/OAT hardware 6567008 driver for intel ioat v1 & v2 DMA engine needed 6691281 tcp->tcp_sodirect == 0, file: ../../common/inet/tcp/tcp_fusion .c, line: 291 6693127 putback for 6567008/6582323/6582330/6582335 causes PANIC when using SCTP sockets 6694188 Solaris PIT test net/tcp/tests/win0_urg_processing timed out 6694389 assertion failed: ((tcp)->tcp_sodirect == 0 || !((tcp)->tcp_sodirect->sod_state & 0x0001))
author brutus
date Fri, 23 May 2008 20:14:10 -0700
parents 2d5b422fab0d
children 6bec19b45851
files deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386 deleted_files/usr/src/uts/common/io/dcopy.c deleted_files/usr/src/uts/common/sys/dcopy.h deleted_files/usr/src/uts/common/sys/dcopy_device.h deleted_files/usr/src/uts/common/sys/sodirect.h deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c deleted_files/usr/src/uts/i86pc/ioat/Makefile deleted_files/usr/src/uts/i86pc/sys/ioat.h deleted_files/usr/src/uts/i86xpv/ioat/Makefile deleted_files/usr/src/uts/intel/dcopy/Makefile usr/src/pkgdefs/Makefile usr/src/pkgdefs/SUNWdcopy/Makefile usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl usr/src/pkgdefs/SUNWdcopy/preremove.tmpl usr/src/pkgdefs/SUNWdcopy/prototype_com usr/src/pkgdefs/SUNWdcopy/prototype_i386 usr/src/pkgdefs/SUNWhea/prototype_com usr/src/uts/common/fs/sockfs/socksctp.c usr/src/uts/common/fs/sockfs/socksdp.c usr/src/uts/common/fs/sockfs/sockstr.c usr/src/uts/common/fs/sockfs/socksubr.c usr/src/uts/common/fs/sockfs/socktpi.c usr/src/uts/common/fs/sockfs/sockvnops.c usr/src/uts/common/inet/tcp.h usr/src/uts/common/inet/tcp/tcp.c usr/src/uts/common/inet/tcp/tcp6ddi.c usr/src/uts/common/inet/tcp/tcp_fusion.c usr/src/uts/common/inet/tcp/tcpddi.c usr/src/uts/common/io/dcopy.c usr/src/uts/common/io/stream.c usr/src/uts/common/os/move.c usr/src/uts/common/os/streamio.c usr/src/uts/common/os/strsubr.c usr/src/uts/common/sys/Makefile usr/src/uts/common/sys/conf.h usr/src/uts/common/sys/dcopy.h usr/src/uts/common/sys/dcopy_device.h usr/src/uts/common/sys/socketvar.h 
usr/src/uts/common/sys/sodirect.h usr/src/uts/common/sys/stream.h usr/src/uts/common/sys/strsubr.h usr/src/uts/common/sys/uio.h usr/src/uts/i86pc/Makefile.files usr/src/uts/i86pc/Makefile.i86pc.shared usr/src/uts/i86pc/Makefile.rules usr/src/uts/i86pc/io/ioat/ioat.c usr/src/uts/i86pc/io/ioat/ioat.conf usr/src/uts/i86pc/io/ioat/ioat_chan.c usr/src/uts/i86pc/io/ioat/ioat_ioctl.c usr/src/uts/i86pc/io/ioat/ioat_rs.c usr/src/uts/i86pc/ioat/Makefile usr/src/uts/i86pc/sys/ioat.h usr/src/uts/i86xpv/Makefile.files usr/src/uts/i86xpv/Makefile.i86xpv.shared usr/src/uts/i86xpv/Makefile.rules usr/src/uts/i86xpv/ioat/Makefile usr/src/uts/intel/Makefile.files usr/src/uts/intel/Makefile.intel.shared usr/src/uts/intel/dcopy/Makefile usr/src/uts/intel/ia32/ml/modstubs.s usr/src/uts/sparc/ml/modstubs.s
diffstat 71 files changed, 6522 insertions(+), 5058 deletions(-) [+]
line wrap: on
line diff
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-#
- 
-include ../Makefile.com
-
-TMPLFILES += postinstall preremove
-DATAFILES += depend
-
-.KEEP_STATE:
-
-all: $(FILES)
-install: all pkg
-
-include ../Makefile.targ
-include ../Makefile.prtarg
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
-
-#
-# This required package information file describes characteristics of the
-# package, such as package abbreviation, full package name, package version,
-# and package architecture.
-#
-PKG="SUNWdcopy"
-NAME="Sun dcopy DMA drivers"
-ARCH="i386"
-CATEGORY="system"
-BASEDIR=/
-SUNW_PKGVERS="1.0"
-SUNW_PKGTYPE="root"
-CLASSES="none"
-DESC="Sun dcopy DMA drivers"
-SUNW_PRODNAME="SunOS"
-SUNW_PRODVERS="RELEASE/VERSION"
-VERSION="ONVERS,REV=0.0.0"
-VENDOR="Sun Microsystems, Inc."
-HOTLINE="Please contact your local service provider"
-EMAIL=""
-MAXINST="1000"
-SUNW_PKG_ALLZONES="true"
-SUNW_PKG_HOLLOW="true"
-SUNW_PKG_THISZONE="false"
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-
-include drv_utils
-
-CB1='"pciex8086,1a38" "pciex8086,360b"'
-CB2='"pciex8086,402f"'
-
-pkg_drvadd -i "'$CB1 $CB2'" ioat || exit 1
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-#!/sbin/sh
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-
-include drv_utils
-
-pkg_drvrem ioat || exit 1
-
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-# This required package information file contains a list of package contents.
-# The 'pkgmk' command uses this file to identify the contents of a package
-# and their location on the development machine when building the package.
-# Can be created via a text editor or through use of the 'pkgproto' command.
-
-#!search <pathname pathname ...>	# where to find pkg objects
-#!include <filename>			# include another 'prototype' file
-#!default <mode> <owner> <group>	# default used if not specified on entry
-#!<param>=<value>			# puts parameter in pkg environment
-
-#
-# packaging files
-i copyright
-i depend
-i pkginfo
-i postinstall
-i preremove
-
-#
-# source locations relative to the prototype file
-#
-#
-# SUNWdcopy
-#
-d none kernel 0755 root sys
-d none kernel/misc 0755 root sys
-f none kernel/misc/dcopy 0755 root sys
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-# ident	"%Z%%M%	%I%	%E% SMI"
-#
-# This required package information file contains a list of package contents.
-# The 'pkgmk' command uses this file to identify the contents of a package
-# and their location on the development machine when building the package.
-# Can be created via a text editor or through use of the 'pkgproto' command.
-
-#!search <pathname pathname ...>	# where to find pkg objects
-#!include <filename>			# include another 'prototype' file
-#!default <mode> <owner> <group>	# default used if not specified on entry
-#!<param>=<value>			# puts parameter in pkg environment
-#
-#
-# Include ISA independent files (prototype_com)
-#
-!include prototype_com
-#
-#
-# List files which are i386 specific here
-#
-# SUNWioat
-#
-d none kernel/misc/amd64 0755 root sys
-f none kernel/misc/amd64/dcopy 0755 root sys
-d none platform 0755 root sys
-d none platform/i86pc 0755 root sys
-d none platform/i86pc/kernel 0755 root sys
-d none platform/i86pc/kernel/drv 0755 root sys
-f none platform/i86pc/kernel/drv/ioat 755 root sys
-f none platform/i86pc/kernel/drv/ioat.conf 644 root sys
-d none platform/i86pc/kernel/drv/amd64 0755 root sys
-f none platform/i86pc/kernel/drv/amd64/ioat 755 root sys
-d none platform/i86xpv 0755 root sys
-d none platform/i86xpv/kernel 0755 root sys
-d none platform/i86xpv/kernel/drv 0755 root sys
-f none platform/i86xpv/kernel/drv/ioat 755 root sys
-f none platform/i86xpv/kernel/drv/ioat.conf 644 root sys
-d none platform/i86xpv/kernel/drv/amd64 0755 root sys
-f none platform/i86xpv/kernel/drv/amd64/ioat 755 root sys
--- a/deleted_files/usr/src/uts/common/io/dcopy.c	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,932 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-/*
- * dcopy.c
- *    dcopy misc module
- */
-
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/modctl.h>
-#include <sys/sysmacros.h>
-#include <sys/atomic.h>
-
-
-#include <sys/dcopy.h>
-#include <sys/dcopy_device.h>
-
-
-/* Number of entries per channel to allocate */
-uint_t dcopy_channel_size = 1024;
-
-
-typedef struct dcopy_list_s {
-	list_t			dl_list;
-	kmutex_t		dl_mutex;
-	uint_t			dl_cnt; /* num entries on list */
-} dcopy_list_t;
-
-/* device state for register/unregister */
-struct dcopy_device_s {
-	/* DMA device drivers private pointer */
-	void			*dc_device_private;
-
-	/* to track list of channels from this DMA device */
-	dcopy_list_t		dc_devchan_list;
-	list_node_t		dc_device_list_node;
-
-	/*
-	 * dc_removing_cnt track how many channels still have to be freed up
-	 * before it's safe to allow the DMA device driver to detach.
-	 */
-	uint_t			dc_removing_cnt;
-	dcopy_device_cb_t	*dc_cb;
-
-	dcopy_device_info_t	dc_info;
-
-};
-
-typedef struct dcopy_stats_s {
-	kstat_named_t	cs_bytes_xfer;
-	kstat_named_t	cs_cmd_alloc;
-	kstat_named_t	cs_cmd_post;
-	kstat_named_t	cs_cmd_poll;
-	kstat_named_t	cs_notify_poll;
-	kstat_named_t	cs_notify_pending;
-	kstat_named_t	cs_id;
-	kstat_named_t	cs_capabilities;
-} dcopy_stats_t;
-
-/* DMA channel state */
-struct dcopy_channel_s {
-	/* DMA driver channel private pointer */
-	void			*ch_channel_private;
-
-	/* shortcut to device callbacks */
-	dcopy_device_cb_t	*ch_cb;
-
-	/*
-	 * number of outstanding allocs for this channel. used to track when
-	 * it's safe to free up this channel so the DMA device driver can
-	 * detach.
-	 */
-	uint64_t		ch_ref_cnt;
-
-	/* state for if channel needs to be removed when ch_ref_cnt gets to 0 */
-	boolean_t		ch_removing;
-
-	list_node_t		ch_devchan_list_node;
-	list_node_t		ch_globalchan_list_node;
-
-	/*
-	 * per channel list of commands actively blocking waiting for
-	 * completion.
-	 */
-	dcopy_list_t		ch_poll_list;
-
-	/* pointer back to our device */
-	struct dcopy_device_s	*ch_device;
-
-	dcopy_query_channel_t	ch_info;
-
-	kstat_t			*ch_kstat;
-	dcopy_stats_t		ch_stat;
-};
-
-/*
- * If grabbing both device_list mutex & globalchan_list mutex,
- * Always grab globalchan_list mutex before device_list mutex
- */
-typedef struct dcopy_state_s {
-	dcopy_list_t		d_device_list;
-	dcopy_list_t		d_globalchan_list;
-} dcopy_state_t;
-dcopy_state_t *dcopy_statep;
-
-
-/* Module Driver Info */
-static struct modlmisc dcopy_modlmisc = {
-	&mod_miscops,
-	"dcopy kernel module"
-};
-
-/* Module Linkage */
-static struct modlinkage dcopy_modlinkage = {
-	MODREV_1,
-	&dcopy_modlmisc,
-	NULL
-};
-
-static int dcopy_init();
-static void dcopy_fini();
-
-static int dcopy_list_init(dcopy_list_t *list, size_t node_size,
-    offset_t link_offset);
-static void dcopy_list_fini(dcopy_list_t *list);
-static void dcopy_list_push(dcopy_list_t *list, void *list_node);
-static void *dcopy_list_pop(dcopy_list_t *list);
-
-static void dcopy_device_cleanup(dcopy_device_handle_t device,
-    boolean_t do_callback);
-
-static int dcopy_stats_init(dcopy_handle_t channel);
-static void dcopy_stats_fini(dcopy_handle_t channel);
-
-
-/*
- * _init()
- */
-int
-_init()
-{
-	int e;
-
-	e = dcopy_init();
-	if (e != 0) {
-		return (e);
-	}
-
-	return (mod_install(&dcopy_modlinkage));
-}
-
-
-/*
- * _info()
- */
-int
-_info(struct modinfo *modinfop)
-{
-	return (mod_info(&dcopy_modlinkage, modinfop));
-}
-
-
-/*
- * _fini()
- */
-int
-_fini()
-{
-	int e;
-
-	e = mod_remove(&dcopy_modlinkage);
-	if (e != 0) {
-		return (e);
-	}
-
-	dcopy_fini();
-
-	return (e);
-}
-
-/*
- * dcopy_init()
- */
-static int
-dcopy_init()
-{
-	int e;
-
-
-	dcopy_statep = kmem_zalloc(sizeof (*dcopy_statep), KM_SLEEP);
-
-	/* Initialize the list we use to track device register/unregister */
-	e = dcopy_list_init(&dcopy_statep->d_device_list,
-	    sizeof (struct dcopy_device_s),
-	    offsetof(struct dcopy_device_s, dc_device_list_node));
-	if (e != DCOPY_SUCCESS) {
-		goto dcopyinitfail_device;
-	}
-
-	/* Initialize the list we use to track all DMA channels */
-	e = dcopy_list_init(&dcopy_statep->d_globalchan_list,
-	    sizeof (struct dcopy_channel_s),
-	    offsetof(struct dcopy_channel_s, ch_globalchan_list_node));
-	if (e != DCOPY_SUCCESS) {
-		goto dcopyinitfail_global;
-	}
-
-	return (0);
-
-dcopyinitfail_cback:
-	dcopy_list_fini(&dcopy_statep->d_globalchan_list);
-dcopyinitfail_global:
-	dcopy_list_fini(&dcopy_statep->d_device_list);
-dcopyinitfail_device:
-	kmem_free(dcopy_statep, sizeof (*dcopy_statep));
-
-	return (-1);
-}
-
-
-/*
- * dcopy_fini()
- */
-static void
-dcopy_fini()
-{
-	/*
-	 * if mod_remove was successfull, we shouldn't have any
-	 * devices/channels to worry about.
-	 */
-	ASSERT(list_head(&dcopy_statep->d_globalchan_list.dl_list) == NULL);
-	ASSERT(list_head(&dcopy_statep->d_device_list.dl_list) == NULL);
-
-	dcopy_list_fini(&dcopy_statep->d_globalchan_list);
-	dcopy_list_fini(&dcopy_statep->d_device_list);
-	kmem_free(dcopy_statep, sizeof (*dcopy_statep));
-}
-
-
-/* *** EXTERNAL INTERFACE *** */
-/*
- * dcopy_query()
- */
-void
-dcopy_query(dcopy_query_t *query)
-{
-	query->dq_version = DCOPY_QUERY_V0;
-	query->dq_num_channels = dcopy_statep->d_globalchan_list.dl_cnt;
-}
-
-
-/*
- * dcopy_alloc()
- */
-/*ARGSUSED*/
-int
-dcopy_alloc(int flags, dcopy_handle_t *handle)
-{
-	dcopy_handle_t channel;
-	dcopy_list_t *list;
-
-
-	/*
-	 * we don't use the dcopy_list_* code here because we need to due
-	 * some non-standard stuff.
-	 */
-
-	list = &dcopy_statep->d_globalchan_list;
-
-	/*
-	 * if nothing is on the channel list, return DCOPY_NORESOURCES. This
-	 * can happen if there aren't any DMA device registered.
-	 */
-	mutex_enter(&list->dl_mutex);
-	channel = list_head(&list->dl_list);
-	if (channel == NULL) {
-		mutex_exit(&list->dl_mutex);
-		return (DCOPY_NORESOURCES);
-	}
-
-	/*
-	 * increment the reference count, and pop the channel off the head and
-	 * push it on the tail. This ensures we rotate through the channels.
-	 * DMA channels are shared.
-	 */
-	channel->ch_ref_cnt++;
-	list_remove(&list->dl_list, channel);
-	list_insert_tail(&list->dl_list, channel);
-	mutex_exit(&list->dl_mutex);
-
-	*handle = (dcopy_handle_t)channel;
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * dcopy_free()
- */
-void
-dcopy_free(dcopy_handle_t *channel)
-{
-	dcopy_device_handle_t device;
-	dcopy_list_t *list;
-	boolean_t cleanup;
-
-
-	ASSERT(*channel != NULL);
-
-	/*
-	 * we don't need to add the channel back to the list since we never
-	 * removed it. decrement the reference count.
-	 */
-	list = &dcopy_statep->d_globalchan_list;
-	mutex_enter(&list->dl_mutex);
-	(*channel)->ch_ref_cnt--;
-
-	/*
-	 * if we need to remove this channel, and the reference count is down
-	 * to 0, decrement the number of channels which still need to be
-	 * removed on the device.
-	 */
-	if ((*channel)->ch_removing && ((*channel)->ch_ref_cnt == 0)) {
-		cleanup = B_FALSE;
-		device = (*channel)->ch_device;
-		mutex_enter(&device->dc_devchan_list.dl_mutex);
-		device->dc_removing_cnt--;
-		if (device->dc_removing_cnt == 0) {
-			cleanup = B_TRUE;
-		}
-		mutex_exit(&device->dc_devchan_list.dl_mutex);
-	}
-	mutex_exit(&list->dl_mutex);
-
-	/*
-	 * if there are no channels which still need to be removed, cleanup the
-	 * device state and call back into the DMA device driver to tell them
-	 * the device is free.
-	 */
-	if (cleanup) {
-		dcopy_device_cleanup(device, B_TRUE);
-	}
-
-	*channel = NULL;
-}
-
-
-/*
- * dcopy_query_channel()
- */
-void
-dcopy_query_channel(dcopy_handle_t channel, dcopy_query_channel_t *query)
-{
-	*query = channel->ch_info;
-}
-
-
-/*
- * dcopy_cmd_alloc()
- */
-int
-dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd)
-{
-	dcopy_handle_t channel;
-	dcopy_cmd_priv_t priv;
-	int e;
-
-
-	channel = handle;
-
-	atomic_inc_64(&channel->ch_stat.cs_cmd_alloc.value.ui64);
-	e = channel->ch_cb->cb_cmd_alloc(channel->ch_channel_private, flags,
-	    cmd);
-	if (e == DCOPY_SUCCESS) {
-		priv = (*cmd)->dp_private;
-		priv->pr_channel = channel;
-		/*
-		 * we won't initialize the blocking state until we actually
-		 * need to block.
-		 */
-		priv->pr_block_init = B_FALSE;
-	}
-
-	return (e);
-}
-
-
-/*
- * dcopy_cmd_free()
- */
-void
-dcopy_cmd_free(dcopy_cmd_t *cmd)
-{
-	dcopy_handle_t channel;
-	dcopy_cmd_priv_t priv;
-
-
-	ASSERT(*cmd != NULL);
-
-	priv = (*cmd)->dp_private;
-	channel = priv->pr_channel;
-
-	/* if we initialized the blocking state, clean it up too */
-	if (priv->pr_block_init) {
-		cv_destroy(&priv->pr_cv);
-		mutex_destroy(&priv->pr_mutex);
-	}
-
-	channel->ch_cb->cb_cmd_free(channel->ch_channel_private, cmd);
-}
-
-
-/*
- * dcopy_cmd_post()
- */
-int
-dcopy_cmd_post(dcopy_cmd_t cmd)
-{
-	dcopy_handle_t channel;
-	int e;
-
-
-	channel = cmd->dp_private->pr_channel;
-
-	atomic_inc_64(&channel->ch_stat.cs_cmd_post.value.ui64);
-	if (cmd->dp_cmd == DCOPY_CMD_COPY) {
-		atomic_add_64(&channel->ch_stat.cs_bytes_xfer.value.ui64,
-		    cmd->dp.copy.cc_size);
-	}
-	e = channel->ch_cb->cb_cmd_post(channel->ch_channel_private, cmd);
-	if (e != DCOPY_SUCCESS) {
-		return (e);
-	}
-
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * dcopy_cmd_poll()
- */
-int
-dcopy_cmd_poll(dcopy_cmd_t cmd, int flags)
-{
-	dcopy_handle_t channel;
-	dcopy_cmd_priv_t priv;
-	int e;
-
-
-	priv = cmd->dp_private;
-	channel = priv->pr_channel;
-
-	/*
-	 * if the caller is trying to block, they needed to post the
-	 * command with DCOPY_CMD_INTR set.
-	 */
-	if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) {
-		return (DCOPY_FAILURE);
-	}
-
-	atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64);
-
-repoll:
-	e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd);
-	if (e == DCOPY_PENDING) {
-		/*
-		 * if the command is still active, and the blocking flag
-		 * is set.
-		 */
-		if (flags & DCOPY_POLL_BLOCK) {
-
-			/*
-			 * if we haven't initialized the state, do it now. A
-			 * command can be re-used, so it's possible it's
-			 * already been initialized.
-			 */
-			if (!priv->pr_block_init) {
-				priv->pr_block_init = B_TRUE;
-				mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER,
-				    NULL);
-				cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL);
-				priv->pr_cmd = cmd;
-			}
-
-			/* push it on the list for blocking commands */
-			priv->pr_wait = B_TRUE;
-			dcopy_list_push(&channel->ch_poll_list, priv);
-
-			mutex_enter(&priv->pr_mutex);
-			/*
-			 * it's possible we already cleared pr_wait before we
-			 * grabbed the mutex.
-			 */
-			if (priv->pr_wait) {
-				cv_wait(&priv->pr_cv, &priv->pr_mutex);
-			}
-			mutex_exit(&priv->pr_mutex);
-
-			/*
-			 * the command has completed, go back and poll so we
-			 * get the status.
-			 */
-			goto repoll;
-		}
-	}
-
-	return (e);
-}
-
-/* *** END OF EXTERNAL INTERFACE *** */
-
-/*
- * dcopy_list_init()
- */
-static int
-dcopy_list_init(dcopy_list_t *list, size_t node_size, offset_t link_offset)
-{
-	mutex_init(&list->dl_mutex, NULL, MUTEX_DRIVER, NULL);
-	list_create(&list->dl_list, node_size, link_offset);
-	list->dl_cnt = 0;
-
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * dcopy_list_fini()
- */
-static void
-dcopy_list_fini(dcopy_list_t *list)
-{
-	list_destroy(&list->dl_list);
-	mutex_destroy(&list->dl_mutex);
-}
-
-
-/*
- * dcopy_list_push()
- */
-static void
-dcopy_list_push(dcopy_list_t *list, void *list_node)
-{
-	mutex_enter(&list->dl_mutex);
-	list_insert_tail(&list->dl_list, list_node);
-	list->dl_cnt++;
-	mutex_exit(&list->dl_mutex);
-}
-
-
-/*
- * dcopy_list_pop()
- */
-static void *
-dcopy_list_pop(dcopy_list_t *list)
-{
-	list_node_t *list_node;
-
-	mutex_enter(&list->dl_mutex);
-	list_node = list_head(&list->dl_list);
-	if (list_node == NULL) {
-		mutex_exit(&list->dl_mutex);
-		return (list_node);
-	}
-	list->dl_cnt--;
-	list_remove(&list->dl_list, list_node);
-	mutex_exit(&list->dl_mutex);
-
-	return (list_node);
-}
-
-
-/* *** DEVICE INTERFACE *** */
-/*
- * dcopy_device_register()
- */
-int
-dcopy_device_register(void *device_private, dcopy_device_info_t *info,
-    dcopy_device_handle_t *handle)
-{
-	struct dcopy_channel_s *channel;
-	struct dcopy_device_s *device;
-	int e;
-	int i;
-
-
-	/* initialize the per device state */
-	device = kmem_zalloc(sizeof (*device), KM_SLEEP);
-	device->dc_device_private = device_private;
-	device->dc_info = *info;
-	device->dc_removing_cnt = 0;
-	device->dc_cb = info->di_cb;
-
-	/*
-	 * we have a per device channel list so we can remove a device in the
-	 * future.
-	 */
-	e = dcopy_list_init(&device->dc_devchan_list,
-	    sizeof (struct dcopy_channel_s),
-	    offsetof(struct dcopy_channel_s, ch_devchan_list_node));
-	if (e != DCOPY_SUCCESS) {
-		goto registerfail_devchan;
-	}
-
-	/*
-	 * allocate state for each channel, allocate the channel,  and then add
-	 * the devices dma channels to the devices channel list.
-	 */
-	for (i = 0; i < info->di_num_dma; i++) {
-		channel = kmem_zalloc(sizeof (*channel), KM_SLEEP);
-		channel->ch_device = device;
-		channel->ch_removing = B_FALSE;
-		channel->ch_ref_cnt = 0;
-		channel->ch_cb = info->di_cb;
-
-		e = info->di_cb->cb_channel_alloc(device_private, channel,
-		    DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info,
-		    &channel->ch_channel_private);
-		if (e != DCOPY_SUCCESS) {
-			kmem_free(channel, sizeof (*channel));
-			goto registerfail_alloc;
-		}
-
-		e = dcopy_stats_init(channel);
-		if (e != DCOPY_SUCCESS) {
-			info->di_cb->cb_channel_free(
-			    &channel->ch_channel_private);
-			kmem_free(channel, sizeof (*channel));
-			goto registerfail_alloc;
-		}
-
-		e = dcopy_list_init(&channel->ch_poll_list,
-		    sizeof (struct dcopy_cmd_priv_s),
-		    offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node));
-		if (e != DCOPY_SUCCESS) {
-			dcopy_stats_fini(channel);
-			info->di_cb->cb_channel_free(
-			    &channel->ch_channel_private);
-			kmem_free(channel, sizeof (*channel));
-			goto registerfail_alloc;
-		}
-
-		dcopy_list_push(&device->dc_devchan_list, channel);
-	}
-
-	/* add the device to device list */
-	dcopy_list_push(&dcopy_statep->d_device_list, device);
-
-	/*
-	 * add the device's dma channels to the global channel list (where
-	 * dcopy_alloc's come from)
-	 */
-	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
-	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
-	channel = list_head(&device->dc_devchan_list.dl_list);
-	while (channel != NULL) {
-		list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list,
-		    channel);
-		dcopy_statep->d_globalchan_list.dl_cnt++;
-		channel = list_next(&device->dc_devchan_list.dl_list, channel);
-	}
-	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
-	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);
-
-	*handle = device;
-	return (DCOPY_SUCCESS);
-
-registerfail_alloc:
-	channel = list_head(&device->dc_devchan_list.dl_list);
-	while (channel != NULL) {
-		/* remove from the list */
-		channel = dcopy_list_pop(&device->dc_devchan_list);
-		ASSERT(channel != NULL);
-
-		dcopy_list_fini(&channel->ch_poll_list);
-		dcopy_stats_fini(channel);
-		info->di_cb->cb_channel_free(&channel->ch_channel_private);
-		kmem_free(channel, sizeof (*channel));
-	}
-
-	dcopy_list_fini(&device->dc_devchan_list);
-registerfail_devchan:
-	kmem_free(device, sizeof (*device));
-
-	return (DCOPY_FAILURE);
-}
-
-
-/*
- * dcopy_device_unregister()
- */
-/*ARGSUSED*/
-int
-dcopy_device_unregister(dcopy_device_handle_t *handle)
-{
-	struct dcopy_channel_s *channel;
-	dcopy_device_handle_t device;
-	boolean_t device_busy;
-
-
-	device = *handle;
-	device_busy = B_FALSE;
-
-	/*
-	 * remove the devices dma channels from the global channel list (where
-	 * dcopy_alloc's come from)
-	 */
-	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
-	mutex_enter(&device->dc_devchan_list.dl_mutex);
-	channel = list_head(&device->dc_devchan_list.dl_list);
-	while (channel != NULL) {
-		/*
-		 * if the channel has outstanding allocs, mark it as having
-		 * to be removed and increment the number of channels which
-		 * need to be removed in the device state too.
-		 */
-		if (channel->ch_ref_cnt != 0) {
-			channel->ch_removing = B_TRUE;
-			device_busy = B_TRUE;
-			device->dc_removing_cnt++;
-		}
-		dcopy_statep->d_globalchan_list.dl_cnt--;
-		list_remove(&dcopy_statep->d_globalchan_list.dl_list, channel);
-		channel = list_next(&device->dc_devchan_list.dl_list, channel);
-	}
-	mutex_exit(&device->dc_devchan_list.dl_mutex);
-	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);
-
-	/*
-	 * if there are channels which still need to be removed, we will clean
-	 * up the device state after they are freed up.
-	 */
-	if (device_busy) {
-		return (DCOPY_PENDING);
-	}
-
-	dcopy_device_cleanup(device, B_FALSE);
-
-	*handle = NULL;
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * dcopy_device_cleanup()
- */
-static void
-dcopy_device_cleanup(dcopy_device_handle_t device, boolean_t do_callback)
-{
-	struct dcopy_channel_s *channel;
-
-	/*
-	 * remove all the channels in the device list, free them, and clean up
-	 * the state.
-	 */
-	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
-	channel = list_head(&device->dc_devchan_list.dl_list);
-	while (channel != NULL) {
-		device->dc_devchan_list.dl_cnt--;
-		list_remove(&device->dc_devchan_list.dl_list, channel);
-		dcopy_list_fini(&channel->ch_poll_list);
-		dcopy_stats_fini(channel);
-		channel->ch_cb->cb_channel_free(&channel->ch_channel_private);
-		kmem_free(channel, sizeof (*channel));
-		channel = list_head(&device->dc_devchan_list.dl_list);
-	}
-
-	/* remove it from the list of devices */
-	list_remove(&dcopy_statep->d_device_list.dl_list, device);
-
-	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
-
-	/*
-	 * notify the DMA device driver that the device is free to be
-	 * detached.
-	 */
-	if (do_callback) {
-		device->dc_cb->cb_unregister_complete(
-		    device->dc_device_private, DCOPY_SUCCESS);
-	}
-
-	dcopy_list_fini(&device->dc_devchan_list);
-	kmem_free(device, sizeof (*device));
-}
-
-
-/*
- * dcopy_device_channel_notify()
- */
-/*ARGSUSED*/
-void
-dcopy_device_channel_notify(dcopy_handle_t handle, int status)
-{
-	struct dcopy_channel_s *channel;
-	dcopy_list_t *poll_list;
-	dcopy_cmd_priv_t priv;
-	int e;
-
-
-	ASSERT(status == DCOPY_COMPLETION);
-	channel = handle;
-
-	poll_list = &channel->ch_poll_list;
-
-	/*
-	 * when we get a completion notification from the device, go through
-	 * all of the commands blocking on this channel and see if they have
-	 * completed. Remove the command and wake up the block thread if they
-	 * have. Once we hit a command which is still pending, we are done
-	 * polling since commands in a channel complete in order.
-	 */
-	mutex_enter(&poll_list->dl_mutex);
-	if (poll_list->dl_cnt != 0) {
-		priv = list_head(&poll_list->dl_list);
-		while (priv != NULL) {
-			atomic_inc_64(&channel->
-			    ch_stat.cs_notify_poll.value.ui64);
-			e = channel->ch_cb->cb_cmd_poll(
-			    channel->ch_channel_private,
-			    priv->pr_cmd);
-			if (e == DCOPY_PENDING) {
-				atomic_inc_64(&channel->
-				    ch_stat.cs_notify_pending.value.ui64);
-				break;
-			}
-
-			poll_list->dl_cnt--;
-			list_remove(&poll_list->dl_list, priv);
-
-			mutex_enter(&priv->pr_mutex);
-			priv->pr_wait = B_FALSE;
-			cv_signal(&priv->pr_cv);
-			mutex_exit(&priv->pr_mutex);
-
-			priv = list_head(&poll_list->dl_list);
-		}
-	}
-
-	mutex_exit(&poll_list->dl_mutex);
-}
-
-
-/*
- * dcopy_stats_init()
- */
-static int
-dcopy_stats_init(dcopy_handle_t channel)
-{
-#define	CHANSTRSIZE	20
-	char chanstr[CHANSTRSIZE];
-	dcopy_stats_t *stats;
-	int instance;
-	char *name;
-
-
-	stats = &channel->ch_stat;
-	name = (char *)ddi_driver_name(channel->ch_device->dc_info.di_dip);
-	instance = ddi_get_instance(channel->ch_device->dc_info.di_dip);
-
-	(void) snprintf(chanstr, CHANSTRSIZE, "channel%d",
-	    (uint32_t)channel->ch_info.qc_chan_num);
-
-	channel->ch_kstat = kstat_create(name, instance, chanstr, "misc",
-	    KSTAT_TYPE_NAMED, sizeof (dcopy_stats_t) / sizeof (kstat_named_t),
-	    KSTAT_FLAG_VIRTUAL);
-	if (channel->ch_kstat == NULL) {
-		return (DCOPY_FAILURE);
-	}
-	channel->ch_kstat->ks_data = stats;
-
-	kstat_named_init(&stats->cs_bytes_xfer, "bytes_xfer",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_cmd_alloc, "cmd_alloc",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_cmd_post, "cmd_post",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_cmd_poll, "cmd_poll",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_notify_poll, "notify_poll",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_notify_pending, "notify_pending",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_id, "id",
-	    KSTAT_DATA_UINT64);
-	kstat_named_init(&stats->cs_capabilities, "capabilities",
-	    KSTAT_DATA_UINT64);
-
-	kstat_install(channel->ch_kstat);
-
-	channel->ch_stat.cs_id.value.ui64 = channel->ch_info.qc_id;
-	channel->ch_stat.cs_capabilities.value.ui64 =
-	    channel->ch_info.qc_capabilities;
-
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * dcopy_stats_fini()
- */
-static void
-dcopy_stats_fini(dcopy_handle_t channel)
-{
-	kstat_delete(channel->ch_kstat);
-}
-/* *** END OF DEVICE INTERFACE *** */
--- a/deleted_files/usr/src/uts/common/sys/dcopy.h	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,235 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DCOPY_H
-#define	_SYS_DCOPY_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/types.h>
-
-/*
- * *** This interface is for private use by the IP stack only ***
- */
-
-/* Function return status */
-#define	DCOPY_FAILURE		(-1)
-#define	DCOPY_SUCCESS		(0)
-#define	DCOPY_NORESOURCES	(1) /* _alloc & _cmd_alloc, _cmd_post only */
-#define	DCOPY_PENDING		(0x10) /* dcopy_poll(), dcopy_unregister() */
-#define	DCOPY_COMPLETED		(0x20) /* dcopy_poll() only */
-
-
-/* dq_version */
-#define	DCOPY_QUERY_V0	0
-
-typedef struct dcopy_query_s {
-	int		dq_version; /* DCOPY_QUERY_V0 */
-	uint_t		dq_num_channels; /* number of dma channels */
-} dcopy_query_t;
-
-/*
- * dcopy_query()
- *   query for the number of DMA engines usable in the system.
- */
-void dcopy_query(dcopy_query_t *query);
-
-
-typedef struct dcopy_channel_s *dcopy_handle_t;
-
-/* dcopy_alloc() and dcopy_cmd_alloc() common flags */
-#define	DCOPY_SLEEP	(0)
-#define	DCOPY_NOSLEEP	(1 << 0)
-
-/*
- * dcopy_alloc()
- *   Allocate a DMA channel which is used for posting DMA requests. Note: this
- *   does not give the caller exclusive access to the DMA engine. Commands
- *   posted to a channel will complete in order.
- *     flags - (DCOPY_SLEEP, DCOPY_NOSLEEP)
- *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
- */
-int dcopy_alloc(int flags, dcopy_handle_t *handle);
-
-/*
- * dcopy_free()
- *   Free the DMA channel. The client can no longer use the handle to post or
- *   poll for status on posts which were previously done on this channel.
- */
-void dcopy_free(dcopy_handle_t *handle);
-
-/* dq_version */
-#define	DCOPY_QUERY_CHANNEL_V0	0
-
-/* Per DMA channel info */
-typedef struct dcopy_query_channel_s {
-	int		qc_version; /* DCOPY_QUERY_CHANNEL_V0 */
-
-	/* Does DMA channel support DCA */
-	boolean_t	qc_dca_supported;
-
-	/* device id and device specific capabilities */
-	uint64_t	qc_id;
-	uint64_t	qc_capabilities;
-
-	/*
-	 * DMA channel size. This may not be the same as the number of posts
-	 * that the DMA channel can handle since a post may consume 1 or more
-	 * entries.
-	 */
-	uint64_t	qc_channel_size;
-
-	/* DMA channel number within the device. Not unique across devices */
-	uint64_t	qc_chan_num;
-} dcopy_query_channel_t;
-
-/*
- * dcopy_query_channel()
- *   query DMA engines capabilities
- */
-void dcopy_query_channel(dcopy_handle_t handle, dcopy_query_channel_t *query);
-
-
-/* dp_version */
-#define	DCOPY_CMD_V0	0
-
-/* dp_cmd */
-#define	DCOPY_CMD_COPY	0x1
-
-/* dp_flags */
-/*
- * DCOPY_CMD_QUEUE
- *    Hint to queue up the post but don't notify the DMA engine. This can be
- *    used as an optimization when multiple posts are going to be queued up and
- *    you only want notify the DMA engine after the last post. Note, this does
- *    not mean the DMA engine won't process the request since it could notice
- *    it anyway.
- * DCOPY_CMD_NOSTAT
- *    Don't generate a status. If this flag is used, You cannot poll for
- *    completion status on this command. This can be a useful performance
- *    optimization if your posting multiple commands and just want to poll on
- *    the last command.
- * DCOPY_CMD_DCA
- *    If DCA is supported, direct this and all future command data (until the
- *    next command with DCOPY_POST_DCA set) to the processor specified in
- *    dp_dca_id. This flag is ignored if DCA is not supported.
- * DCOPY_CMD_INTR
- *    Generate an interrupt when command completes. This flag is required if
- *    the caller is going to call dcopy_cmd_poll(() with DCOPY_POLL_BLOCK set
- *    for this command.
- */
-#define	DCOPY_CMD_NOFLAGS	(0)
-#define	DCOPY_CMD_QUEUE		(1 << 0)
-#define	DCOPY_CMD_NOSTAT	(1 << 1)
-#define	DCOPY_CMD_DCA		(1 << 2)
-#define	DCOPY_CMD_INTR		(1 << 3)
-
-typedef struct dcopy_cmd_copy_s {
-	uint64_t	cc_source; /* Source physical address */
-	uint64_t	cc_dest; /* Destination physical address */
-	size_t		cc_size;
-} dcopy_cmd_copy_t;
-
-typedef union dcopy_cmd_u {
-	dcopy_cmd_copy_t	copy;
-} dcopy_cmd_u_t;
-
-typedef struct dcopy_cmd_priv_s *dcopy_cmd_priv_t;
-
-struct dcopy_cmd_s {
-	uint_t			dp_version; /* DCOPY_CMD_V0 */
-	uint_t			dp_flags;
-	uint64_t		dp_cmd;
-	dcopy_cmd_u_t   	dp;
-	uint32_t		dp_dca_id;
-	dcopy_cmd_priv_t	dp_private;
-};
-typedef struct dcopy_cmd_s *dcopy_cmd_t;
-
-
-/*
- * dcopy_cmd_alloc() specific flags
- *   DCOPY_ALLOC_LINK - when set, the caller passes in a previously alloced
- *     command in cmd. dcopy_cmd_alloc() will allocate a new command and
- *     link it to the old command. The caller can use this to build a
- *     chain of commands, keeping only the last cmd alloced. calling
- *     dcopy_cmd_free() with the last cmd alloced in the chain will free all of
- *     the commands in the chain. dcopy_cmd_post() and dcopy_cmd_poll() have
- *     no knowledge of a chain of commands.  It's only used for alloc/free.
- */
-#define	DCOPY_ALLOC_LINK	(1 << 16)
-
-/*
- * dcopy_cmd_alloc()
- *   allocate a command. A command can be re-used after it completes.
- *     flags - (DCOPY_SLEEP || DCOPY_NOSLEEP), DCOPY_ALLOC_LINK
- *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
- */
-int dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd);
-
-/*
- * dcopy_cmd_free()
- *   free the command. This call cannot be called after dcopy_free().
- */
-void dcopy_cmd_free(dcopy_cmd_t *cmd);
-
-/*
- * dcopy_cmd_post()
- *   post a command (allocated from dcopy_cmd_alloc()) to the DMA channel
- *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
- */
-int dcopy_cmd_post(dcopy_cmd_t cmd);
-
-/* dcopy_cmd_poll() flags */
-#define	DCOPY_POLL_NOFLAGS	(0)
-#define	DCOPY_POLL_BLOCK	(1 << 0)
-
-/*
- * dcopy_cmd_poll()
- *   poll on completion status of a previous post. This call cannot be called
- *   after dcopy_free().
- *
- *   if flags == DCOPY_POLL_NOFLAGS, return status can be DCOPY_FAILURE,
- *   DCOPY_PENDING, or DCOPY_COMPLETED.
- *
- *   if flags & DCOPY_POLL_BLOCK, return status can be DCOPY_FAILURE or
- *   DCOPY_COMPLETED. DCOPY_POLL_BLOCK can only be set in base context.
- *
- *   The command cannot be re-used or freed until the command has completed
- *   (e.g. DCOPY_FAILURE or DCOPY_COMPLETED).
- */
-int dcopy_cmd_poll(dcopy_cmd_t cmd, int flags);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DCOPY_H */
--- a/deleted_files/usr/src/uts/common/sys/dcopy_device.h	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,154 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_DCOPY_DEVICE_H
-#define	_SYS_DCOPY_DEVICE_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/types.h>
-#include <sys/dcopy.h>
-
-/*
- * private command state. Space for this structure should be allocated during
- * (*cb_cmd_alloc). The DMA driver must set dp_private in dcopy_cmd_t to point
- * to the memory it allocated. Other than pr_device_cmd_private, the DMA driver
- * should not touch any of the fields in this structure. pr_device_cmd_private
- * is a private pointer for the DMA engine to use.
- */
-struct dcopy_cmd_priv_s {
-	/*
-	 * we only init the state used to track a command which blocks when it
-	 * actually blocks. pr_block_init tells us when we need to clean it
-	 * up during a cmd_free.
-	 */
-	boolean_t		pr_block_init;
-
-	/* dcopy_poll blocking state */
-	list_node_t		pr_poll_list_node;
-	volatile boolean_t	pr_wait;
-	kmutex_t		pr_mutex;
-	kcondvar_t		pr_cv;
-
-	/* back pointer to the command */
-	dcopy_cmd_t		pr_cmd;
-
-	/* shortcut to the channel we're on */
-	struct dcopy_channel_s	*pr_channel;
-
-	/* DMA driver private pointer */
-	void			*pr_device_cmd_private;
-};
-
-/* cb_version */
-#define	DCOPY_DEVICECB_V0	0
-
-typedef struct dcopy_device_chaninfo_s {
-	uint_t	di_chan_num;
-} dcopy_device_chaninfo_t;
-
-typedef struct dcopy_device_cb_s {
-	int	cb_version;
-	int	cb_res1;
-
-	/* allocate/free a DMA channel. See dcopy.h for return status  */
-	int	(*cb_channel_alloc)(void *device_private,
-		    dcopy_handle_t handle, int flags, uint_t size,
-		    dcopy_query_channel_t *info, void *channel_private);
-	void	(*cb_channel_free)(void *channel_private);
-
-	/* allocate/free a command. See dcopy.h for return status  */
-	int	(*cb_cmd_alloc)(void *channel_private, int flags,
-		    dcopy_cmd_t *cmd);
-	void	(*cb_cmd_free)(void *channel_private, dcopy_cmd_t *cmd);
-
-	/*
-	 * post a command/poll for command status. See dcopy.h for return
-	 * status
-	 */
-	int	(*cb_cmd_post)(void *channel_private, dcopy_cmd_t cmd);
-	int	(*cb_cmd_poll)(void *channel_private, dcopy_cmd_t cmd);
-
-	/*
-	 * if dcopy_device_unregister() returns DCOPY_PENDING, dcopy will
-	 * call this routine when all the channels are no longer being
-	 * used and have been free'd up. e.g. it's safe for the DMA driver
-	 * to detach.
-	 *   status = DCOPY_SUCCESS || DCOPY_FAILURE
-	 */
-	void	(*cb_unregister_complete)(void *device_private, int status);
-} dcopy_device_cb_t;
-
-
-typedef struct dcopy_device_info_s {
-	dev_info_t		*di_dip;
-	dcopy_device_cb_t	*di_cb; /* must be a static array */
-	uint_t			di_num_dma;
-	uint_t			di_maxxfer;
-	uint_t			di_capabilities;
-	uint64_t		di_id;
-} dcopy_device_info_t;
-
-typedef struct dcopy_device_s *dcopy_device_handle_t;
-
-/* dcopy_device_notify() status */
-#define	DCOPY_COMPLETION	0
-
-/*
- * dcopy_device_register()
- *   register the DMA device with dcopy.
- *    return status => DCOPY_FAILURE, DCOPY_SUCCESS
- */
-int dcopy_device_register(void *device_private, dcopy_device_info_t *info,
-    dcopy_device_handle_t *handle);
-
-/*
- * dcopy_device_unregister()
- *   try to unregister the DMA device with dcopy. If the DMA engines are
- *   still being used by upper layer modules, DCOPY_PENDING will be returned.
- *    return status => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_PENDING
- *      if DCOPY_PENDING, (*cb_unregister_complete)() will be called when
- *      completed.
- */
-int dcopy_device_unregister(dcopy_device_handle_t *handle);
-
-/*
- * dcopy_device_channel_notify()
- *   Notify dcopy of an event.
- *     dcopy_handle_t handle => what was passed into (*cb_alloc)()
- *     status => DCOPY_COMPLETION
- */
-void dcopy_device_channel_notify(dcopy_handle_t handle, int status);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_DCOPY_DEVICE_H */
--- a/deleted_files/usr/src/uts/common/sys/sodirect.h	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
-/*	  All Rights Reserved  	*/
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_SODIRECT_H
-#define	_SYS_SODIRECT_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-/*
- * Sodirect ...
- *
- * Currently the sodirect_t uses the sockfs streamhead STREAMS Q directly,
- * in the future when we have STREAMless sockets a sonode Q will have to
- * be implemented however the sodirect KPI shouldn't need to change.
- */
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-typedef struct sodirect_s {
-	uint32_t	sod_state;	/* State bits */
-	uint32_t	sod_want;	/* Pending read byte count or 0 */
-	queue_t		*sod_q;		/* Socket Q */
-	int		(*sod_enqueue)(); /* Call to enqueue an mblk_t */
-	void		(*sod_wakeup)(); /* Call to awkake a read()er, if any */
-	mblk_t		*sod_uioafh;	/* To be freed list head, or NULL */
-	mblk_t		*sod_uioaft;	/* To be freed list tail */
-	kmutex_t	*sod_lock;	/* Lock needed to protect all members */
-	uioa_t		sod_uioa;	/* Pending uio_t for uioa_t use */
-} sodirect_t;
-
-/*
- * sod_state bits:
- */
-
-#define	SOD_DISABLED	0		/* No more sodirect */
-
-#define	SOD_ENABLED	0x0001		/* sodirect_t enabled */
-
-#define	SOD_WAKE_NOT	0x0010		/* Wakeup not needed */
-#define	SOD_WAKE_NEED   0x0020		/* Wakeup needed */
-#define	SOD_WAKE_DONE	0x0040		/* Wakeup done */
-#define	SOD_WAKE_CLR	~(SOD_WAKE_NOT|SOD_WAKE_NEED|SOD_WAKE_DONE)
-
-/*
- * Usefull macros:
- */
-
-#define	SOD_QSETBE(p) ((p)->sod_q->q_flag |= QWANTW)
-#define	SOD_QCLRBE(p) ((p)->sod_q->q_flag &= ~QWANTW)
-#define	SOD_QEMPTY(p) ((p)->sod_q->q_first == NULL)
-#define	SOD_QFULL(p) ((p)->sod_q->q_flag & QFULL)
-#define	SOD_QCNT(p) ((p)->sod_q->q_count)
-
-#define	SOD_DISABLE(p) (p)->sod_state &= ~SOD_ENABLED
-
-#define	SOD_QTOSODP(q) (q)->q_stream->sd_sodirect
-
-
-#ifdef	__cplusplus
-}
-#endif
-
-#endif	/* _SYS_SODIRECT_H */
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,665 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/errno.h>
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/ddi.h>
-#include <sys/stat.h>
-#include <sys/sunddi.h>
-#include <sys/file.h>
-#include <sys/open.h>
-#include <sys/modctl.h>
-#include <sys/ddi_impldefs.h>
-#include <sys/sysmacros.h>
-
-#include <sys/ioat.h>
-
-static int ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred);
-static int ioat_close(dev_t devp, int flag, int otyp, cred_t *cred);
-static int ioat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
-static int ioat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
-static int ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
-    void **result);
-
-static 	struct cb_ops ioat_cb_ops = {
-	ioat_open,		/* cb_open */
-	ioat_close,		/* cb_close */
-	nodev,			/* cb_strategy */
-	nodev,			/* cb_print */
-	nodev,			/* cb_dump */
-	nodev,			/* cb_read */
-	nodev,			/* cb_write */
-	ioat_ioctl,		/* cb_ioctl */
-	nodev,			/* cb_devmap */
-	nodev,			/* cb_mmap */
-	nodev,			/* cb_segmap */
-	nochpoll,		/* cb_chpoll */
-	ddi_prop_op,		/* cb_prop_op */
-	NULL,			/* cb_stream */
-	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
-	CB_REV
-};
-
-static struct dev_ops ioat_dev_ops = {
-	DEVO_REV,		/* devo_rev */
-	0,			/* devo_refcnt */
-	ioat_getinfo,		/* devo_getinfo */
-	nulldev,		/* devo_identify */
-	nulldev,		/* devo_probe */
-	ioat_attach,		/* devo_attach */
-	ioat_detach,		/* devo_detach */
-	nodev,			/* devo_reset */
-	&ioat_cb_ops,		/* devo_cb_ops */
-	NULL,			/* devo_bus_ops */
-	NULL			/* power */
-};
-
-static struct modldrv ioat_modldrv = {
-	&mod_driverops,		/* Type of module.  This one is a driver */
-	"ioat driver v%I%",	/* Name of the module. */
-	&ioat_dev_ops,		/* driver ops */
-};
-
-static struct modlinkage ioat_modlinkage = {
-	MODREV_1,
-	(void *) &ioat_modldrv,
-	NULL
-};
-
-
-void *ioat_statep;
-
-static int ioat_chip_init(ioat_state_t *state);
-static void ioat_chip_fini(ioat_state_t *state);
-static int ioat_drv_init(ioat_state_t *state);
-static void ioat_drv_fini(ioat_state_t *state);
-static uint_t ioat_isr(caddr_t parm);
-static void ioat_intr_enable(ioat_state_t *state);
-static void ioat_intr_disable(ioat_state_t *state);
-void ioat_detach_finish(ioat_state_t *state);
-
-
-ddi_device_acc_attr_t ioat_acc_attr = {
-	DDI_DEVICE_ATTR_V0,		/* devacc_attr_version */
-	DDI_NEVERSWAP_ACC,		/* devacc_attr_endian_flags */
-	DDI_STORECACHING_OK_ACC,	/* devacc_attr_dataorder */
-	DDI_DEFAULT_ACC			/* devacc_attr_access */
-};
-
-/* dcopy callback interface */
-dcopy_device_cb_t ioat_cb = {
-	DCOPY_DEVICECB_V0,
-	0,		/* reserved */
-	ioat_channel_alloc,
-	ioat_channel_free,
-	ioat_cmd_alloc,
-	ioat_cmd_free,
-	ioat_cmd_post,
-	ioat_cmd_poll,
-	ioat_unregister_complete
-};
-
-/*
- * _init()
- */
-int
-_init(void)
-{
-	int e;
-
-	e = ddi_soft_state_init(&ioat_statep, sizeof (ioat_state_t), 1);
-	if (e != 0) {
-		return (e);
-	}
-
-	e = mod_install(&ioat_modlinkage);
-	if (e != 0) {
-		ddi_soft_state_fini(&ioat_statep);
-		return (e);
-	}
-
-	return (0);
-}
-
-/*
- * _info()
- */
-int
-_info(struct modinfo *modinfop)
-{
-	return (mod_info(&ioat_modlinkage, modinfop));
-}
-
-/*
- * _fini()
- */
-int
-_fini(void)
-{
-	int e;
-
-	e = mod_remove(&ioat_modlinkage);
-	if (e != 0) {
-		return (e);
-	}
-
-	ddi_soft_state_fini(&ioat_statep);
-
-	return (0);
-}
-
-/*
- * ioat_attach()
- */
-static int
-ioat_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
-{
-	ioat_state_t *state;
-	int instance;
-	int e;
-
-
-	switch (cmd) {
-	case DDI_ATTACH:
-		break;
-
-	case DDI_RESUME:
-		instance = ddi_get_instance(dip);
-		state = ddi_get_soft_state(ioat_statep, instance);
-		if (state == NULL) {
-			return (DDI_FAILURE);
-		}
-		e = ioat_channel_resume(state);
-		if (e != DDI_SUCCESS) {
-			return (DDI_FAILURE);
-		}
-		ioat_intr_enable(state);
-		return (DDI_SUCCESS);
-
-	default:
-		return (DDI_FAILURE);
-	}
-
-	instance = ddi_get_instance(dip);
-	e = ddi_soft_state_zalloc(ioat_statep, instance);
-	if (e != DDI_SUCCESS) {
-		return (DDI_FAILURE);
-	}
-	state = ddi_get_soft_state(ioat_statep, instance);
-	if (state == NULL) {
-		goto attachfail_get_soft_state;
-	}
-
-	state->is_dip = dip;
-	state->is_instance = instance;
-
-	/* setup the registers, save away some device info */
-	e = ioat_chip_init(state);
-	if (e != DDI_SUCCESS) {
-		goto attachfail_chip_init;
-	}
-
-	/* initialize driver state, must be after chip init */
-	e = ioat_drv_init(state);
-	if (e != DDI_SUCCESS) {
-		goto attachfail_drv_init;
-	}
-
-	/* create the minor node (for the ioctl) */
-	e = ddi_create_minor_node(dip, "ioat", S_IFCHR, instance, DDI_PSEUDO,
-	    0);
-	if (e != DDI_SUCCESS) {
-		goto attachfail_minor_node;
-	}
-
-	/* Enable device interrupts */
-	ioat_intr_enable(state);
-
-	/* Report that driver was loaded */
-	ddi_report_dev(dip);
-
-	/* register with dcopy */
-	e = dcopy_device_register(state, &state->is_deviceinfo,
-	    &state->is_device_handle);
-	if (e != DCOPY_SUCCESS) {
-		goto attachfail_register;
-	}
-
-	return (DDI_SUCCESS);
-
-attachfail_register:
-	ioat_intr_disable(state);
-	ddi_remove_minor_node(dip, NULL);
-attachfail_minor_node:
-	ioat_drv_fini(state);
-attachfail_drv_init:
-	ioat_chip_fini(state);
-attachfail_chip_init:
-attachfail_get_soft_state:
-	(void) ddi_soft_state_free(ioat_statep, instance);
-
-	return (DDI_FAILURE);
-}
-
-/*
- * ioat_detach()
- */
-static int
-ioat_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
-{
-	ioat_state_t *state;
-	int instance;
-	int e;
-
-
-	instance = ddi_get_instance(dip);
-	state = ddi_get_soft_state(ioat_statep, instance);
-	if (state == NULL) {
-		return (DDI_FAILURE);
-	}
-
-	switch (cmd) {
-	case DDI_DETACH:
-		break;
-
-	case DDI_SUSPEND:
-		ioat_channel_suspend(state);
-		return (DDI_SUCCESS);
-
-	default:
-		return (DDI_FAILURE);
-	}
-
-	/*
-	 * try to unregister from dcopy.  Since this driver doesn't follow the
-	 * traditional parent/child model, we may still be in use so we can't
-	 * detach yet.
-	 */
-	e = dcopy_device_unregister(&state->is_device_handle);
-	if (e != DCOPY_SUCCESS) {
-		if (e == DCOPY_PENDING) {
-			cmn_err(CE_NOTE, "device busy, performing asynchronous"
-			    " detach\n");
-		}
-		return (DDI_FAILURE);
-	}
-
-	ioat_detach_finish(state);
-
-	return (DDI_SUCCESS);
-}
-
-/*
- * ioat_getinfo()
- */
-/*ARGSUSED*/
-static int
-ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
-{
-	ioat_state_t *state;
-	int instance;
-	dev_t dev;
-	int e;
-
-
-	dev = (dev_t)arg;
-	instance = getminor(dev);
-
-	switch (cmd) {
-	case DDI_INFO_DEVT2DEVINFO:
-		state = ddi_get_soft_state(ioat_statep, instance);
-		if (state == NULL) {
-			return (DDI_FAILURE);
-		}
-		*result = (void *)state->is_dip;
-		e = DDI_SUCCESS;
-		break;
-
-	case DDI_INFO_DEVT2INSTANCE:
-		*result = (void *)(uintptr_t)instance;
-		e = DDI_SUCCESS;
-		break;
-
-	default:
-		e = DDI_FAILURE;
-		break;
-	}
-
-	return (e);
-}
-
-
-/*
- * ioat_open()
- */
-/*ARGSUSED*/
-static int
-ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred)
-{
-	ioat_state_t *state;
-	int instance;
-
-	instance = getminor(*devp);
-	state = ddi_get_soft_state(ioat_statep, instance);
-	if (state == NULL) {
-		return (ENXIO);
-	}
-
-	return (0);
-}
-
-
-/*
- * ioat_close()
- */
-/*ARGSUSED*/
-static int
-ioat_close(dev_t devp, int flag, int otyp, cred_t *cred)
-{
-	return (0);
-}
-
-
-/*
- * ioat_chip_init()
- */
-static int
-ioat_chip_init(ioat_state_t *state)
-{
-	ddi_device_acc_attr_t attr;
-	int e;
-
-
-	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
-	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
-	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
-
-	e =  ddi_regs_map_setup(state->is_dip, 1, (caddr_t *)&state->is_genregs,
-	    0, 0, &attr, &state->is_reg_handle);
-	if (e != DDI_SUCCESS) {
-		goto chipinitfail_regsmap;
-	}
-
-	/* save away ioat chip info */
-	state->is_num_channels = (uint_t)ddi_get8(state->is_reg_handle,
-	    &state->is_genregs[IOAT_CHANCNT]);
-	state->is_maxxfer = (uint_t)ddi_get8(state->is_reg_handle,
-	    &state->is_genregs[IOAT_XFERCAP]);
-	state->is_chanoff = (uintptr_t)ddi_get16(state->is_reg_handle,
-	    (uint16_t *)&state->is_genregs[IOAT_PERPORT_OFF]);
-	state->is_cbver = (uint_t)ddi_get8(state->is_reg_handle,
-	    &state->is_genregs[IOAT_CBVER]);
-	state->is_intrdelay = (uint_t)ddi_get16(state->is_reg_handle,
-	    (uint16_t *)&state->is_genregs[IOAT_INTRDELAY]);
-	state->is_status = (uint_t)ddi_get16(state->is_reg_handle,
-	    (uint16_t *)&state->is_genregs[IOAT_CSSTATUS]);
-	state->is_capabilities = (uint_t)ddi_get32(state->is_reg_handle,
-	    (uint32_t *)&state->is_genregs[IOAT_DMACAPABILITY]);
-
-	if (state->is_cbver & 0x10) {
-		state->is_ver = IOAT_CBv1;
-	} else if (state->is_cbver & 0x20) {
-		state->is_ver = IOAT_CBv2;
-	} else {
-		goto chipinitfail_version;
-	}
-
-	return (DDI_SUCCESS);
-
-chipinitfail_version:
-	ddi_regs_map_free(&state->is_reg_handle);
-chipinitfail_regsmap:
-	return (DDI_FAILURE);
-}
-
-
-/*
- * ioat_chip_fini()
- */
-static void
-ioat_chip_fini(ioat_state_t *state)
-{
-	ddi_regs_map_free(&state->is_reg_handle);
-}
-
-
-/*
- * ioat_drv_init()
- */
-static int
-ioat_drv_init(ioat_state_t *state)
-{
-	ddi_acc_handle_t handle;
-	int e;
-
-
-	mutex_init(&state->is_mutex, NULL, MUTEX_DRIVER, NULL);
-
-	state->is_deviceinfo.di_dip = state->is_dip;
-	state->is_deviceinfo.di_num_dma = state->is_num_channels;
-	state->is_deviceinfo.di_maxxfer = state->is_maxxfer;
-	state->is_deviceinfo.di_capabilities = state->is_capabilities;
-	state->is_deviceinfo.di_cb = &ioat_cb;
-
-	e = pci_config_setup(state->is_dip, &handle);
-	if (e != DDI_SUCCESS) {
-		goto drvinitfail_config_setup;
-	}
-
-	/* read in Vendor ID */
-	state->is_deviceinfo.di_id = (uint64_t)pci_config_get16(handle, 0);
-	state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 16;
-
-	/* read in Device ID */
-	state->is_deviceinfo.di_id |= (uint64_t)pci_config_get16(handle, 2);
-	state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 32;
-
-	/* Add in chipset version */
-	state->is_deviceinfo.di_id |= (uint64_t)state->is_cbver;
-	pci_config_teardown(&handle);
-
-	e = ddi_intr_hilevel(state->is_dip, 0);
-	if (e != 0) {
-		cmn_err(CE_WARN, "hilevel interrupt not supported\n");
-		goto drvinitfail_hilevel;
-	}
-
-	/* we don't support MSIs for v2 yet */
-	e = ddi_add_intr(state->is_dip, 0, NULL, NULL, ioat_isr,
-	    (caddr_t)state);
-	if (e != DDI_SUCCESS) {
-		goto drvinitfail_add_intr;
-	}
-
-	e = ddi_get_iblock_cookie(state->is_dip, 0, &state->is_iblock_cookie);
-	if (e != DDI_SUCCESS) {
-		goto drvinitfail_iblock_cookie;
-	}
-
-	e = ioat_channel_init(state);
-	if (e != DDI_SUCCESS) {
-		goto drvinitfail_channel_init;
-	}
-
-	return (DDI_SUCCESS);
-
-drvinitfail_channel_init:
-drvinitfail_iblock_cookie:
-	ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie);
-drvinitfail_add_intr:
-drvinitfail_hilevel:
-drvinitfail_config_setup:
-	mutex_destroy(&state->is_mutex);
-
-	return (DDI_FAILURE);
-}
-
-
-/*
- * ioat_drv_fini()
- */
-static void
-ioat_drv_fini(ioat_state_t *state)
-{
-	ioat_channel_fini(state);
-	ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie);
-	mutex_destroy(&state->is_mutex);
-}
-
-
-/*
- * ioat_unregister_complete()
- */
-void
-ioat_unregister_complete(void *device_private, int status)
-{
-	ioat_state_t *state;
-
-
-	state = device_private;
-
-	if (status != DCOPY_SUCCESS) {
-		cmn_err(CE_WARN, "asynchronous detach aborted\n");
-		return;
-	}
-
-	cmn_err(CE_CONT, "detach completing\n");
-	ioat_detach_finish(state);
-}
-
-
-/*
- * ioat_detach_finish()
- */
-void
-ioat_detach_finish(ioat_state_t *state)
-{
-	ioat_intr_disable(state);
-	ddi_remove_minor_node(state->is_dip, NULL);
-	ioat_drv_fini(state);
-	ioat_chip_fini(state);
-	(void) ddi_soft_state_free(ioat_statep, state->is_instance);
-}
-
-
-/*
- * ioat_intr_enable()
- */
-static void
-ioat_intr_enable(ioat_state_t *state)
-{
-	uint32_t intr_status;
-
-
-	/* Clear any pending interrupts */
-	intr_status = ddi_get32(state->is_reg_handle,
-	    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]);
-	if (intr_status != 0) {
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS],
-		    intr_status);
-	}
-
-	/* Enable interrupts on the device */
-	ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL],
-	    IOAT_INTRCTL_MASTER_EN);
-}
-
-
-/*
- * ioat_intr_disable()
- */
-static void
-ioat_intr_disable(ioat_state_t *state)
-{
-	/*
-	 * disable interrupts on the device. A read of the interrupt control
-	 * register clears the enable bit.
-	 */
-	(void) ddi_get8(state->is_reg_handle,
-	    &state->is_genregs[IOAT_INTRCTL]);
-}
-
-
-/*
- * ioat_isr()
- */
-static uint_t
-ioat_isr(caddr_t parm)
-{
-	uint32_t intr_status;
-	ioat_state_t *state;
-	uint8_t intrctrl;
-	uint32_t chan;
-	uint_t r;
-	int i;
-
-	state = (ioat_state_t *)parm;
-
-	intrctrl = ddi_get8(state->is_reg_handle,
-	    &state->is_genregs[IOAT_INTRCTL]);
-	/* master interrupt enable should always be set */
-	ASSERT(intrctrl & IOAT_INTRCTL_MASTER_EN);
-
-	/* If the interrupt status bit isn't set, it's not ours */
-	if (!(intrctrl & IOAT_INTRCTL_INTR_STAT)) {
-		/* re-set master interrupt enable (since it clears on read) */
-		ddi_put8(state->is_reg_handle,
-		    &state->is_genregs[IOAT_INTRCTL], intrctrl);
-		return (DDI_INTR_UNCLAIMED);
-	}
-
-	/* see which channels generated the interrupt */
-	intr_status = ddi_get32(state->is_reg_handle,
-	    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]);
-
-	/* call the intr handler for the channels */
-	r = DDI_INTR_UNCLAIMED;
-	chan = 1;
-	for (i = 0; i < state->is_num_channels; i++) {
-		if (intr_status & chan) {
-			ioat_channel_intr(&state->is_channel[i]);
-			r = DDI_INTR_CLAIMED;
-		}
-		chan = chan << 1;
-	}
-
-	/*
-	 * if interrupt status bit was set, there should have been an
-	 * attention status bit set too.
-	 */
-	ASSERT(r == DDI_INTR_CLAIMED);
-
-	/* re-set master interrupt enable (since it clears on read) */
-	ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL],
-	    intrctrl);
-
-	return (r);
-}
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
-
-#
-# force attach this driver to support misc/driver
-ddi-forceattach=1;
-
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1319 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/errno.h>
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/ddi.h>
-#include <sys/stat.h>
-#include <sys/sunddi.h>
-#include <sys/file.h>
-#include <sys/open.h>
-#include <sys/modctl.h>
-#include <sys/ddi_impldefs.h>
-#include <sys/sysmacros.h>
-#include <vm/hat.h>
-#include <vm/as.h>
-#include <sys/mach_mmu.h>
-#ifdef __xpv
-#include <sys/hypervisor.h>
-#endif
-
-#include <sys/ioat.h>
-
-
-extern ddi_device_acc_attr_t ioat_acc_attr;
-
-/* dma attr for the descriptor rings */
-ddi_dma_attr_t ioat_desc_dma_attr = {
-	DMA_ATTR_V0,		/* dma_attr_version */
-	0x0,			/* dma_attr_addr_lo */
-	0xffffffffffffffff,	/* dma_attr_addr_hi */
-	0xffffffff,		/* dma_attr_count_max */
-	0x1000,			/* dma_attr_align */
-	0x1,			/* dma_attr_burstsizes */
-	0x1,			/* dma_attr_minxfer */
-	0xffffffff,		/* dma_attr_maxxfer */
-	0xffffffff,		/* dma_attr_seg */
-	0x1,			/* dma_attr_sgllen */
-	0x1,			/* dma_attr_granular */
-	0x0,			/* dma_attr_flags */
-};
-
-/* dma attr for the completion buffers */
-ddi_dma_attr_t ioat_cmpl_dma_attr = {
-	DMA_ATTR_V0,		/* dma_attr_version */
-	0x0,			/* dma_attr_addr_lo */
-	0xffffffffffffffff,	/* dma_attr_addr_hi */
-	0xffffffff,		/* dma_attr_count_max */
-	0x40,			/* dma_attr_align */
-	0x1,			/* dma_attr_burstsizes */
-	0x1,			/* dma_attr_minxfer */
-	0xffffffff,		/* dma_attr_maxxfer */
-	0xffffffff,		/* dma_attr_seg */
-	0x1,			/* dma_attr_sgllen */
-	0x1,			/* dma_attr_granular */
-	0x0,			/* dma_attr_flags */
-};
-
-static int ioat_completion_alloc(ioat_channel_t channel);
-static void ioat_completion_free(ioat_channel_t channel);
-static void ioat_channel_start(ioat_channel_t channel);
-static void ioat_channel_reset(ioat_channel_t channel);
-
-int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
-void ioat_ring_free(ioat_channel_t channel);
-void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
-int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
-    dcopy_cmd_t cmd);
-
-static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
-    uint64_t dest_addr, uint32_t size, uint32_t ctrl);
-static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
-
-
-/*
- * ioat_channel_init()
- */
-int
-ioat_channel_init(ioat_state_t *state)
-{
-	int i;
-
-	/*
-	 * initialize each dma channel's state which doesn't change across
-	 * channel alloc/free.
-	 */
-	state->is_chansize = sizeof (struct ioat_channel_s) *
-	    state->is_num_channels;
-	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
-	for (i = 0; i < state->is_num_channels; i++) {
-		state->is_channel[i].ic_state = state;
-		state->is_channel[i].ic_regs = (uint8_t *)
-		    ((uintptr_t)state->is_genregs +
-		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
-	}
-
-	/* initial the allocator (from 0 to state->is_num_channels) */
-	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
-
-	return (DDI_SUCCESS);
-}
-
-
-/*
- * ioat_channel_fini()
- */
-void
-ioat_channel_fini(ioat_state_t *state)
-{
-	ioat_rs_fini(&state->is_channel_rs);
-	kmem_free(state->is_channel, state->is_chansize);
-}
-
-
-/*
- * ioat_channel_alloc()
- *   NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are
- *	available)
- */
-/*ARGSUSED*/
-int
-ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
-    uint_t size, dcopy_query_channel_t *info, void *channel_private)
-{
-#define	CHANSTRSIZE	20
-	struct ioat_channel_s *channel;
-	char chanstr[CHANSTRSIZE];
-	ioat_channel_t *chan;
-	ioat_state_t *state;
-	size_t cmd_size;
-	uint_t chan_num;
-	uint32_t estat;
-	int e;
-
-
-	state = (ioat_state_t *)device_private;
-	chan = (ioat_channel_t *)channel_private;
-
-	/* allocate a H/W channel */
-	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
-	if (e != DDI_SUCCESS) {
-		return (DCOPY_NORESOURCES);
-	}
-
-	channel = &state->is_channel[chan_num];
-	channel->ic_inuse = B_TRUE;
-	channel->ic_chan_num = chan_num;
-	channel->ic_ver = state->is_ver;
-	channel->ic_dca_active = B_FALSE;
-	channel->ic_channel_state = IOAT_CHANNEL_OK;
-	channel->ic_dcopy_handle = handle;
-
-#ifdef	DEBUG
-	{
-		/* if we're cbv2, verify that the V2 compatibility bit is set */
-		uint16_t reg;
-		if (channel->ic_ver == IOAT_CBv2) {
-			reg = ddi_get16(state->is_reg_handle,
-			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
-			ASSERT(reg & 0x2);
-		}
-	}
-#endif
-
-	/*
-	 * Configure DMA channel
-	 *   Channel In Use
-	 *   Error Interrupt Enable
-	 *   Any Error Abort Enable
-	 *   Error Completion Enable
-	 */
-	ddi_put16(state->is_reg_handle,
-	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
-
-	/* check channel error register, clear any errors */
-	estat = ddi_get32(state->is_reg_handle,
-	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
-	if (estat != 0) {
-#ifdef	DEBUG
-		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
-		    "enable\n", estat, channel->ic_chan_num);
-#endif
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
-	}
-
-	/* allocate and initialize the descriptor buf */
-	e = ioat_ring_alloc(channel, size);
-	if (e != DDI_SUCCESS) {
-		goto chinitfail_desc_alloc;
-	}
-
-	/* allocate and initialize the completion space */
-	e = ioat_completion_alloc(channel);
-	if (e != DDI_SUCCESS) {
-		goto chinitfail_completion_alloc;
-	}
-
-	/* setup kmem_cache for commands */
-	cmd_size = sizeof (struct dcopy_cmd_s) +
-	    sizeof (struct dcopy_cmd_priv_s) +
-	    sizeof (struct ioat_cmd_private_s);
-	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
-	    state->is_instance, channel->ic_chan_num);
-	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
-	    NULL, NULL, NULL, NULL, NULL, 0);
-	if (channel->ic_cmd_cache == NULL) {
-		goto chinitfail_kmem_cache;
-	}
-
-	/* start-up the channel */
-	ioat_channel_start(channel);
-
-	/* fill in the channel info returned to dcopy */
-	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
-	info->qc_id = state->is_deviceinfo.di_id;
-	info->qc_capabilities = (uint64_t)state->is_capabilities;
-	info->qc_channel_size = (uint64_t)size;
-	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
-	if (channel->ic_ver == IOAT_CBv1) {
-		info->qc_dca_supported = B_FALSE;
-	} else {
-		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
-			info->qc_dca_supported = B_TRUE;
-		} else {
-			info->qc_dca_supported = B_FALSE;
-		}
-	}
-
-	*chan = channel;
-
-	return (DCOPY_SUCCESS);
-
-chinitfail_kmem_cache:
-	ioat_completion_free(channel);
-chinitfail_completion_alloc:
-	ioat_ring_free(channel);
-chinitfail_desc_alloc:
-	return (DCOPY_FAILURE);
-}
-
-
-/*
- * ioat_channel_suspend()
- */
-/*ARGSUSED*/
-void
-ioat_channel_suspend(ioat_state_t *state)
-{
-	/*
-	 * normally you would disable interrupts and reset the H/W here. But
-	 * since the suspend framework doesn't know who is using us, it may
-	 * not suspend their I/O before us.  Since we won't actively be doing
-	 * any DMA or interrupts unless someone asks us to, it's safe to not
-	 * do anything here.
-	 */
-}
-
-
-/*
- * ioat_channel_resume()
- */
-int
-ioat_channel_resume(ioat_state_t *state)
-{
-	ioat_channel_ring_t *ring;
-	ioat_channel_t channel;
-	uint32_t estat;
-	int i;
-
-
-	for (i = 0; i < state->is_num_channels; i++) {
-		channel = &state->is_channel[i];
-		ring = channel->ic_ring;
-
-		if (!channel->ic_inuse) {
-			continue;
-		}
-
-		/*
-		 * Configure DMA channel
-		 *   Channel In Use
-		 *   Error Interrupt Enable
-		 *   Any Error Abort Enable
-		 *   Error Completion Enable
-		 */
-		ddi_put16(state->is_reg_handle,
-		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
-
-		/* check channel error register, clear any errors */
-		estat = ddi_get32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
-		if (estat != 0) {
-#ifdef	DEBUG
-			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
-			    " (%d) enable\n", estat, channel->ic_chan_num);
-#endif
-			ddi_put32(state->is_reg_handle,
-			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
-			    estat);
-		}
-
-		/* Re-initialize the ring */
-		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
-		/* write the physical address into the chain address register */
-		if (channel->ic_ver == IOAT_CBv1) {
-			ddi_put32(state->is_reg_handle,
-			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
-			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
-			ddi_put32(state->is_reg_handle,
-			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
-			    (uint32_t)(ring->cr_phys_desc >> 32));
-		} else {
-			ASSERT(channel->ic_ver == IOAT_CBv2);
-			ddi_put32(state->is_reg_handle,
-			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
-			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
-			ddi_put32(state->is_reg_handle,
-			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
-			    (uint32_t)(ring->cr_phys_desc >> 32));
-		}
-
-		/* re-initialize the completion buffer */
-		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
-		/* write the phys addr into the completion address register */
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
-		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
-		    (uint32_t)(channel->ic_phys_cmpl >> 32));
-
-		/* start-up the channel */
-		ioat_channel_start(channel);
-
-	}
-
-	return (DDI_SUCCESS);
-}
-
-
-/*
- * ioat_channel_free()
- */
-void
-ioat_channel_free(void *channel_private)
-{
-	struct ioat_channel_s *channel;
-	ioat_channel_t *chan;
-	ioat_state_t *state;
-	uint_t chan_num;
-
-
-	chan = (ioat_channel_t *)channel_private;
-	channel = *chan;
-
-	state = channel->ic_state;
-	chan_num = channel->ic_chan_num;
-
-	/* disable the interrupts */
-	ddi_put16(state->is_reg_handle,
-	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
-
-	ioat_channel_reset(channel);
-
-	/* cleanup command cache */
-	kmem_cache_destroy(channel->ic_cmd_cache);
-
-	/* clean-up/free-up the completion space and descriptors */
-	ioat_completion_free(channel);
-	ioat_ring_free(channel);
-
-	channel->ic_inuse = B_FALSE;
-
-	/* free the H/W DMA engine */
-	ioat_rs_free(state->is_channel_rs, chan_num);
-
-	*chan = NULL;
-}
-
-
-/*
- * ioat_channel_intr()
- */
-void
-ioat_channel_intr(ioat_channel_t channel)
-{
-	ioat_state_t *state;
-	uint16_t chanctrl;
-	uint32_t chanerr;
-	uint32_t status;
-
-
-	state = channel->ic_state;
-
-	if (channel->ic_ver == IOAT_CBv1) {
-		status = ddi_get32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
-	} else {
-		ASSERT(channel->ic_ver == IOAT_CBv2);
-		status = ddi_get32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
-	}
-
-	/* if that status isn't ACTIVE or IDLE, the channel has failed */
-	if (status & IOAT_CHAN_STS_FAIL_MASK) {
-		chanerr = ddi_get32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
-		cmn_err(CE_WARN, "channel(%d) fatal failure! "
-		    "chanstat_lo=0x%X; chanerr=0x%X\n",
-		    channel->ic_chan_num, status, chanerr);
-		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
-		ioat_channel_reset(channel);
-
-		return;
-	}
-
-	/*
-	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
-	 * ensure the write completes.
-	 */
-	chanctrl = ddi_get16(state->is_reg_handle,
-	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
-	ddi_put16(state->is_reg_handle,
-	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
-	(void) ddi_get16(state->is_reg_handle,
-	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
-
-	/* tell dcopy we have seen a completion on this channel */
-	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
-}
-
-
-/*
- * ioat_channel_start()
- */
-void
-ioat_channel_start(ioat_channel_t channel)
-{
-	ioat_chan_dma_desc_t desc;
-
-	/* set the first descriptor up as a NULL descriptor */
-	bzero(&desc, sizeof (desc));
-	desc.dd_size = 0;
-	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
-	    IOAT_DESC_CTRL_CMPL;
-	desc.dd_next_desc = 0x0;
-
-	/* setup the very first descriptor */
-	ioat_ring_seed(channel, &desc);
-}
-
-
-/*
- * ioat_channel_reset()
- */
-void
-ioat_channel_reset(ioat_channel_t channel)
-{
-	ioat_state_t *state;
-
-	state = channel->ic_state;
-
-	/* hit the reset bit */
-	if (channel->ic_ver == IOAT_CBv1) {
-		ddi_put8(state->is_reg_handle,
-		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
-	} else {
-		ASSERT(channel->ic_ver == IOAT_CBv2);
-		ddi_put8(state->is_reg_handle,
-		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
-	}
-}
-
-
-/*
- * ioat_completion_alloc()
- */
-int
-ioat_completion_alloc(ioat_channel_t channel)
-{
-	ioat_state_t *state;
-	size_t real_length;
-	uint_t cookie_cnt;
-	int e;
-
-
-	state = channel->ic_state;
-
-	/*
-	 * allocate memory for the completion status, zero it out, and get
-	 * the paddr. We'll allocate a physically contiguous cache line.
-	 */
-	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
-	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
-	if (e != DDI_SUCCESS) {
-		goto cmplallocfail_alloc_handle;
-	}
-	channel->ic_cmpl_alloc_size = 64;
-	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
-	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
-	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
-	    (caddr_t *)&channel->ic_cmpl, &real_length,
-	    &channel->ic_cmpl_handle);
-	if (e != DDI_SUCCESS) {
-		goto cmplallocfail_mem_alloc;
-	}
-	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
-	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
-	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
-	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
-	    &channel->ic_cmpl_cookie, &cookie_cnt);
-	if (e != DDI_SUCCESS) {
-		goto cmplallocfail_addr_bind;
-	}
-	ASSERT(cookie_cnt == 1);
-	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
-	    channel->ic_cmpl_alloc_size);
-	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
-
-	/* write the physical address into the completion address register */
-	ddi_put32(state->is_reg_handle,
-	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
-	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
-	ddi_put32(state->is_reg_handle,
-	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
-	    (uint32_t)(channel->ic_phys_cmpl >> 32));
-
-	return (DDI_SUCCESS);
-
-cmplallocfail_addr_bind:
-	ddi_dma_mem_free(&channel->ic_desc_handle);
-cmplallocfail_mem_alloc:
-	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
-cmplallocfail_alloc_handle:
-	return (DDI_FAILURE);
-}
-
-
-/*
- * ioat_completion_free()
- */
-void
-ioat_completion_free(ioat_channel_t channel)
-{
-	ioat_state_t *state;
-
-	state = channel->ic_state;
-
-	/* reset the completion address register */
-	ddi_put32(state->is_reg_handle,
-	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
-	ddi_put32(state->is_reg_handle,
-	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
-
-	/* unbind, then free up the memory, dma handle */
-	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
-	ddi_dma_mem_free(&channel->ic_cmpl_handle);
-	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
-}
-
-/*
- * ioat_ring_alloc()
- */
-int
-ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
-{
-	ioat_channel_ring_t *ring;
-	ioat_state_t *state;
-	size_t real_length;
-	uint_t cookie_cnt;
-	int e;
-
-
-	state = channel->ic_state;
-
-	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
-	channel->ic_ring = ring;
-	ring->cr_chan = channel;
-	ring->cr_post_cnt = 0;
-
-	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
-	    channel->ic_state->is_iblock_cookie);
-	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
-	    channel->ic_state->is_iblock_cookie);
-
-	/*
-	 * allocate memory for the ring, zero it out, and get the paddr.
-	 * We'll allocate a physically contiguous chunck of memory  which
-	 * simplifies the completion logic.
-	 */
-	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
-	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
-	if (e != DDI_SUCCESS) {
-		goto ringallocfail_alloc_handle;
-	}
-	/*
-	 * allocate one extra descriptor so we can simplify the empty/full
-	 * logic. Then round that number up to a whole multiple of 4.
-	 */
-	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
-	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
-	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
-	    sizeof (ioat_chan_desc_t);
-	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
-	    channel->ic_desc_alloc_size, &ioat_acc_attr,
-	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
-	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
-	if (e != DDI_SUCCESS) {
-		goto ringallocfail_mem_alloc;
-	}
-	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
-	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
-	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
-	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
-	    &channel->ic_desc_cookies, &cookie_cnt);
-	if (e != DDI_SUCCESS) {
-		goto ringallocfail_addr_bind;
-	}
-	ASSERT(cookie_cnt == 1);
-	ASSERT(channel->ic_desc_cookies.dmac_size ==
-	    channel->ic_desc_alloc_size);
-	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
-
-	/* write the physical address into the chain address register */
-	if (channel->ic_ver == IOAT_CBv1) {
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
-		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
-		    (uint32_t)(ring->cr_phys_desc >> 32));
-	} else {
-		ASSERT(channel->ic_ver == IOAT_CBv2);
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
-		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
-		    (uint32_t)(ring->cr_phys_desc >> 32));
-	}
-
-	return (DCOPY_SUCCESS);
-
-ringallocfail_addr_bind:
-	ddi_dma_mem_free(&channel->ic_desc_handle);
-ringallocfail_mem_alloc:
-	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
-ringallocfail_alloc_handle:
-	mutex_destroy(&ring->cr_desc_mutex);
-	mutex_destroy(&ring->cr_cmpl_mutex);
-	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
-
-	return (DCOPY_FAILURE);
-}
-
-
-/*
- * ioat_ring_free()
- */
-void
-ioat_ring_free(ioat_channel_t channel)
-{
-	ioat_state_t *state;
-
-
-	state = channel->ic_state;
-
-	/* reset the chain address register */
-	if (channel->ic_ver == IOAT_CBv1) {
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
-	} else {
-		ASSERT(channel->ic_ver == IOAT_CBv2);
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
-	}
-
-	/* unbind, then free up the memory, dma handle */
-	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
-	ddi_dma_mem_free(&channel->ic_desc_handle);
-	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
-
-	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
-	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
-	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
-
-}
-
-
-/*
- * ioat_ring_seed()
- *    write the first descriptor in the ring.
- */
-void
-ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
-{
-	ioat_channel_ring_t *ring;
-	ioat_chan_dma_desc_t *desc;
-	ioat_chan_dma_desc_t *prev;
-	ioat_state_t *state;
-
-
-	state = channel->ic_state;
-	ring = channel->ic_ring;
-
-	/* init the completion state */
-	ring->cr_cmpl_gen = 0x0;
-	ring->cr_cmpl_last = 0x0;
-
-	/* write in the descriptor and init the descriptor state */
-	ring->cr_post_cnt++;
-	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
-	ring->cr_desc_gen = 0;
-	ring->cr_desc_prev = 0;
-	ring->cr_desc_next = 1;
-
-	if (channel->ic_ver == IOAT_CBv1) {
-		/* hit the start bit */
-		ddi_put8(state->is_reg_handle,
-		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
-	} else {
-		/*
-		 * if this is CBv2, link the descriptor to an empty
-		 * descriptor
-		 */
-		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
-		desc = (ioat_chan_dma_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_next];
-		prev = (ioat_chan_dma_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_prev];
-
-		desc->dd_ctrl = 0;
-		desc->dd_next_desc = 0x0;
-
-		prev->dd_next_desc = ring->cr_phys_desc +
-		    (ring->cr_desc_next << 6);
-
-		ddi_put16(state->is_reg_handle,
-		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
-		    (uint16_t)1);
-	}
-
-}
-
-
-/*
- * ioat_cmd_alloc()
- */
-int
-ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
-{
-	ioat_cmd_private_t *priv;
-	ioat_channel_t channel;
-	dcopy_cmd_t oldcmd;
-	int kmflag;
-
-
-	channel = (ioat_channel_t)private;
-
-	if (flags & DCOPY_NOSLEEP) {
-		kmflag = KM_NOSLEEP;
-	} else {
-		kmflag = KM_SLEEP;
-	}
-
-	/* save the command passed incase DCOPY_ALLOC_LINK is set */
-	oldcmd = *cmd;
-
-	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
-	if (*cmd == NULL) {
-		return (DCOPY_NORESOURCES);
-	}
-
-	/* setup the dcopy and ioat private state pointers */
-	(*cmd)->dp_version = DCOPY_CMD_V0;
-	(*cmd)->dp_cmd = 0;
-	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
-	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
-	(*cmd)->dp_private->pr_device_cmd_private =
-	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
-	    sizeof (struct dcopy_cmd_priv_s));
-
-	/*
-	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
-	 * just allocated.
-	 */
-	priv = (*cmd)->dp_private->pr_device_cmd_private;
-	if (flags & DCOPY_ALLOC_LINK) {
-		priv->ip_next = oldcmd;
-	} else {
-		priv->ip_next = NULL;
-	}
-
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * ioat_cmd_free()
- */
-void
-ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
-{
-	ioat_cmd_private_t *priv;
-	ioat_channel_t channel;
-	dcopy_cmd_t next;
-	dcopy_cmd_t cmd;
-
-
-	channel = (ioat_channel_t)private;
-	cmd = *(cmdp);
-
-	/*
-	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
-	 * ioat_cmd_alloc() for more info).
-	 */
-	while (cmd != NULL) {
-		priv = cmd->dp_private->pr_device_cmd_private;
-		next = priv->ip_next;
-		kmem_cache_free(channel->ic_cmd_cache, cmd);
-		cmd = next;
-	}
-	*cmdp = NULL;
-}
-
-
-/*
- * ioat_cmd_post()
- */
-int
-ioat_cmd_post(void *private, dcopy_cmd_t cmd)
-{
-	ioat_channel_ring_t *ring;
-	ioat_cmd_private_t *priv;
-	ioat_channel_t channel;
-	ioat_state_t *state;
-	uint64_t dest_paddr;
-	uint64_t src_paddr;
-	uint64_t dest_addr;
-	uint32_t dest_size;
-	uint64_t src_addr;
-	uint32_t src_size;
-	size_t xfer_size;
-	uint32_t ctrl;
-	size_t size;
-	int e;
-
-
-	channel = (ioat_channel_t)private;
-	priv = cmd->dp_private->pr_device_cmd_private;
-
-	state = channel->ic_state;
-	ring = channel->ic_ring;
-
-	mutex_enter(&ring->cr_desc_mutex);
-
-	/* if the channel has had a fatal failure, return failure */
-	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
-		mutex_exit(&ring->cr_cmpl_mutex);
-		return (DCOPY_FAILURE);
-	}
-
-	/* make sure we have space for the descriptors */
-	e = ioat_ring_reserve(channel, ring, cmd);
-	if (e != DCOPY_SUCCESS) {
-		mutex_exit(&ring->cr_cmpl_mutex);
-		return (DCOPY_NORESOURCES);
-	}
-
-	/* if we support DCA, and the DCA flag is set, post a DCA desc */
-	if ((channel->ic_ver == IOAT_CBv2) &&
-	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
-		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
-	}
-
-	/*
-	 * the dma copy may have to be broken up into multiple descriptors
-	 * since we can't cross a page boundary.
-	 */
-	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
-	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
-	src_addr = cmd->dp.copy.cc_source;
-	dest_addr = cmd->dp.copy.cc_dest;
-	size = cmd->dp.copy.cc_size;
-	while (size > 0) {
-		src_paddr = pa_to_ma(src_addr);
-		dest_paddr = pa_to_ma(dest_addr);
-
-		/* adjust for any offset into the page */
-		if ((src_addr & PAGEOFFSET) == 0) {
-			src_size = PAGESIZE;
-		} else {
-			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
-		}
-		if ((dest_addr & PAGEOFFSET) == 0) {
-			dest_size = PAGESIZE;
-		} else {
-			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
-		}
-
-		/* take the smallest of the three */
-		xfer_size = MIN(src_size, dest_size);
-		xfer_size = MIN(xfer_size, size);
-
-		/*
-		 * if this is the last descriptor, and we are supposed to
-		 * generate a completion, generate a completion. same logic
-		 * for interrupt.
-		 */
-		ctrl = 0;
-		if (xfer_size == size) {
-			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
-				ctrl |= IOAT_DESC_CTRL_CMPL;
-			}
-			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
-				ctrl |= IOAT_DESC_CTRL_INTR;
-			}
-		}
-
-		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
-		    ctrl);
-
-		/* go to the next page */
-		src_addr += xfer_size;
-		dest_addr += xfer_size;
-		size -= xfer_size;
-	}
-
-	/*
-	 * if we are going to create a completion, save away the state so we
-	 * can poll on it.
-	 */
-	if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
-		priv->ip_generation = ring->cr_desc_gen_prev;
-		priv->ip_index = ring->cr_desc_prev;
-	}
-
-	/* if queue not defined, tell the DMA engine about it */
-	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
-		if (channel->ic_ver == IOAT_CBv1) {
-			ddi_put8(state->is_reg_handle,
-			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
-			    0x2);
-		} else {
-			ASSERT(channel->ic_ver == IOAT_CBv2);
-			ddi_put16(state->is_reg_handle,
-			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
-			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
-		}
-	}
-
-	mutex_exit(&ring->cr_desc_mutex);
-
-	return (DCOPY_SUCCESS);
-}
-
-
-/*
- * ioat_cmd_post_dca()
- */
-static void
-ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
-{
-	ioat_chan_dca_desc_t *desc;
-	ioat_chan_dca_desc_t *prev;
-	ioat_channel_t channel;
-
-
-	channel = ring->cr_chan;
-	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
-	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
-
-	/* keep track of the number of descs posted for cbv2 */
-	ring->cr_post_cnt++;
-
-	/*
-	 * post a context change desriptor. If dca has never been used on
-	 * this channel, or if the id doesn't match the last id used on this
-	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
-	 * and save away the id we're using.
-	 */
-	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
-	desc->dd_next_desc = 0x0;
-	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
-		channel->ic_dca_active = B_TRUE;
-		channel->ic_dca_current = dca_id;
-		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
-		desc->dd_cntx = dca_id;
-	}
-
-	/* Put the descriptors physical address in the previous descriptor */
-	/*LINTED:E_TRUE_LOGICAL_EXPR*/
-	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
-
-	/* sync the current desc */
-	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
-	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
-
-	/* update the previous desc and sync it too */
-	prev->dd_next_desc = ring->cr_phys_desc +
-	    (ring->cr_desc_next << 6);
-	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
-	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
-
-	/* save the current desc_next and desc_last for the completion */
-	ring->cr_desc_prev = ring->cr_desc_next;
-	ring->cr_desc_gen_prev = ring->cr_desc_gen;
-
-	/* increment next/gen so it points to the next free desc */
-	ring->cr_desc_next++;
-	if (ring->cr_desc_next > ring->cr_desc_last) {
-		ring->cr_desc_next = 0;
-		ring->cr_desc_gen++;
-	}
-
-	/*
-	 * if this is CBv2, link the descriptor to an empty descriptor. Since
-	 * we always leave on desc empty to detect full, this works out.
-	 */
-	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
-		desc = (ioat_chan_dca_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_next];
-		prev = (ioat_chan_dca_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_prev];
-		desc->dd_ctrl = 0;
-		desc->dd_next_desc = 0x0;
-
-		prev->dd_next_desc = ring->cr_phys_desc +
-		    (ring->cr_desc_next << 6);
-	}
-}
-
-
-/*
- * ioat_cmd_post_copy()
- *
- */
-static void
-ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
-    uint64_t dest_addr, uint32_t size, uint32_t ctrl)
-{
-	ioat_chan_dma_desc_t *desc;
-	ioat_chan_dma_desc_t *prev;
-	ioat_channel_t channel;
-
-
-	channel = ring->cr_chan;
-	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
-	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
-
-	/* keep track of the number of descs posted for cbv2 */
-	ring->cr_post_cnt++;
-
-	/* write in the DMA desc */
-	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
-	desc->dd_size = size;
-	desc->dd_src_paddr = src_addr;
-	desc->dd_dest_paddr = dest_addr;
-	desc->dd_next_desc = 0x0;
-
-	/* Put the descriptors physical address in the previous descriptor */
-	/*LINTED:E_TRUE_LOGICAL_EXPR*/
-	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
-
-	/* sync the current desc */
-	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
-	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
-
-	/* update the previous desc and sync it too */
-	prev->dd_next_desc = ring->cr_phys_desc +
-	    (ring->cr_desc_next << 6);
-	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
-	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
-
-	/* increment next/gen so it points to the next free desc */
-	ring->cr_desc_prev = ring->cr_desc_next;
-	ring->cr_desc_gen_prev = ring->cr_desc_gen;
-
-	/* increment next/gen so it points to the next free desc */
-	ring->cr_desc_next++;
-	if (ring->cr_desc_next > ring->cr_desc_last) {
-		ring->cr_desc_next = 0;
-		ring->cr_desc_gen++;
-	}
-
-	/*
-	 * if this is CBv2, link the descriptor to an empty descriptor. Since
-	 * we always leave on desc empty to detect full, this works out.
-	 */
-	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
-		desc = (ioat_chan_dma_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_next];
-		prev = (ioat_chan_dma_desc_t *)
-		    &ring->cr_desc[ring->cr_desc_prev];
-		desc->dd_size = 0;
-		desc->dd_ctrl = 0;
-		desc->dd_next_desc = 0x0;
-
-		prev->dd_next_desc = ring->cr_phys_desc +
-		    (ring->cr_desc_next << 6);
-	}
-}
-
-
-/*
- * ioat_cmd_poll()
- */
-int
-ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
-{
-	ioat_channel_ring_t *ring;
-	ioat_cmd_private_t *priv;
-	ioat_channel_t channel;
-	uint64_t generation;
-	uint64_t last_cmpl;
-
-
-	channel = (ioat_channel_t)private;
-	priv = cmd->dp_private->pr_device_cmd_private;
-
-	ring = channel->ic_ring;
-	ASSERT(ring != NULL);
-
-	mutex_enter(&ring->cr_cmpl_mutex);
-
-	/* if the channel had a fatal failure, fail all polls */
-	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
-	    IOAT_CMPL_FAILED(channel)) {
-		mutex_exit(&ring->cr_cmpl_mutex);
-		return (DCOPY_FAILURE);
-	}
-
-	/*
-	 * if the current completion is the same as the last time we read one,
-	 * post is still pending, nothing further to do. We track completions
-	 * as indexes into the ring since post uses VAs and the H/W returns
-	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
-	 */
-	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
-	    DDI_DMA_SYNC_FORCPU);
-	last_cmpl = IOAT_CMPL_INDEX(channel);
-	if (last_cmpl != ring->cr_cmpl_last) {
-		/*
-		 * if we wrapped the ring, increment the generation. Store
-		 * the last cmpl. This logic assumes a physically contiguous
-		 * ring.
-		 */
-		if (last_cmpl < ring->cr_cmpl_last) {
-			ring->cr_cmpl_gen++;
-		}
-		ring->cr_cmpl_last = last_cmpl;
-		generation = ring->cr_cmpl_gen;
-
-	} else {
-		generation = ring->cr_cmpl_gen;
-	}
-
-	mutex_exit(&ring->cr_cmpl_mutex);
-
-	/*
-	 * if cmd isn't passed in, well return.  Useful for updating the
-	 * consumer pointer (ring->cr_cmpl_last).
-	 */
-	if (cmd == NULL) {
-		return (DCOPY_PENDING);
-	}
-
-	/*
-	 * if the post's generation is old, this post has completed. No reason
-	 * to go check the last completion. if the generation is the same
-	 * and if the post is before or = to the last completion processed,
-	 * the post has completed.
-	 */
-	if (priv->ip_generation < generation) {
-		return (DCOPY_COMPLETED);
-	} else if ((priv->ip_generation == generation) &&
-	    (priv->ip_index <= last_cmpl)) {
-		return (DCOPY_COMPLETED);
-	}
-
-	return (DCOPY_PENDING);
-}
-
-
-/*
- * ioat_ring_reserve()
- */
-int
-ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
-    dcopy_cmd_t cmd)
-{
-	uint64_t dest_addr;
-	uint32_t dest_size;
-	uint64_t src_addr;
-	uint32_t src_size;
-	size_t xfer_size;
-	uint64_t desc;
-	int num_desc;
-	size_t size;
-	int i;
-
-
-	/*
-	 * figure out how many descriptors we need. This can include a dca
-	 * desc and multiple desc for a dma copy.
-	 */
-	num_desc = 0;
-	if ((channel->ic_ver == IOAT_CBv2) &&
-	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
-		num_desc++;
-	}
-	src_addr = cmd->dp.copy.cc_source;
-	dest_addr = cmd->dp.copy.cc_dest;
-	size = cmd->dp.copy.cc_size;
-	while (size > 0) {
-		num_desc++;
-
-		/* adjust for any offset into the page */
-		if ((src_addr & PAGEOFFSET) == 0) {
-			src_size = PAGESIZE;
-		} else {
-			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
-		}
-		if ((dest_addr & PAGEOFFSET) == 0) {
-			dest_size = PAGESIZE;
-		} else {
-			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
-		}
-
-		/* take the smallest of the three */
-		xfer_size = MIN(src_size, dest_size);
-		xfer_size = MIN(xfer_size, size);
-
-		/* go to the next page */
-		src_addr += xfer_size;
-		dest_addr += xfer_size;
-		size -= xfer_size;
-	}
-
-	/* Make sure we have space for these descriptors */
-	desc = ring->cr_desc_next;
-	for (i = 0; i < num_desc; i++) {
-
-		/*
-		 * if this is the last descriptor in the ring, see if the
-		 * last completed descriptor is #0.
-		 */
-		if (desc == ring->cr_desc_last) {
-			if (ring->cr_cmpl_last == 0) {
-				/*
-				 * if we think the ring is full, update where
-				 * the H/W really is and check for full again.
-				 */
-				(void) ioat_cmd_poll(channel, NULL);
-				if (ring->cr_cmpl_last == 0) {
-					return (DCOPY_NORESOURCES);
-				}
-			}
-
-			/*
-			 * go to the next descriptor which is zero in this
-			 * case.
-			 */
-			desc = 0;
-
-		/*
-		 * if this is not the last descriptor in the ring, see if
-		 * the last completion we saw was the next descriptor.
-		 */
-		} else {
-			if ((desc + 1) == ring->cr_cmpl_last) {
-				/*
-				 * if we think the ring is full, update where
-				 * the H/W really is and check for full again.
-				 */
-				(void) ioat_cmd_poll(channel, NULL);
-				if ((desc + 1) == ring->cr_cmpl_last) {
-					return (DCOPY_NORESOURCES);
-				}
-			}
-
-			/* go to the next descriptor */
-			desc++;
-		}
-	}
-
-	return (DCOPY_SUCCESS);
-}
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,343 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/errno.h>
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/kmem.h>
-#include <sys/ddi.h>
-#include <sys/stat.h>
-#include <sys/sunddi.h>
-#include <sys/file.h>
-#include <sys/open.h>
-#include <sys/modctl.h>
-#include <sys/ddi_impldefs.h>
-#include <sys/sysmacros.h>
-
-#include <vm/hat.h>
-#include <vm/as.h>
-
-#include <sys/ioat.h>
-
-
-extern void *ioat_statep;
-#define	ptob64(x)	(((uint64_t)(x)) << PAGESHIFT)
-
-static int ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode);
-#ifdef	DEBUG
-static int ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode);
-static int ioat_ioctl_test(ioat_state_t *state, void *arg, int mode);
-#endif
-
-/*
- * ioat_ioctl()
- */
-/*ARGSUSED*/
-int
-ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
-{
-	ioat_state_t *state;
-	int instance;
-	int e;
-
-
-	e = drv_priv(cred);
-	if (e != 0) {
-		return (EPERM);
-	}
-	instance = getminor(dev);
-	if (instance == -1) {
-		return (EBADF);
-	}
-	state = ddi_get_soft_state(ioat_statep, instance);
-	if (state == NULL) {
-		return (EBADF);
-	}
-
-	switch (cmd) {
-	case IOAT_IOCTL_READ_REG:
-		e = ioat_ioctl_rdreg(state, (void *)arg, mode);
-		break;
-#ifdef	DEBUG
-	case IOAT_IOCTL_WRITE_REG:
-		e = ioat_ioctl_wrreg(state, (void *)arg, mode);
-		break;
-	case IOAT_IOCTL_TEST:
-		e = ioat_ioctl_test(state, (void *)arg, mode);
-		break;
-#endif
-
-	default:
-		e = ENXIO;
-	}
-
-	return (e);
-}
-
-
-/*
- * ioat_ioctl_rdreg()
- */
-static int
-ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode)
-{
-	ioat_ioctl_rdreg_t rdreg;
-	int e;
-
-
-	e = ddi_copyin(arg, &rdreg, sizeof (ioat_ioctl_rdreg_t), mode);
-	if (e != 0) {
-		return (EFAULT);
-	}
-
-	/*
-	 * read a device register, where size is read size in bits, addr is
-	 * the offset into MMIO registers.
-	 */
-	switch (rdreg.size) {
-	case 8:
-		rdreg.data = (uint64_t)ddi_get8(state->is_reg_handle,
-		    (uint8_t *)&state->is_genregs[rdreg.addr]);
-		break;
-	case 16:
-		rdreg.data = (uint64_t)ddi_get16(state->is_reg_handle,
-		    (uint16_t *)&state->is_genregs[rdreg.addr]);
-		break;
-	case 32:
-		rdreg.data = (uint64_t)ddi_get32(state->is_reg_handle,
-		    (uint32_t *)&state->is_genregs[rdreg.addr]);
-		break;
-	case 64:
-		rdreg.data = (uint64_t)ddi_get64(state->is_reg_handle,
-		    (uint64_t *)&state->is_genregs[rdreg.addr]);
-		break;
-	default:
-		return (EFAULT);
-	}
-
-	e = ddi_copyout(&rdreg, arg, sizeof (ioat_ioctl_rdreg_t), mode);
-	if (e != 0) {
-		return (EFAULT);
-	}
-
-	return (0);
-}
-
-
-#ifdef	DEBUG
-/*
- * ioat_ioctl_wrreg()
- */
-static int
-ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode)
-{
-	ioat_ioctl_wrreg_t wrreg;
-	int e;
-
-
-	e = ddi_copyin(arg, &wrreg, sizeof (ioat_ioctl_wrreg_t), mode);
-	if (e != 0) {
-		return (EFAULT);
-	}
-
-	/*
-	 * write a device register, where size is write size in bits, addr is
-	 * the offset into MMIO registers.
-	 */
-	switch (wrreg.size) {
-	case 8:
-		ddi_put8(state->is_reg_handle,
-		    (uint8_t *)&state->is_genregs[wrreg.addr],
-		    (uint8_t)wrreg.data);
-		break;
-	case 16:
-		ddi_put16(state->is_reg_handle,
-		    (uint16_t *)&state->is_genregs[wrreg.addr],
-		    (uint16_t)wrreg.data);
-		break;
-	case 32:
-		ddi_put32(state->is_reg_handle,
-		    (uint32_t *)&state->is_genregs[wrreg.addr],
-		    (uint32_t)wrreg.data);
-		break;
-	case 64:
-		ddi_put64(state->is_reg_handle,
-		    (uint64_t *)&state->is_genregs[wrreg.addr],
-		    (uint64_t)wrreg.data);
-		break;
-	default:
-		return (EFAULT);
-	}
-
-	return (0);
-}
-
-
-/*
- * ioat_ioctl_test()
- */
-/*ARGSUSED*/
-static int
-ioat_ioctl_test(ioat_state_t *state, void *arg, int mode)
-{
-	dcopy_handle_t channel;
-	dcopy_cmd_t cmd;
-	uint8_t *source;
-	uint_t buf_size;
-	uint_t poll_cnt;
-	uint8_t *dest;
-	uint8_t *buf;
-	int flags;
-	int i;
-	int e;
-
-
-	/* allocate 2 paged aligned 4k pages */
-	buf_size = 0x1000;
-	buf = kmem_zalloc((buf_size * 2) + 0x1000, KM_SLEEP);
-	source = (uint8_t *)(((uintptr_t)buf + PAGEOFFSET) & PAGEMASK);
-	dest = source + buf_size;
-
-	/* Init source buffer */
-	for (i = 0; i < buf_size; i++) {
-		source[i] = (uint8_t)(i & 0xFF);
-	}
-
-	/* allocate a DMA channel */
-	e = dcopy_alloc(DCOPY_SLEEP, &channel);
-	if (e != DCOPY_SUCCESS) {
-		cmn_err(CE_CONT, "dcopy_alloc() failed\n");
-		goto testfail_alloc;
-	}
-
-	/*
-	 * post 32 DMA copy's from dest to dest.  These will complete in order
-	 * so they won't stomp on each other. We don't care about the data
-	 * right now which is why we go dest to dest.
-	 */
-	flags = DCOPY_SLEEP;
-	for (i = 0; i < 32; i++) {
-		/*
-		 * if this is the second command, link the commands from here
-		 * on out. We only want to keep track of the last command. We
-		 * will poll on the last command completing (which infers that
-		 * the other commands completed). If any of the previous
-		 * commands fail, so will the last one. Linking the commands
-		 * also allows us to only call free for the last command. free
-		 * will free up the entire chain of commands.
-		 */
-		if (i == 1) {
-			flags |= DCOPY_ALLOC_LINK;
-		}
-		e = dcopy_cmd_alloc(channel, flags, &cmd);
-		if (e != DCOPY_SUCCESS) {
-			cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n");
-			goto testfail_alloc;
-		}
-
-		ASSERT(cmd->dp_version == DCOPY_CMD_V0);
-		cmd->dp_cmd = DCOPY_CMD_COPY;
-		cmd->dp_flags = DCOPY_CMD_NOFLAGS;
-
-		/* do a bunch of dest to dest DMA's */
-		cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat,
-		    (caddr_t)source)) + ((uintptr_t)dest & PAGEOFFSET);
-		cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat,
-		    (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET);
-		cmd->dp.copy.cc_size = PAGESIZE;
-
-		e = dcopy_cmd_post(cmd);
-		if (e != DCOPY_SUCCESS) {
-			cmn_err(CE_CONT, "dcopy_post() failed\n");
-			goto testfail_post;
-		}
-	}
-
-	e = dcopy_cmd_alloc(channel, flags, &cmd);
-	if (e != DCOPY_SUCCESS) {
-		cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n");
-		goto testfail_alloc;
-	}
-
-	/* now queue up the DMA we are going to check status and data for  */
-	cmd->dp_cmd = DCOPY_CMD_COPY;
-	cmd->dp_flags = DCOPY_CMD_INTR;
-	cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat,
-	    (caddr_t)source)) + ((uintptr_t)source & PAGEOFFSET);
-	cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat,
-	    (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET);
-	cmd->dp.copy.cc_size = PAGESIZE;
-	e = dcopy_cmd_post(cmd);
-	if (e != DCOPY_SUCCESS) {
-		cmn_err(CE_CONT, "dcopy_post() failed\n");
-		goto testfail_post;
-	}
-
-	/* check the status of the last command */
-	poll_cnt = 0;
-	flags = DCOPY_POLL_NOFLAGS;
-	while ((e = dcopy_cmd_poll(cmd, flags)) == DCOPY_PENDING) {
-		poll_cnt++;
-		if (poll_cnt >= 16) {
-			flags |= DCOPY_POLL_BLOCK;
-		}
-	}
-	if (e != DCOPY_COMPLETED) {
-		cmn_err(CE_CONT, "dcopy_poll() failed\n");
-		goto testfail_poll;
-	}
-
-	/* since the cmd's are linked we only need to pass in the last cmd */
-	dcopy_cmd_free(&cmd);
-	dcopy_free(&channel);
-
-	/* verify the data */
-	for (i = 0; i < PAGESIZE; i++) {
-		if (dest[i] != (uint8_t)(i & 0xFF)) {
-			cmn_err(CE_CONT,
-			    "dcopy_data_compare() failed, %p[%d]: %x, %x\n",
-			    (void *)dest, i, dest[i], i & 0xFF);
-			return (-1);
-		}
-	}
-
-	kmem_free(buf, (buf_size * 2) + 0x1000);
-
-	return (0);
-
-testfail_data_compare:
-testfail_poll:
-testfail_post:
-	dcopy_cmd_free(&cmd);
-	dcopy_free(&channel);
-testfail_alloc:
-	kmem_free(buf, (buf_size * 2) + 0x1000);
-
-	return (-1);
-}
-#endif
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,246 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#include <sys/kmem.h>
-#include <sys/types.h>
-#include <sys/conf.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-
-#include <sys/ioat.h>
-
-
-/* structure used to keep track of resources */
-typedef struct ioat_rs_s {
-	/*
-	 * Bounds of resource allocation. We will start allocating at rs_min
-	 * and rollover at rs_max+1 (rs_max is included). e.g. for rs_min=0
-	 * and rs_max=7, we will have 8 total resources which can be alloced.
-	 */
-	uint_t rs_min;
-	uint_t rs_max;
-
-	/*
-	 * rs_free points to an array of 64-bit values used to track resource
-	 * allocation. rs_free_size is the free buffer size in bytes.
-	 */
-	uint64_t *rs_free;
-	uint_t rs_free_size;
-
-	/*
-	 * last tracks the last alloc'd resource. This allows us to do a round
-	 * robin allocation.
-	 */
-	uint_t rs_last;
-
-	kmutex_t rs_mutex;
-} ioat_rs_t;
-
-
-/*
- * ioat_rs_init()
- *    Initialize the resource structure. This structure will be protected
- *    by a mutex at the iblock_cookie passed in. init() returns a handle to be
- *    used for the rest of the resource functions. This code is written assuming
- *    that min_val will be close to 0. Therefore, we will allocate the free
- *    buffer only taking max_val into account.
- */
-void
-ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val,
-    ioat_rs_hdl_t *handle)
-{
-	ioat_rs_t *rstruct;
-	uint_t array_size;
-	uint_t index;
-
-
-	ASSERT(handle != NULL);
-	ASSERT(min_val < max_val);
-
-	/* alloc space for resource structure */
-	rstruct = kmem_alloc(sizeof (ioat_rs_t), KM_SLEEP);
-
-	/*
-	 * Test to see if the max value is 64-bit aligned. If so, we don't need
-	 * to allocate an extra 64-bit word. alloc space for free buffer
-	 * (8 bytes per uint64_t).
-	 */
-	if ((max_val & 0x3F) == 0) {
-		rstruct->rs_free_size = (max_val >> 6) * 8;
-	} else {
-		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
-	}
-	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
-
-	/* Initialize resource structure */
-	rstruct->rs_min = min_val;
-	rstruct->rs_last = min_val;
-	rstruct->rs_max = max_val;
-	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER,
-	    state->is_iblock_cookie);
-
-	/* Mark all resources as free */
-	array_size = rstruct->rs_free_size >> 3;
-	for (index = 0; index < array_size; index++) {
-		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
-	}
-
-	/* setup handle which is returned from this function */
-	*handle = rstruct;
-}
-
-
-/*
- * ioat_rs_fini()
- *    Frees up the space allocated in init().  Notice that a pointer to the
- *    handle is used for the parameter.  fini() will set the handle to NULL
- *    before returning.
- */
-void
-ioat_rs_fini(ioat_rs_hdl_t *handle)
-{
-	ioat_rs_t *rstruct;
-
-
-	ASSERT(handle != NULL);
-
-	rstruct = (ioat_rs_t *)*handle;
-
-	mutex_destroy(&rstruct->rs_mutex);
-	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
-	kmem_free(rstruct, sizeof (ioat_rs_t));
-
-	/* set handle to null.  This helps catch bugs. */
-	*handle = NULL;
-}
-
-
-/*
- * ioat_rs_alloc()
- *    alloc a resource. If alloc fails, we are out of resources.
- */
-int
-ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *resource)
-{
-	ioat_rs_t *rstruct;
-	uint_t array_idx;
-	uint64_t free;
-	uint_t index;
-	uint_t last;
-	uint_t min;
-	uint_t max;
-
-
-	ASSERT(handle != NULL);
-	ASSERT(resource != NULL);
-
-	rstruct = (ioat_rs_t *)handle;
-
-	mutex_enter(&rstruct->rs_mutex);
-	min = rstruct->rs_min;
-	max = rstruct->rs_max;
-
-	/*
-	 * Find a free resource. This will return out of the loop once it finds
-	 * a free resource. There are a total of 'max'-'min'+1 resources.
-	 * Performs a round robin allocation.
-	 */
-	for (index = min; index <= max; index++) {
-
-		array_idx = rstruct->rs_last >> 6;
-		free = rstruct->rs_free[array_idx];
-		last = rstruct->rs_last & 0x3F;
-
-		/* if the next resource to check is free */
-		if ((free & ((uint64_t)1 << last)) != 0) {
-			/* we are using this resource */
-			*resource = rstruct->rs_last;
-
-			/* take it out of the free list */
-			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
-
-			/*
-			 * increment the last count so we start checking the
-			 * next resource on the next alloc().  Note the rollover
-			 * at 'max'+1.
-			 */
-			rstruct->rs_last++;
-			if (rstruct->rs_last > max) {
-				rstruct->rs_last = rstruct->rs_min;
-			}
-
-			/* unlock the resource structure */
-			mutex_exit(&rstruct->rs_mutex);
-
-			return (DDI_SUCCESS);
-		}
-
-		/*
-		 * This resource is not free, lets go to the next one. Note the
-		 * rollover at 'max'.
-		 */
-		rstruct->rs_last++;
-		if (rstruct->rs_last > max) {
-			rstruct->rs_last = rstruct->rs_min;
-		}
-	}
-
-	mutex_exit(&rstruct->rs_mutex);
-
-	return (DDI_FAILURE);
-}
-
-
-/*
- * ioat_rs_free()
- *    Free the previously alloc'd resource.  Once a resource has been free'd,
- *    it can be used again when alloc is called.
- */
-void
-ioat_rs_free(ioat_rs_hdl_t handle, uint_t resource)
-{
-	ioat_rs_t *rstruct;
-	uint_t array_idx;
-	uint_t offset;
-
-
-	ASSERT(handle != NULL);
-
-	rstruct = (ioat_rs_t *)handle;
-	ASSERT(resource >= rstruct->rs_min);
-	ASSERT(resource <= rstruct->rs_max);
-
-	mutex_enter(&rstruct->rs_mutex);
-
-	/* Put the resource back in the free list */
-	array_idx = resource >> 6;
-	offset = resource & 0x3F;
-	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
-
-	mutex_exit(&rstruct->rs_mutex);
-}
--- a/deleted_files/usr/src/uts/i86pc/ioat/Makefile	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# uts/i86pc/ioat/Makefile
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-#
-#	This makefile drives the production of the ioat driver kernel
-#	module.
-#
-
-#
-#	Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE	= ../..
-
-#
-#	Define the module and object file sets.
-#
-MODULE		= ioat
-OBJECTS		= $(IOAT_OBJS:%=$(OBJS_DIR)/%)
-LINTS		= $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE	= $(ROOT_PSM_DRV_DIR)/$(MODULE)
-CONF_SRCDIR     = $(UTSBASE)/i86pc/io/ioat
-
-#
-#	Include common rules.
-#
-include $(UTSBASE)/i86pc/Makefile.i86pc
-
-#
-#	Define targets
-#
-ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
-LINT_TARGET	= $(MODULE).lint
-INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
-
-#
-#	Dependency
-#
-LDFLAGS		+= -dy -Nmisc/dcopy
-
-#
-#	Override defaults to build a unique, local modstubs.o.
-#
-MODSTUBS_DIR	 = $(OBJS_DIR)
-CLEANFILES	+= $(MODSTUBS_O)
-
-#
-#	Default build targets.
-#
-.KEEP_STATE:
-
-def:		$(DEF_DEPS)
-
-all:		$(ALL_DEPS)
-
-clean:		$(CLEAN_DEPS)
-
-clobber:	$(CLOBBER_DEPS)
-
-lint:		$(LINT_DEPS)
-
-modlintlib:	$(MODLINTLIB_DEPS)
-
-clean.lint:	$(CLEAN_LINT_DEPS)
-
-install:	$(INSTALL_DEPS)
-
-#
-#	Include common targets.
-#
-include $(UTSBASE)/i86pc/Makefile.targ
-
--- a/deleted_files/usr/src/uts/i86pc/sys/ioat.h	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,359 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SYS_IOAT_H
-#define	_SYS_IOAT_H
-
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/types.h>
-#include <sys/dcopy.h>
-#include <sys/dcopy_device.h>
-
-
-/* ioat ioctls */
-#define	IOATIOC			('T'<< 8)
-typedef enum {
-	IOAT_IOCTL_WRITE_REG	= (IOATIOC | 0x0),
-	IOAT_IOCTL_READ_REG	= (IOATIOC | 0x1),
-	IOAT_IOCTL_TEST		= (IOATIOC | 0x2)
-} ioat_ioctl_enum_t;
-
-typedef struct ioat_ioctl_reg_s {
-	uint_t		size;
-	uint_t		addr;
-	uint64_t	data;
-} ioat_ioctl_reg_t;
-typedef ioat_ioctl_reg_t ioat_ioctl_wrreg_t;
-typedef ioat_ioctl_reg_t ioat_ioctl_rdreg_t;
-
-#ifdef _KERNEL
-/* *** Driver Private Below *** */
-
-/* IOAT_DMACAPABILITY flags */
-#define	IOAT_DMACAP_PAGEBREAK	0x1
-#define	IOAT_DMACAP_CRC		0x2
-#define	IOAT_DMACAP_MARKERSKIP	0x4
-#define	IOAT_DMACAP_XOR		0x8
-#define	IOAT_DMACAP_DCA		0x10
-
-/* IOAT_INTRCTL bits */
-#define	IOAT_INTRCTL_MASTER_EN	0x1
-#define	IOAT_INTRCTL_INTR_STAT	0x2
-
-/* MMIO Registers */
-#define	IOAT_CHANCNT		0x0	/* 8-bit */
-#define	IOAT_XFERCAP		0x1	/* 8-bit */
-#define	IOAT_GENCTRL		0x2	/* 8-bit */
-#define	IOAT_INTRCTL		0x3	/* 8-bit */
-#define	IOAT_ATTNSTATUS		0x4	/* 32-bit */
-#define	IOAT_CBVER		0x8	/* 8-bit */
-#define	IOAT_PERPORT_OFF	0xA	/* 16-bit */
-#define	IOAT_INTRDELAY		0xC	/* 16-bit */
-#define	IOAT_CSSTATUS		0xE	/* 16-bit */
-#define	IOAT_DMACAPABILITY	0x10	/* 32-bit */
-
-#define	IOAT_CHANNELREG_OFFSET	0x80
-
-/* Channel Registers */
-#define	IOAT_CHAN_CTL		0x0	/* 16-bit */
-#define	IOAT_CHAN_COMP		0x2	/* 16-bit */
-#define	IOAT_CHAN_CMPL_LO	0x18	/* 32-bit */
-#define	IOAT_CHAN_CMPL_HI	0x1C	/* 32-bit */
-#define	IOAT_CHAN_ERR		0x28	/* 32-bit */
-#define	IOAT_CHAN_ERRMASK	0x2C	/* 32-bit */
-#define	IOAT_CHAN_DCACTRL	0x30	/* 32-bit */
-
-#define	IOAT_V1_CHAN_STS_LO	0x4	/* 32-bit */
-#define	IOAT_V1_CHAN_STS_HI	0x8	/* 32-bit */
-#define	IOAT_V1_CHAN_ADDR_LO	0x0C	/* 32-bit */
-#define	IOAT_V1_CHAN_ADDR_HI	0x10	/* 32-bit */
-#define	IOAT_V1_CHAN_CMD	0x14	/* 8-bit */
-
-#define	IOAT_V2_CHAN_CMD	0x4	/* 8-bit */
-#define	IOAT_V2_CHAN_CNT	0x6	/* 16-bit */
-#define	IOAT_V2_CHAN_STS_LO	0x8	/* 32-bit */
-#define	IOAT_V2_CHAN_STS_HI	0xC	/* 32-bit */
-#define	IOAT_V2_CHAN_ADDR_LO	0x10	/* 32-bit */
-#define	IOAT_V2_CHAN_ADDR_HI	0x14	/* 32-bit */
-
-#define	IOAT_CHAN_STS_ADDR_MASK		0xFFFFFFFFFFFFFFC0
-#define	IOAT_CHAN_STS_XFER_MASK		0x3F
-#define	IOAT_CHAN_STS_FAIL_MASK		0x6
-#define	IOAT_CMPL_INDEX(channel)	\
-	(((*channel->ic_cmpl & IOAT_CHAN_STS_ADDR_MASK) - \
-	ring->cr_phys_desc) >> 6)
-#define	IOAT_CMPL_FAILED(channel)	\
-	(*channel->ic_cmpl & IOAT_CHAN_STS_FAIL_MASK)
-
-
-typedef struct ioat_chan_desc_s {
-	uint32_t	dd_res0;
-	uint32_t	dd_ctrl;
-	uint64_t	dd_res1;
-	uint64_t	dd_res2;
-	uint64_t	dd_next_desc;
-	uint64_t	dd_res4;
-	uint64_t	dd_res5;
-	uint64_t	dd_res6;
-	uint64_t	dd_res7;
-} ioat_chan_desc_t;
-
-/* dca dd_ctrl bits */
-#define	IOAT_DESC_CTRL_OP_CNTX	((uint32_t)0xFF << 24)
-#define	IOAT_DESC_CTRL_CNTX_CHNG	0x1
-typedef struct ioat_chan_dca_desc_s {
-	uint32_t	dd_cntx;
-	uint32_t	dd_ctrl;
-	uint64_t	dd_res1;
-	uint64_t	dd_res2;
-	uint64_t	dd_next_desc;
-	uint64_t	dd_res4;
-	uint64_t	dd_res5;
-	uint64_t	dd_res6;
-	uint64_t	dd_res7;
-} ioat_chan_dca_desc_t;
-
-/* dma dd_ctrl bits */
-#define	IOAT_DESC_CTRL_OP_DMA	(0x0 << 24)
-#define	IOAT_DESC_DMACTRL_NULL	0x20
-#define	IOAT_DESC_CTRL_FENCE	0x10
-#define	IOAT_DESC_CTRL_CMPL	0x8
-#define	IOAT_DESC_CTRL_INTR	0x1
-typedef struct ioat_chan_dma_desc_s {
-	uint32_t	dd_size;
-	uint32_t	dd_ctrl;
-	uint64_t	dd_src_paddr;
-	uint64_t	dd_dest_paddr;
-	uint64_t	dd_next_desc;
-	uint64_t	dd_next_src_paddr;	/* v2 only */
-	uint64_t	dd_next_dest_paddr;	/* v2 only */
-	uint64_t	dd_res6;
-	uint64_t	dd_res7;
-} ioat_chan_dma_desc_t;
-
-
-typedef enum {
-	IOAT_CBv1,
-	IOAT_CBv2
-} ioat_version_t;
-
-/* ioat private data per command */
-typedef struct ioat_cmd_private_s {
-	uint64_t	ip_generation;
-	uint64_t	ip_index;
-	dcopy_cmd_t	ip_next;
-} ioat_cmd_private_t;
-
-/* descriptor ring state */
-typedef struct ioat_channel_ring_s {
-	/* protects cr_cmpl_gen & cr_cmpl_last */
-	kmutex_t		cr_cmpl_mutex;
-
-	/* desc ring generation for the last completion we saw */
-	uint64_t		cr_cmpl_gen;
-
-	/* last descriptor index we saw complete */
-	uint64_t		cr_cmpl_last;
-
-	/* protects cr_desc_* */
-	kmutex_t		cr_desc_mutex;
-
-	/*
-	 * last descriptor posted. used to update its next pointer when we
-	 * add a new desc. Also used to tack the completion (See comment for
-	 * cr_desc_gen_prev).
-	 */
-	uint64_t		cr_desc_prev;
-
-	/* where to put the next descriptor */
-	uint64_t		cr_desc_next;
-
-	/* what the current desc ring generation is */
-	uint64_t		cr_desc_gen;
-
-	/*
-	 * used during cmd_post to track the last desc posted. cr_desc_next
-	 * and cr_desc_gen will be pointing to the next free desc after
-	 * writing the descriptor to the ring. But we want to track the
-	 * completion for the last descriptor posted.
-	 */
-	uint64_t		cr_desc_gen_prev;
-
-	/* the last desc in the ring (for wrap) */
-	uint64_t		cr_desc_last;
-
-	/* pointer to the head of the ring */
-	ioat_chan_desc_t	*cr_desc;
-
-	/* physical address of the head of the ring */
-	uint64_t		cr_phys_desc;
-
-	/* back pointer to the channel state */
-	struct ioat_channel_s	*cr_chan;
-
-	/* for CB v2, number of desc posted (written to IOAT_V2_CHAN_CNT) */
-	uint_t			cr_post_cnt;
-} ioat_channel_ring_t;
-
-/* track channel state so we can handle a failure */
-typedef enum {
-	IOAT_CHANNEL_OK = 0,
-	IOAT_CHANNEL_IN_FAILURE = 1
-} ic_channel_state_t;
-
-typedef struct ioat_channel_s *ioat_channel_t;
-struct ioat_channel_s {
-	/* channel's ring state */
-	ioat_channel_ring_t	*ic_ring;
-
-	/* IOAT_CBv1 || IOAT_CBv2 */
-	ioat_version_t		ic_ver;
-
-	/*
-	 * state to determine if it's OK to post the the channel and if all
-	 * future polls should return failure.
-	 */
-	ic_channel_state_t	ic_channel_state;
-
-	/* channel command cache (*_cmd_alloc, *_cmd_free, etc) */
-	kmem_cache_t		*ic_cmd_cache;
-
-	/* dcopy state for dcopy_device_channel_notify() call */
-	dcopy_handle_t		ic_dcopy_handle;
-
-	/* location in memory where completions are DMA'ed into */
-	volatile uint64_t	*ic_cmpl;
-
-	/* channel specific registers */
-	uint8_t			*ic_regs;
-
-	/* if this channel is using DCA */
-	boolean_t		ic_dca_active;
-
-	/* DCA ID the channel is currently pointing to */
-	uint32_t		ic_dca_current;
-
-	/* devices channel number */
-	uint_t			ic_chan_num;
-
-	/* number of descriptors in ring */
-	uint_t			ic_chan_desc_cnt;
-
-	/* descriptor ring alloc state */
-	ddi_dma_handle_t	ic_desc_dma_handle;
-	size_t			ic_desc_alloc_size;
-	ddi_acc_handle_t	ic_desc_handle;
-	ddi_dma_cookie_t	ic_desc_cookies;
-
-	/* completion buffer alloc state */
-	ddi_dma_handle_t	ic_cmpl_dma_handle;
-	size_t			ic_cmpl_alloc_size;
-	ddi_acc_handle_t	ic_cmpl_handle;
-	ddi_dma_cookie_t	ic_cmpl_cookie;
-	uint64_t		ic_phys_cmpl;
-
-	/* if inuse, we need to re-init the channel during resume */
-	boolean_t		ic_inuse;
-
-	/* backpointer to driver state */
-	struct ioat_state_s	*ic_state;
-};
-
-typedef struct ioat_rs_s *ioat_rs_hdl_t;
-
-/* driver state */
-typedef struct ioat_state_s {
-	dev_info_t		*is_dip;
-	int			is_instance;
-
-	kmutex_t		is_mutex;
-
-	/* register handle and pointer to registers */
-	ddi_acc_handle_t	is_reg_handle;
-	uint8_t			*is_genregs;
-
-	/* IOAT_CBv1 || IOAT_CBv2 */
-	ioat_version_t		is_ver;
-
-	/* channel state */
-	ioat_channel_t		is_channel;
-	size_t			is_chansize;
-	ioat_rs_hdl_t		is_channel_rs;
-
-	ddi_iblock_cookie_t	is_iblock_cookie;
-
-	/* device info */
-	uint_t			is_chanoff;
-	uint_t			is_num_channels;
-	uint_t			is_maxxfer;
-	uint_t			is_cbver;
-	uint_t			is_intrdelay;
-	uint_t			is_status;
-	uint_t			is_capabilities;
-
-	/* dcopy_device_register()/dcopy_device_unregister() state */
-	dcopy_device_handle_t	is_device_handle;
-	dcopy_device_info_t	is_deviceinfo;
-} ioat_state_t;
-
-
-int ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
-    int *rval);
-
-void ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val,
-    ioat_rs_hdl_t *handle);
-void ioat_rs_fini(ioat_rs_hdl_t *handle);
-int ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *rs);
-void ioat_rs_free(ioat_rs_hdl_t handle, uint_t rs);
-
-int ioat_channel_init(ioat_state_t *state);
-void ioat_channel_fini(ioat_state_t *state);
-void ioat_channel_suspend(ioat_state_t *state);
-int ioat_channel_resume(ioat_state_t *state);
-
-int ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
-    uint_t size, dcopy_query_channel_t *info, void *channel_private);
-void ioat_channel_free(void *channel_private);
-void ioat_channel_intr(ioat_channel_t channel);
-int ioat_cmd_alloc(void *channel, int flags, dcopy_cmd_t *cmd);
-void ioat_cmd_free(void *channel, dcopy_cmd_t *cmd);
-int ioat_cmd_post(void *channel, dcopy_cmd_t cmd);
-int ioat_cmd_poll(void *channel, dcopy_cmd_t cmd);
-void ioat_unregister_complete(void *device_private, int status);
-
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_IOAT_H */
--- a/deleted_files/usr/src/uts/i86xpv/ioat/Makefile	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# uts/i86xpv/ioat/Makefile
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-#
-#	This makefile drives the production of the ioat driver kernel
-#	module.
-#
-
-#
-#	Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE	= ../..
-
-#
-#	Define the module and object file sets.
-#
-MODULE		= ioat
-OBJECTS		= $(IOAT_OBJS:%=$(OBJS_DIR)/%)
-LINTS		= $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE	= $(ROOT_PSM_DRV_DIR)/$(MODULE)
-CONF_SRCDIR     = $(UTSBASE)/i86pc/io/ioat
-
-#
-#	Include common rules.
-#
-include $(UTSBASE)/i86xpv/Makefile.i86xpv
-
-#
-#	Define targets
-#
-ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
-LINT_TARGET	= $(MODULE).lint
-INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
-
-LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
-
-#
-#	Dependency
-#
-LDFLAGS		+= -dy -Nmisc/dcopy
-
-#
-#	Override defaults to build a unique, local modstubs.o.
-#
-MODSTUBS_DIR	 = $(OBJS_DIR)
-CLEANFILES	+= $(MODSTUBS_O)
-
-#
-#	Default build targets.
-#
-.KEEP_STATE:
-
-def:		$(DEF_DEPS)
-
-all:		$(ALL_DEPS)
-
-clean:		$(CLEAN_DEPS)
-
-clobber:	$(CLOBBER_DEPS)
-
-lint:		$(LINT_DEPS)
-
-modlintlib:	$(MODLINTLIB_DEPS)
-
-clean.lint:	$(CLEAN_LINT_DEPS)
-
-install:	$(INSTALL_DEPS)
-
-#
-#	Include common targets.
-#
-include $(UTSBASE)/i86xpv/Makefile.targ
-
--- a/deleted_files/usr/src/uts/intel/dcopy/Makefile	Fri May 23 18:47:44 2008 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-#
-# uts/intel/dcopy/Makefile
-#
-# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
-# Use is subject to license terms.
-#
-#ident	"%Z%%M%	%I%	%E% SMI"
-#
-#	This makefile drives the production of the dcopy
-#	kernel module.
-#
-#	intel architecture dependent
-#
-
-#
-#	Path to the base of the uts directory tree (usually /usr/src/uts).
-#
-UTSBASE	= ../..
-
-#
-#	Define the module and object file sets.
-#
-MODULE		= dcopy
-OBJECTS		= $(DCOPY_OBJS:%=$(OBJS_DIR)/%)
-LINTS		= $(DCOPY_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE	= $(ROOT_MISC_DIR)/$(MODULE)
-
-#
-#	Include common rules.
-#
-include $(UTSBASE)/intel/Makefile.intel
-
-#
-#	Define targets
-#
-ALL_TARGET	= $(BINARY)
-LINT_TARGET	= $(MODULE).lint
-INSTALL_TARGET	= $(BINARY) $(ROOTMODULE)
-
-#
-#	Default build targets.
-#
-.KEEP_STATE:
-
-def:		$(DEF_DEPS)
-
-all:		$(ALL_DEPS)
-
-clean:		$(CLEAN_DEPS)
-
-clobber:	$(CLOBBER_DEPS)
-
-lint:		$(LINT_DEPS)
-
-modlintlib:	$(MODLINTLIB_DEPS)
-
-clean.lint:	$(CLEAN_LINT_DEPS)
-
-install:	$(INSTALL_DEPS)
-
-#
-#	Include common targets.
-#
-include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/pkgdefs/Makefile	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/pkgdefs/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -125,6 +125,7 @@
 	SUNWgrub \
 	SUNWgrubS \
 	SUNWhxge \
+	SUNWdcopy \
 	SUNWipw \
 	SUNWiwi \
 	SUNWiwk \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+ 
+include ../Makefile.com
+
+TMPLFILES += postinstall preremove
+DATAFILES += depend
+
+.KEEP_STATE:
+
+all: $(FILES)
+install: all pkg
+
+include ../Makefile.targ
+include ../Makefile.prtarg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWdcopy"
+NAME="Sun dcopy DMA drivers"
+ARCH="i386"
+CATEGORY="system"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKGTYPE="root"
+CLASSES="none"
+DESC="Sun dcopy DMA drivers"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+VERSION="ONVERS,REV=0.0.0"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+MAXINST="1000"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,33 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include drv_utils
+
+CB1='"pciex8086,1a38" "pciex8086,360b"'
+CB2='"pciex8086,402f"'
+
+pkg_drvadd -i "'$CB1 $CB2'" ioat || exit 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,31 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include drv_utils
+
+pkg_drvrem ioat || exit 1
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/prototype_com	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+
+#
+# packaging files
+i copyright
+i depend
+i pkginfo
+i postinstall
+i preremove
+
+#
+# source locations relative to the prototype file
+#
+#
+# SUNWdcopy
+#
+d none kernel 0755 root sys
+d none kernel/misc 0755 root sys
+f none kernel/misc/dcopy 0755 root sys
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/pkgdefs/SUNWdcopy/prototype_i386	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+
+#!search <pathname pathname ...>	# where to find pkg objects
+#!include <filename>			# include another 'prototype' file
+#!default <mode> <owner> <group>	# default used if not specified on entry
+#!<param>=<value>			# puts parameter in pkg environment
+#
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+#
+#
+# List files which are i386 specific here
+#
+# SUNWioat
+#
+d none kernel/misc/amd64 0755 root sys
+f none kernel/misc/amd64/dcopy 0755 root sys
+d none platform 0755 root sys
+d none platform/i86pc 0755 root sys
+d none platform/i86pc/kernel 0755 root sys
+d none platform/i86pc/kernel/drv 0755 root sys
+f none platform/i86pc/kernel/drv/ioat 755 root sys
+f none platform/i86pc/kernel/drv/ioat.conf 644 root sys
+d none platform/i86pc/kernel/drv/amd64 0755 root sys
+f none platform/i86pc/kernel/drv/amd64/ioat 755 root sys
+d none platform/i86xpv 0755 root sys
+d none platform/i86xpv/kernel 0755 root sys
+d none platform/i86xpv/kernel/drv 0755 root sys
+f none platform/i86xpv/kernel/drv/ioat 755 root sys
+f none platform/i86xpv/kernel/drv/ioat.conf 644 root sys
+d none platform/i86xpv/kernel/drv/amd64 0755 root sys
+f none platform/i86xpv/kernel/drv/amd64/ioat 755 root sys
--- a/usr/src/pkgdefs/SUNWhea/prototype_com	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com	Fri May 23 20:14:10 2008 -0700
@@ -1218,6 +1218,7 @@
 f none usr/include/sys/socket_impl.h 644 root bin
 f none usr/include/sys/socketvar.h 644 root bin
 f none usr/include/sys/sockio.h 644 root bin
+f none usr/include/sys/sodirect.h 644 root bin
 f none usr/include/sys/sservice.h 644 root bin
 f none usr/include/sys/squeue.h 644 root bin
 f none usr/include/sys/squeue_impl.h 644 root bin
--- a/usr/src/uts/common/fs/sockfs/socksctp.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/socksctp.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -171,6 +171,8 @@
 	so->so_nl7c_uri		= NULL;
 	so->so_nl7c_rcv_mp	= NULL;
 
+	so->so_direct		= NULL;
+
 	vp = vn_alloc(kmflags);
 	if (vp == NULL) {
 		return (-1);
@@ -204,6 +206,8 @@
 	struct sonode *so = &ss->ss_so;
 	struct vnode *vp = SOTOV(so);
 
+	ASSERT(so->so_direct == NULL);
+
 	ASSERT(so->so_nl7c_flags == 0);
 	ASSERT(so->so_nl7c_uri == NULL);
 	ASSERT(so->so_nl7c_rcv_mp == NULL);
--- a/usr/src/uts/common/fs/sockfs/socksdp.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/socksdp.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -133,6 +133,8 @@
 	so->so_nl7c_uri		= NULL;
 	so->so_nl7c_rcv_mp	= NULL;
 
+	so->so_direct		= NULL;
+
 	vp = vn_alloc(kmflags);
 	if (vp == NULL) {
 		return (-1);
@@ -159,6 +161,8 @@
 	struct sonode *so = &ss->ss_so;
 	struct vnode *vp = SOTOV(so);
 
+	ASSERT(so->so_direct == NULL);
+
 	ASSERT(so->so_nl7c_flags == 0);
 	ASSERT(so->so_nl7c_uri == NULL);
 	ASSERT(so->so_nl7c_rcv_mp == NULL);
--- a/usr/src/uts/common/fs/sockfs/sockstr.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockstr.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -69,6 +69,8 @@
 
 #include <c2/audit.h>
 
+#include <sys/dcopy.h>
+
 int so_default_version = SOV_SOCKSTREAM;
 
 #ifdef DEBUG
@@ -119,6 +121,26 @@
 static int tlitosyserr(int terr);
 
 /*
+ * Sodirect kmem_cache and put/wakeup functions.
+ */
+struct kmem_cache *socktpi_sod_cache;
+static int sodput(sodirect_t *, mblk_t *);
+static void sodwakeup(sodirect_t *);
+
+/*
+ * Called by sockinit() when sockfs is loaded.
+ */
+int
+sostr_init()
+{
+	/* Allocate sodirect_t kmem_cache */
+	socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
+	    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	return (0);
+}
+
+/*
  * Convert a socket to a stream. Invoked when the illusory sockmod
  * is popped from the stream.
  * Change the stream head back to default operation without losing
@@ -468,6 +490,34 @@
 		stp->sd_qn_minpsz = 0;
 	mutex_exit(&stp->sd_lock);
 
+	/*
+	 * If sodirect capable allocate and initialize sodirect_t.
+	 * Note, SS_SODIRECT is set in socktpi_open().
+	 */
+	if (so->so_state & SS_SODIRECT) {
+		sodirect_t	*sodp;
+
+		ASSERT(so->so_direct == NULL);
+
+		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
+		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
+		sodp->sod_want = 0;
+		sodp->sod_q = RD(stp->sd_wrq);
+		sodp->sod_enqueue = sodput;
+		sodp->sod_wakeup = sodwakeup;
+		sodp->sod_uioafh = NULL;
+		sodp->sod_uioaft = NULL;
+		sodp->sod_lock = &stp->sd_lock;
+		/*
+		 * Remainder of the sod_uioa members are left uninitialized
+		 * but will be initialized later by uioainit() before uioa
+		 * is enabled.
+		 */
+		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
+		so->so_direct = sodp;
+		stp->sd_sodirect = sodp;
+	}
+
 	return (0);
 }
 
@@ -2872,3 +2922,121 @@
 	else
 		return (tli_errs[terr]);
 }
+
+/*
+ * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
+ * transport driver/module with an mblk_t chain.
+ *
+ * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
+ * bp are of type M_DATA. All other cases we call putq().
+ *
+ * On success a zero will be return, else an errno will be returned.
+ */
+int
+sodput(sodirect_t *sodp, mblk_t *bp)
+{
+	queue_t		*q = sodp->sod_q;
+	struct stdata	*stp = (struct stdata *)q->q_ptr;
+	mblk_t		*nbp;
+	int		ret;
+	mblk_t		*last = q->q_last;
+	int		bytecnt = 0;
+	int		mblkcnt = 0;
+
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	if (stp->sd_flag == STREOF) {
+		ret = 0;
+		goto error;
+	}
+
+	if (q->q_first == NULL) {
+		/* Q empty, really fast fast-path */
+		bp->b_prev = NULL;
+		bp->b_next = NULL;
+		q->q_first = bp;
+		q->q_last = bp;
+
+	} else if (last->b_datap->db_type == M_DATA &&
+	    bp->b_datap->db_type == M_DATA) {
+		/*
+		 * Last mblk_t chain and bp are both type M_DATA so
+		 * in-line putq() here, if the DBLK_UIOA state match
+		 * add bp to the end of the current last chain, else
+		 * start a new last chain with bp.
+		 */
+		if ((last->b_datap->db_flags & DBLK_UIOA) ==
+		    (bp->b_datap->db_flags & DBLK_UIOA)) {
+			/* Added to end */
+			while ((nbp = last->b_cont) != NULL)
+				last = nbp;
+			last->b_cont = bp;
+		} else {
+			/* New last */
+			last->b_next = bp;
+			bp->b_next = NULL;
+			bp->b_prev = last;
+			q->q_last = bp;
+		}
+	} else {
+		/*
+		 * Can't use q_last so just call putq().
+		 */
+		(void) putq(q, bp);
+		return (0);
+	}
+
+	/* Count bytes and mblk_t's */
+	do {
+		bytecnt += MBLKL(bp);
+		mblkcnt++;
+	} while ((bp = bp->b_cont) != NULL);
+	q->q_count += bytecnt;
+	q->q_mblkcnt += mblkcnt;
+
+	/* Check for QFULL */
+	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
+	    q->q_mblkcnt >= q->q_hiwat) {
+		q->q_flag |= QFULL;
+	}
+
+	return (0);
+
+error:
+	do {
+		if ((nbp = bp->b_next) != NULL)
+			bp->b_next = NULL;
+		freemsg(bp);
+	} while ((bp = nbp) != NULL);
+
+	return (ret);
+}
+
+/*
+ * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
+ * driver/module to indicate that read-side data is available.
+ *
+ * On return the sodirect_t.lock mutex will be exited so this must be the
+ * last sodirect_t call to guarantee atomic access of *sodp.
+ */
+void
+sodwakeup(sodirect_t *sodp)
+{
+	queue_t		*q = sodp->sod_q;
+	struct stdata	*stp = (struct stdata *)q->q_ptr;
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	if (stp->sd_flag & RSLEEP) {
+		stp->sd_flag &= ~RSLEEP;
+		cv_broadcast(&q->q_wait);
+	}
+
+	if (stp->sd_rput_opt & SR_POLLIN) {
+		stp->sd_rput_opt &= ~SR_POLLIN;
+		mutex_exit(sodp->sod_lock);
+		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
+	} else
+		mutex_exit(sodp->sod_lock);
+}
--- a/usr/src/uts/common/fs/sockfs/socksubr.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/socksubr.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -44,6 +44,7 @@
 #include <sys/file.h>
 #include <sys/open.h>
 #include <sys/user.h>
+#include <sys/uio.h>
 #include <sys/termios.h>
 #include <sys/stream.h>
 #include <sys/strsubr.h>
@@ -90,6 +91,7 @@
 #define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
 
 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
+struct kmem_cache *socktpi_sod_cache;
 
 dev_t sockdev;	/* For fsid in getattr */
 
@@ -105,6 +107,8 @@
 
 extern void nl7c_init(void);
 
+extern int sostr_init();
+
 #define	ADRSTRLEN (2 * sizeof (void *) + 1)
 /*
  * kernel structure for passing the sockinfo data back up to the user.
@@ -523,6 +527,15 @@
 		so->so_nl7c_flags = 0;
 	}
 
+	if (so->so_direct != NULL) {
+		sodirect_t *sodp = so->so_direct;
+
+		ASSERT(sodp->sod_uioafh == NULL);
+
+		so->so_direct = NULL;
+		kmem_cache_free(socktpi_sod_cache, sodp);
+	}
+
 	ASSERT(so->so_ux_bound_vp == NULL);
 	if ((mp = so->so_unbind_mp) != NULL) {
 		freemsg(mp);
@@ -567,6 +580,8 @@
 	struct sonode *so = buf;
 	struct vnode *vp;
 
+	so->so_direct		= NULL;
+
 	so->so_nl7c_flags	= 0;
 	so->so_nl7c_uri		= NULL;
 	so->so_nl7c_rcv_mp	= NULL;
@@ -606,6 +621,8 @@
 	struct sonode *so = buf;
 	struct vnode *vp = SOTOV(so);
 
+	ASSERT(so->so_direct == NULL);
+
 	ASSERT(so->so_nl7c_flags == 0);
 	ASSERT(so->so_nl7c_uri == NULL);
 	ASSERT(so->so_nl7c_rcv_mp == NULL);
@@ -713,6 +730,12 @@
 		goto failure;
 	}
 
+	error = sostr_init();
+	if (error != 0) {
+		err_str = NULL;
+		goto failure;
+	}
+
 	/*
 	 * Create sonode caches.  We create a special one for AF_UNIX so
 	 * that we can track them for netstat(1m).
--- a/usr/src/uts/common/fs/sockfs/socktpi.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/socktpi.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -58,6 +58,7 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
+#include <sys/sodirect.h>
 #include <netinet/in.h>
 #include <sys/un.h>
 #include <sys/strsun.h>
@@ -186,6 +187,9 @@
 
 static int	sotpi_unbind(struct sonode *, int);
 
+extern int	sodput(sodirect_t *, mblk_t *);
+extern void	sodwakeup(sodirect_t *);
+
 /* TPI sockfs sonode operations */
 static int	sotpi_accept(struct sonode *, int, struct sonode **);
 static int	sotpi_bind(struct sonode *, struct sockaddr *, socklen_t,
@@ -2910,11 +2914,13 @@
 	t_uscalar_t		namelen;
 	int			so_state = so->so_state; /* Snapshot */
 	ssize_t			saved_resid;
-	int			error;
 	rval_t			rval;
 	int			flags;
 	clock_t			timout;
 	int			first;
+	int			error = 0;
+	struct uio		*suiop = NULL;
+	sodirect_t		*sodp = so->so_direct;
 
 	flags = msg->msg_flags;
 	msg->msg_flags = 0;
@@ -3062,6 +3068,53 @@
 	opflag = pflag;
 	first = 1;
 
+	if (uiop->uio_resid >= uioasync.mincnt &&
+	    sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
+	    uioasync.enabled && !(flags & MSG_PEEK) &&
+	    !(so_state & SS_CANTRCVMORE)) {
+		/*
+		 * Big enough I/O for uioa min setup and an sodirect socket
+		 * and sodirect enabled and uioa enabled and I/O will be done
+		 * and not EOF so initialize the sodirect_t uioa_t with "uiop".
+		 */
+		mutex_enter(sodp->sod_lock);
+		if (!uioainit(uiop, &sodp->sod_uioa)) {
+			/*
+			 * Successful uioainit() so the uio_t part of the
+			 * uioa_t will be used for all uio_t work to follow,
+			 * we save the original "uiop" in "suiop".
+			 */
+			suiop = uiop;
+			uiop = (uio_t *)&sodp->sod_uioa;
+			/*
+			 * Before returning to the caller the passed in uio_t
+			 * "uiop" will be updated via a call to uioafini()
+			 * below.
+			 *
+			 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
+			 * here as first we have to uioamove() any currently
+			 * queued M_DATA mblk_t(s) so it will be done in
+			 * kstrgetmsg().
+			 */
+		}
+		/*
+		 * In either uioainit() success or not case note the number
+		 * of uio bytes the caller wants for sod framework and/or
+		 * transport (e.g. TCP) strategy.
+		 */
+		sodp->sod_want = uiop->uio_resid;
+		mutex_exit(sodp->sod_lock);
+	} else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) {
+		/*
+		 * No uioa but still using sodirect so note the number of
+		 * uio bytes the caller wants for sodirect framework and/or
+		 * transport (e.g. TCP) strategy.
+		 *
+		 * Note, sod_lock not held, only writer is in this function
+		 * and only one thread at a time so not needed just to init.
+		 */
+		sodp->sod_want = uiop->uio_resid;
+	}
 retry:
 	saved_resid = uiop->uio_resid;
 	pri = 0;
@@ -3091,10 +3144,7 @@
 			eprintsoline(so, error);
 			break;
 		}
-		mutex_enter(&so->so_lock);
-		so_unlock_read(so);	/* Clear SOREADLOCKED */
-		mutex_exit(&so->so_lock);
-		return (error);
+		goto out;
 	}
 	/*
 	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
@@ -3137,9 +3187,7 @@
 			pflag = opflag | MSG_NOMARK;
 			goto retry;
 		}
-		so_unlock_read(so);	/* Clear SOREADLOCKED */
-		mutex_exit(&so->so_lock);
-		return (0);
+		goto out_locked;
 	}
 
 	/* strsock_proto has already verified length and alignment */
@@ -3179,9 +3227,7 @@
 			pflag = opflag | MSG_NOMARK;
 			goto retry;
 		}
-		so_unlock_read(so);	/* Clear SOREADLOCKED */
-		mutex_exit(&so->so_lock);
-		return (0);
+		goto out_locked;
 	}
 	case T_UNITDATA_IND: {
 		void *addr;
@@ -3207,7 +3253,7 @@
 				freemsg(mp);
 				error = EPROTO;
 				eprintsoline(so, error);
-				goto err;
+				goto out;
 			}
 			if (so->so_family == AF_UNIX) {
 				/*
@@ -3236,7 +3282,7 @@
 				freemsg(mp);
 				error = EPROTO;
 				eprintsoline(so, error);
-				goto err;
+				goto out;
 			}
 			if (so->so_family == AF_UNIX)
 				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
@@ -3283,17 +3329,14 @@
 					    msg->msg_namelen);
 				kmem_free(control, controllen);
 				eprintsoline(so, error);
-				goto err;
+				goto out;
 			}
 			msg->msg_control = control;
 			msg->msg_controllen = controllen;
 		}
 
 		freemsg(mp);
-		mutex_enter(&so->so_lock);
-		so_unlock_read(so);	/* Clear SOREADLOCKED */
-		mutex_exit(&so->so_lock);
-		return (0);
+		goto out;
 	}
 	case T_OPTDATA_IND: {
 		struct T_optdata_req *tdr;
@@ -3322,7 +3365,7 @@
 				freemsg(mp);
 				error = EPROTO;
 				eprintsoline(so, error);
-				goto err;
+				goto out;
 			}
 
 			ncontrollen = so_cmsglen(mp, opt, optlen,
@@ -3350,7 +3393,7 @@
 				freemsg(mp);
 				kmem_free(control, controllen);
 				eprintsoline(so, error);
-				goto err;
+				goto out;
 			}
 			msg->msg_control = control;
 			msg->msg_controllen = controllen;
@@ -3382,9 +3425,7 @@
 			pflag = opflag | MSG_NOMARK;
 			goto retry;
 		}
-		so_unlock_read(so);	/* Clear SOREADLOCKED */
-		mutex_exit(&so->so_lock);
-		return (0);
+		goto out_locked;
 	}
 	case T_EXDATA_IND: {
 		dprintso(so, 1,
@@ -3441,10 +3482,7 @@
 					eprintsoline(so, error);
 				}
 #endif /* SOCK_DEBUG */
-				mutex_enter(&so->so_lock);
-				so_unlock_read(so);	/* Clear SOREADLOCKED */
-				mutex_exit(&so->so_lock);
-				return (error);
+				goto out;
 			}
 			ASSERT(mp);
 			tpr = (union T_primitives *)mp->b_rptr;
@@ -3490,11 +3528,40 @@
 		freemsg(mp);
 		error = EPROTO;
 		eprintsoline(so, error);
-		goto err;
+		goto out;
 	}
 	/* NOTREACHED */
-err:
+out:
 	mutex_enter(&so->so_lock);
+out_locked:
+	if (sodp != NULL) {
+		/* Finish any sodirect and uioa processing */
+		mutex_enter(sodp->sod_lock);
+		if (suiop != NULL) {
+			/* Finish any uioa_t processing */
+			int ret;
+
+			ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
+			ret = uioafini(suiop, (uioa_t *)uiop);
+			if (error == 0 && ret != 0) {
+				/* If no error yet, set it */
+				error = ret;
+			}
+			if ((mp = sodp->sod_uioafh) != NULL) {
+				sodp->sod_uioafh = NULL;
+				sodp->sod_uioaft = NULL;
+				freemsg(mp);
+			}
+		}
+		if (!(sodp->sod_state & SOD_WAKE_NOT)) {
+			/* Awoke */
+			sodp->sod_state &= SOD_WAKE_CLR;
+			sodp->sod_state |= SOD_WAKE_NOT;
+		}
+		/* Last, clear sod_want value */
+		sodp->sod_want = 0;
+		mutex_exit(sodp->sod_lock);
+	}
 	so_unlock_read(so);	/* Clear SOREADLOCKED */
 	mutex_exit(&so->so_lock);
 	return (error);
--- a/usr/src/uts/common/fs/sockfs/sockvnops.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockvnops.c	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -239,6 +239,10 @@
 			 * udp case, when some other module is autopushed
 			 * above it, or for some reasons the expected module
 			 * isn't purely D_MP (which is the main requirement).
+			 *
+			 * Else, SS_DIRECT is valid. If the read-side Q has
+			 * _QSODIRECT set then and uioasync is enabled then
+			 * set SS_SODIRECT to enable sodirect.
 			 */
 			if (!socktpi_direct || !(tq->q_flag & _QDIRECT) ||
 			    !(_OTHERQ(tq)->q_flag & _QDIRECT)) {
@@ -255,6 +259,10 @@
 						return (error);
 					}
 				}
+			} else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) &&
+			    uioasync.enabled) {
+				/* Enable sodirect */
+				so->so_state |= SS_SODIRECT;
 			}
 		}
 	} else {
--- a/usr/src/uts/common/inet/tcp.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/inet/tcp.h	Fri May 23 20:14:10 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1990 Mentat Inc. */
@@ -37,6 +37,7 @@
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <sys/socket.h>
+#include <sys/sodirect.h>
 #include <sys/multidata.h>
 #include <sys/md5.h>
 #include <inet/common.h>
@@ -598,6 +599,13 @@
 	 */
 	boolean_t	tcp_flow_stopped;
 
+	/*
+	 * tcp_sodirect is used by tcp on the receive side to push mblk_t(s)
+	 * directly to sockfs. Also, to schedule asynchronous copyout directly
+	 * to a pending user-land uio buffer.
+	 */
+	sodirect_t	*tcp_sodirect;
+
 #ifdef DEBUG
 	pc_t			tcmp_stk[15];
 #endif
--- a/usr/src/uts/common/inet/tcp/tcp.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Fri May 23 20:14:10 2008 -0700
@@ -66,6 +66,8 @@
 #include <sys/isa_defs.h>
 #include <sys/md5.h>
 #include <sys/random.h>
+#include <sys/sodirect.h>
+#include <sys/uio.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <netinet/ip6.h>
@@ -216,6 +218,23 @@
  * behaviour. Once tcp_issocket is unset, its never set for the
  * life of that connection.
  *
+ * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT)
+ * two consolidation private KAPIs are used to enqueue M_DATA mblk_t's
+ * directly to the socket (sodirect) and start an asynchronous copyout
+ * to a user-land receive-side buffer (uioa) when a blocking socket read
+ * (e.g. read, recv, ...) is pending.
+ *
+ * This is accomplished when tcp_issocket is set and tcp_sodirect is not
+ * NULL so points to an sodirect_t and if marked enabled then we enqueue
+ * all mblk_t's directly to the socket.
+ *
+ * Further, if the sodirect_t sod_uioa and if marked enabled (due to a
+ * blocking socket read, e.g. user-land read, recv, ...) then an asynchronous
+ * copyout will be started directly to the user-land uio buffer. Also, as we
+ * have a pending read, TCP's push logic can take into account the number of
+ * bytes to be received and only awake the blocked read()er when the uioa_t
+ * byte count has been satisfied.
+ *
  * IPsec notes :
  *
  * Since a packet is always executed on the correct TCP perimeter
@@ -246,6 +265,37 @@
 squeue_func_t tcp_squeue_wput_proc;
 
 /*
+ * Macros for sodirect:
+ *
+ * SOD_PTR_ENTER(tcp, sodp) - for the tcp_t pointer "tcp" set the
+ * sodirect_t pointer "sodp" to the socket/tcp shared sodirect_t
+ * if it exists and is enabled, else to NULL. Note, in the current
+ * sodirect implementation the sod_lock must not be held across any
+ * STREAMS call (e.g. putnext) else a "recursive mutex_enter" PANIC
+ * will result as sod_lock is the streamhead stdata.sd_lock.
+ *
+ * SOD_NOT_ENABLED(tcp) - return true if not a sodirect tcp_t or the
+ * sodirect_t isn't enabled, useful for ASSERT()ing that a receive
+ * side tcp code path dealing with a tcp_rcv_list or putnext() isn't
+ * being used when sodirect code paths should be.
+ */
+
+#define	SOD_PTR_ENTER(tcp, sodp)					\
+	(sodp) = (tcp)->tcp_sodirect;					\
+									\
+	if ((sodp) != NULL) {						\
+		mutex_enter((sodp)->sod_lock);				\
+		if (!((sodp)->sod_state & SOD_ENABLED)) {		\
+			mutex_exit((sodp)->sod_lock);			\
+			(sodp) = NULL;					\
+		}							\
+	}
+
+#define	SOD_NOT_ENABLED(tcp)						\
+	((tcp)->tcp_sodirect == NULL ||					\
+	    !((tcp)->tcp_sodirect->sod_state & SOD_ENABLED))
+
+/*
  * This controls how tiny a write must be before we try to copy it
  * into the the mblk on the tail of the transmit queue.  Not much
  * speedup is observed for values larger than sixteen.  Zero will
@@ -3808,6 +3858,7 @@
 	mblk_t	*mp;
 	queue_t	*q;
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	sodirect_t	*sodp;
 
 	TCP_CLD_STAT(tag);
 
@@ -3872,6 +3923,13 @@
 		return (-1);
 	}
 
+	/* If sodirect, not anymore */
+	SOD_PTR_ENTER(tcp, sodp);
+	if (sodp != NULL) {
+		tcp->tcp_sodirect = NULL;
+		mutex_exit(sodp->sod_lock);
+	}
+
 	q = tcp->tcp_rq;
 
 	/* Trash all inbound data */
@@ -4236,6 +4294,11 @@
 		 */
 		/* FALLTHRU */
 	default:
+		if (tcp->tcp_sodirect != NULL) {
+			/* Ok, no more sodirect */
+			tcp->tcp_sodirect = NULL;
+		}
+
 		if (tcp->tcp_fused)
 			tcp_unfuse(tcp);
 
@@ -6381,6 +6444,15 @@
 			*(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport;
 	}
 
+	if (tcp->tcp_issocket) {
+		/*
+		 * TCP is _D_SODIRECT and sockfs is directly above so save
+		 * the shared sonode sodirect_t pointer (if any) to enable
+		 * TCP sodirect.
+		 */
+		tcp->tcp_sodirect = SOD_QTOSODP(tcp->tcp_rq);
+	}
+
 	switch (tcp->tcp_state) {
 	case TCPS_IDLE:
 		/*
@@ -8190,6 +8262,9 @@
 	ASSERT(!tcp->tcp_kssl_pending);
 	PRESERVE(tcp->tcp_kssl_ent);
 
+	/* Sodirect */
+	tcp->tcp_sodirect = NULL;
+
 	tcp->tcp_closemp_used = B_FALSE;
 
 #ifdef DEBUG
@@ -8282,6 +8357,9 @@
 	tcp->tcp_fuse_rcv_unread_hiwater = 0;
 	tcp->tcp_fuse_rcv_unread_cnt = 0;
 
+	/* Sodirect */
+	tcp->tcp_sodirect = NULL;
+
 	/* Initialize the header template */
 	if (tcp->tcp_ipversion == IPV4_VERSION) {
 		err = tcp_header_init_ipv4(tcp);
@@ -11680,6 +11758,9 @@
 	if (tcp->tcp_listener != NULL)
 		return (ret);
 
+	/* Can't be sodirect enabled */
+	ASSERT(SOD_NOT_ENABLED(tcp));
+
 	/*
 	 * Handle two cases here: we are currently fused or we were
 	 * previously fused and have some urgent data to be delivered
@@ -11779,6 +11860,216 @@
 }
 
 /*
+ * The tcp_rcv_sod_XXX() functions enqueue data directly to the socket
+ * above, in addition when uioa is enabled schedule an asynchronous uio
+ * prior to enqueuing. They implement the combined semantics of the
+ * tcp_rcv_XXX() functions, tcp_rcv_list push logic, and STREAMS putnext()
+ * canputnext(), i.e. flow-control with backenable.
+ *
+ * tcp_rcv_sod_wakeup() is called where tcp_rcv_drain() would be called in the
+ * non sodirect connection but as there are no tcp_rcv_list mblk_t's we deal
+ * with the rcv_wnd and push timer and call the sodirect wakeup function.
+ *
+ * Must be called with sodp->sod_lock held and will return with the lock
+ * released.
+ */
+static uint_t
+tcp_rcv_sod_wakeup(tcp_t *tcp, sodirect_t *sodp)
+{
+	queue_t		*q = tcp->tcp_rq;
+	uint_t		thwin;
+	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	uint_t		ret = 0;
+
+	/* Can't be an eager connection */
+	ASSERT(tcp->tcp_listener == NULL);
+
+	/* Caller must have lock held */
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	/* Sodirect mode so must not be a tcp_rcv_list */
+	ASSERT(tcp->tcp_rcv_list == NULL);
+
+	if (SOD_QFULL(sodp)) {
+		/* Q is full, mark Q for need backenable */
+		SOD_QSETBE(sodp);
+	}
+	/* Last advertised rwnd, i.e. rwnd last sent in a packet */
+	thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
+	    << tcp->tcp_rcv_ws;
+	/* This is peer's calculated send window (our available rwnd). */
+	thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
+	/*
+	 * Increase the receive window to max.  But we need to do receiver
+	 * SWS avoidance.  This means that we need to check the increase of
+	 * of receive window is at least 1 MSS.
+	 */
+	if (!SOD_QFULL(sodp) && (q->q_hiwat - thwin >= tcp->tcp_mss)) {
+		/*
+		 * If the window that the other side knows is less than max
+		 * deferred acks segments, send an update immediately.
+		 */
+		if (thwin < tcp->tcp_rack_cur_max * tcp->tcp_mss) {
+			BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
+			ret = TH_ACK_NEEDED;
+		}
+		tcp->tcp_rwnd = q->q_hiwat;
+	}
+
+	if (!SOD_QEMPTY(sodp)) {
+		/* Wakeup to socket */
+		sodp->sod_state &= SOD_WAKE_CLR;
+		sodp->sod_state |= SOD_WAKE_DONE;
+		(sodp->sod_wakeup)(sodp);
+		/* wakeup() does the mutex_exit() */
+	} else {
+		/* Q is empty, no need to wake */
+		sodp->sod_state &= SOD_WAKE_CLR;
+		sodp->sod_state |= SOD_WAKE_NOT;
+		mutex_exit(sodp->sod_lock);
+	}
+
+	/* No need for the push timer now. */
+	if (tcp->tcp_push_tid != 0) {
+		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
+		tcp->tcp_push_tid = 0;
+	}
+
+	return (ret);
+}
+
+/*
+ * Called where tcp_rcv_enqueue()/putnext(RD(q)) would be. For M_DATA
+ * mblk_t's if uioa enabled then start a uioa asynchronous copy directly
+ * to the user-land buffer and flag the mblk_t as such.
+ *
+ * Also, handle tcp_rwnd.
+ */
+uint_t
+tcp_rcv_sod_enqueue(tcp_t *tcp, sodirect_t *sodp, mblk_t *mp, uint_t seg_len)
+{
+	uioa_t		*uioap = &sodp->sod_uioa;
+	boolean_t	qfull;
+	uint_t		thwin;
+
+	/* Can't be an eager connection */
+	ASSERT(tcp->tcp_listener == NULL);
+
+	/* Caller must have lock held */
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+
+	/* Sodirect mode so must not be a tcp_rcv_list */
+	ASSERT(tcp->tcp_rcv_list == NULL);
+
+	/* Passed in segment length must be equal to mblk_t chain data size */
+	ASSERT(seg_len == msgdsize(mp));
+
+	if (DB_TYPE(mp) != M_DATA) {
+		/* Only process M_DATA mblk_t's */
+		goto enq;
+	}
+	if (uioap->uioa_state & UIOA_ENABLED) {
+		/* Uioa is enabled */
+		mblk_t		*mp1 = mp;
+
+		if (seg_len > uioap->uio_resid) {
+			/*
+			 * There isn't enough uio space for the mblk_t chain
+			 * so disable uioa such that this and any additional
+			 * mblk_t data is handled by the socket and schedule
+			 * the socket for wakeup to finish this uioa.
+			 */
+			uioap->uioa_state &= UIOA_CLR;
+			uioap->uioa_state |= UIOA_FINI;
+			if (sodp->sod_state & SOD_WAKE_NOT) {
+				sodp->sod_state &= SOD_WAKE_CLR;
+				sodp->sod_state |= SOD_WAKE_NEED;
+			}
+			goto enq;
+		}
+		do {
+			uint32_t	len = MBLKL(mp1);
+
+			if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) {
+				/* Scheduled, mark dblk_t as such */
+				DB_FLAGS(mp1) |= DBLK_UIOA;
+			} else {
+				/* Error, turn off async processing */
+				uioap->uioa_state &= UIOA_CLR;
+				uioap->uioa_state |= UIOA_FINI;
+				break;
+			}
+		} while ((mp1 = mp1->b_cont) != NULL);
+
+		if (mp1 != NULL || uioap->uio_resid == 0) {
+			/*
+			 * Not all mblk_t(s) uioamoved (error) or all uio
+			 * space has been consumed so schedule the socket
+			 * for wakeup to finish this uio.
+			 */
+			sodp->sod_state &= SOD_WAKE_CLR;
+			sodp->sod_state |= SOD_WAKE_NEED;
+		}
+	} else if (uioap->uioa_state & UIOA_FINI) {
+		/*
+		 * Post UIO_ENABLED waiting for socket to finish processing
+		 * so just enqueue and update tcp_rwnd.
+		 */
+		if (SOD_QFULL(sodp))
+			tcp->tcp_rwnd -= seg_len;
+	} else if (sodp->sod_want > 0) {
+		/*
+		 * Uioa isn't enabled but sodirect has a pending read().
+		 */
+		if (SOD_QCNT(sodp) + seg_len >= sodp->sod_want) {
+			if (sodp->sod_state & SOD_WAKE_NOT) {
+				/* Schedule socket for wakeup */
+				sodp->sod_state &= SOD_WAKE_CLR;
+				sodp->sod_state |= SOD_WAKE_NEED;
+			}
+			tcp->tcp_rwnd -= seg_len;
+		}
+	} else if (SOD_QCNT(sodp) + seg_len >= tcp->tcp_rq->q_hiwat >> 3) {
+		/*
+		 * No pending sodirect read() so use the default
+		 * TCP push logic to guess that a push is needed.
+		 */
+		if (sodp->sod_state & SOD_WAKE_NOT) {
+			/* Schedule socket for wakeup */
+			sodp->sod_state &= SOD_WAKE_CLR;
+			sodp->sod_state |= SOD_WAKE_NEED;
+		}
+		tcp->tcp_rwnd -= seg_len;
+	} else {
+		/* Just update tcp_rwnd */
+		tcp->tcp_rwnd -= seg_len;
+	}
+enq:
+	qfull = SOD_QFULL(sodp);
+
+	(sodp->sod_enqueue)(sodp, mp);
+
+	if (! qfull && SOD_QFULL(sodp)) {
+		/* Wasn't QFULL, now QFULL, need back-enable */
+		SOD_QSETBE(sodp);
+	}
+
+	/*
+	 * Check to see if remote avail swnd < mss due to delayed ACK,
+	 * first get advertised rwnd.
+	 */
+	thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win));
+	/* Minus delayed ACK count */
+	thwin -= tcp->tcp_rnxt - tcp->tcp_rack;
+	if (thwin < tcp->tcp_mss) {
+		/* Remote avail swnd < mss, need ACK now */
+		return (TH_ACK_NEEDED);
+	}
+
+	return (0);
+}
+
+/*
  * DEFAULT TCP ENTRY POINT via squeue on READ side.
  *
  * This is the default entry function into TCP on the read side. TCP is
@@ -14976,13 +15267,39 @@
 			tcp_rcv_enqueue(tcp, mp, seg_len);
 		}
 	} else {
+		sodirect_t	*sodp = tcp->tcp_sodirect;
+
+		/*
+		 * If an sodirect connection and an enabled sodirect_t then
+		 * sodp will be set to point to the tcp_t/sonode_t shared
+		 * sodirect_t and the sodirect_t's lock will be held.
+		 */
+		if (sodp != NULL) {
+			mutex_enter(sodp->sod_lock);
+			if (!(sodp->sod_state & SOD_ENABLED)) {
+				mutex_exit(sodp->sod_lock);
+				sodp = NULL;
+			} else if (tcp->tcp_kssl_ctx != NULL &&
+			    DB_TYPE(mp) == M_DATA) {
+				mutex_exit(sodp->sod_lock);
+				sodp = NULL;
+			}
+		}
 		if (mp->b_datap->db_type != M_DATA ||
 		    (flags & TH_MARKNEXT_NEEDED)) {
-			if (tcp->tcp_rcv_list != NULL) {
+			if (sodp != NULL) {
+				if (!SOD_QEMPTY(sodp) &&
+				    (sodp->sod_state & SOD_WAKE_NOT)) {
+					flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+					/* sod_wakeup() did the mutex_exit() */
+					mutex_enter(sodp->sod_lock);
+				}
+			} else if (tcp->tcp_rcv_list != NULL) {
 				flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
 			}
 			ASSERT(tcp->tcp_rcv_list == NULL ||
 			    tcp->tcp_fused_sigurg);
+
 			if (flags & TH_MARKNEXT_NEEDED) {
 #ifdef DEBUG
 				(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
@@ -15001,10 +15318,42 @@
 				    mblk_t *, mp);
 				tcp_kssl_input(tcp, mp);
 			} else {
+				if (sodp) {
+					/*
+					 * Done with sodirect, use putnext
+					 * to push this non M_DATA headed
+					 * mblk_t chain.
+					 */
+					mutex_exit(sodp->sod_lock);
+				}
 				putnext(tcp->tcp_rq, mp);
 				if (!canputnext(tcp->tcp_rq))
 					tcp->tcp_rwnd -= seg_len;
 			}
+		} else if ((tcp->tcp_kssl_ctx != NULL) &&
+		    (DB_TYPE(mp) == M_DATA)) {
+			/* Do SSL processing first */
+			DTRACE_PROBE1(kssl_mblk__ksslinput_data2,
+			    mblk_t *, mp);
+			tcp_kssl_input(tcp, mp);
+		} else if (sodp != NULL) {
+			/*
+			 * Sodirect so all mblk_t's are queued on the
+			 * socket directly, check for wakeup of blocked
+			 * reader (if any), and last if flow-controlled.
+			 */
+			flags |= tcp_rcv_sod_enqueue(tcp, sodp, mp, seg_len);
+			if ((sodp->sod_state & SOD_WAKE_NEED) ||
+			    (flags & (TH_PUSH|TH_FIN))) {
+				flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+				/* sod_wakeup() did the mutex_exit() */
+			} else {
+				if (SOD_QFULL(sodp)) {
+					/* Q is full, need backenable */
+					SOD_QSETBE(sodp);
+				}
+				mutex_exit(sodp->sod_lock);
+			}
 		} else if ((flags & (TH_PUSH|TH_FIN)) ||
 		    tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_rq->q_hiwat >> 3) {
 			if (tcp->tcp_rcv_list != NULL) {
@@ -15024,41 +15373,33 @@
 				tcp_rcv_enqueue(tcp, mp, seg_len);
 				flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
 			} else {
-				/* Does this need SSL processing first? */
-				if ((tcp->tcp_kssl_ctx != NULL) &&
-				    (DB_TYPE(mp) == M_DATA)) {
-					DTRACE_PROBE1(
-					    kssl_mblk__ksslinput_data2,
-					    mblk_t *, mp);
-					tcp_kssl_input(tcp, mp);
-				} else {
-					putnext(tcp->tcp_rq, mp);
-					if (!canputnext(tcp->tcp_rq))
-						tcp->tcp_rwnd -= seg_len;
-				}
+				putnext(tcp->tcp_rq, mp);
+				if (!canputnext(tcp->tcp_rq))
+					tcp->tcp_rwnd -= seg_len;
 			}
 		} else {
 			/*
 			 * Enqueue all packets when processing an mblk
 			 * from the co queue and also enqueue normal packets.
-			 * For packets which belong to SSL stream do SSL
-			 * processing first.
-			 */
-			if ((tcp->tcp_kssl_ctx != NULL) &&
-			    (DB_TYPE(mp) == M_DATA)) {
-				DTRACE_PROBE1(kssl_mblk__tcpksslin3,
-				    mblk_t *, mp);
-				tcp_kssl_input(tcp, mp);
-			} else {
-				tcp_rcv_enqueue(tcp, mp, seg_len);
-			}
+			 */
+			tcp_rcv_enqueue(tcp, mp, seg_len);
 		}
 		/*
 		 * Make sure the timer is running if we have data waiting
 		 * for a push bit. This provides resiliency against
 		 * implementations that do not correctly generate push bits.
-		 */
-		if (tcp->tcp_rcv_list != NULL && tcp->tcp_push_tid == 0) {
+		 *
+		 * Note, for sodirect if Q isn't empty and there's not a
+		 * pending wakeup then we need a timer. Also note that sodp
+		 * is assumed to be still valid after exit()ing the sod_lock
+		 * above and while the SOD state can change it can only change
+		 * such that the Q is empty now even though data was added
+		 * above.
+		 */
+		if (((sodp != NULL && !SOD_QEMPTY(sodp) &&
+		    (sodp->sod_state & SOD_WAKE_NOT)) ||
+		    (sodp == NULL && tcp->tcp_rcv_list != NULL)) &&
+		    tcp->tcp_push_tid == 0) {
 			/*
 			 * The connection may be closed at this point, so don't
 			 * do anything for a detached tcp.
@@ -15070,6 +15411,7 @@
 				    tcps->tcps_push_timer_interval));
 		}
 	}
+
 xmit_check:
 	/* Is there anything left to do? */
 	ASSERT(!(flags & TH_MARKNEXT_NEEDED));
@@ -15145,13 +15487,26 @@
 		/*
 		 * Send up any queued data and then send the mark message
 		 */
-		if (tcp->tcp_rcv_list != NULL) {
-			flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
-		}
-		ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+		sodirect_t *sodp;
+
+		SOD_PTR_ENTER(tcp, sodp);
 
 		mp1 = tcp->tcp_urp_mark_mp;
 		tcp->tcp_urp_mark_mp = NULL;
+		if (sodp != NULL) {
+
+			ASSERT(tcp->tcp_rcv_list == NULL);
+
+			flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+			/* sod_wakeup() does the mutex_exit() */
+		} else if (tcp->tcp_rcv_list != NULL) {
+			flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
+
+			ASSERT(tcp->tcp_rcv_list == NULL ||
+			    tcp->tcp_fused_sigurg);
+
+		}
+		putnext(tcp->tcp_rq, mp1);
 #ifdef DEBUG
 		(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE,
 		    "tcp_rput: sending zero-length %s %s",
@@ -15159,7 +15514,6 @@
 		    "MSGNOTMARKNEXT"),
 		    tcp_display(tcp, NULL, DISP_PORT_ONLY));
 #endif /* DEBUG */
-		putnext(tcp->tcp_rq, mp1);
 		flags &= ~TH_SEND_URP_MARK;
 	}
 	if (flags & TH_ACK_NEEDED) {
@@ -15197,14 +15551,32 @@
 		 * In the eager case tcp_rsrv will do this when run
 		 * after tcp_accept is done.
 		 */
+		sodirect_t *sodp;
+
 		ASSERT(tcp->tcp_listener == NULL);
-		if (tcp->tcp_rcv_list != NULL) {
+
+		SOD_PTR_ENTER(tcp, sodp);
+		if (sodp != NULL) {
+			/* No more sodirect */
+			tcp->tcp_sodirect = NULL;
+			if (!SOD_QEMPTY(sodp)) {
+				/* Mblk(s) to process, notify */
+				flags |= tcp_rcv_sod_wakeup(tcp, sodp);
+				/* sod_wakeup() does the mutex_exit() */
+			} else {
+				/* Nothing to process */
+				mutex_exit(sodp->sod_lock);
+			}
+		} else if (tcp->tcp_rcv_list != NULL) {
 			/*
 			 * Push any mblk(s) enqueued from co processing.
 			 */
 			flags |= tcp_rcv_drain(tcp->tcp_rq, tcp);
-		}
-		ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+
+			ASSERT(tcp->tcp_rcv_list == NULL ||
+			    tcp->tcp_fused_sigurg);
+		}
+
 		if ((mp1 = mi_tpi_ordrel_ind()) != NULL) {
 			tcp->tcp_ordrel_done = B_TRUE;
 			putnext(tcp->tcp_rq, mp1);
@@ -15974,6 +16346,8 @@
 	queue_t	*q = tcp->tcp_rq;
 	uint_t	thwin;
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	sodirect_t	*sodp;
+	boolean_t	fc;
 
 	freeb(mp);
 
@@ -16024,7 +16398,27 @@
 		return;
 	}
 
-	if (canputnext(q)) {
+	SOD_PTR_ENTER(tcp, sodp);
+	if (sodp != NULL) {
+		/* An sodirect connection */
+		if (SOD_QFULL(sodp)) {
+			/* Flow-controlled, need another back-enable */
+			fc = B_TRUE;
+			SOD_QSETBE(sodp);
+		} else {
+			/* Not flow-controlled */
+			fc = B_FALSE;
+		}
+		mutex_exit(sodp->sod_lock);
+	} else if (canputnext(q)) {
+		/* STREAMS, not flow-controlled */
+		fc = B_FALSE;
+	} else {
+		/* STREAMS, flow-controlled */
+		fc = B_TRUE;
+	}
+	if (!fc) {
+		/* Not flow-controlled, open rwnd */
 		tcp->tcp_rwnd = q->q_hiwat;
 		thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
 		    << tcp->tcp_rcv_ws;
@@ -16043,13 +16437,32 @@
 			BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate);
 		}
 	}
+
 	/* Handle a failure to allocate a T_ORDREL_IND here */
 	if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
 		ASSERT(tcp->tcp_listener == NULL);
-		if (tcp->tcp_rcv_list != NULL) {
-			(void) tcp_rcv_drain(q, tcp);
-		}
-		ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
+
+		SOD_PTR_ENTER(tcp, sodp);
+		if (sodp != NULL) {
+			/* No more sodirect */
+			tcp->tcp_sodirect = NULL;
+			if (!SOD_QEMPTY(sodp)) {
+				/* Notify mblk(s) to process */
+				(void) tcp_rcv_sod_wakeup(tcp, sodp);
+				/* sod_wakeup() does the mutex_exit() */
+			} else {
+				/* Nothing to process */
+				mutex_exit(sodp->sod_lock);
+			}
+		} else if (tcp->tcp_rcv_list != NULL) {
+			/*
+			 * Push any mblk(s) enqueued from co processing.
+			 */
+			(void) tcp_rcv_drain(tcp->tcp_rq, tcp);
+			ASSERT(tcp->tcp_rcv_list == NULL ||
+			    tcp->tcp_fused_sigurg);
+		}
+
 		mp = mi_tpi_ordrel_ind();
 		if (mp) {
 			tcp->tcp_ordrel_done = B_TRUE;
@@ -18097,6 +18510,8 @@
 	 */
 	if (tcp->tcp_rcv_list != NULL) {
 		/* We drain directly in case of fused tcp loopback */
+		sodirect_t *sodp;
+
 		if (!tcp->tcp_fused && canputnext(q)) {
 			tcp->tcp_rwnd = q->q_hiwat;
 			thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win))
@@ -18112,7 +18527,26 @@
 			}
 
 		}
-		(void) tcp_rcv_drain(q, tcp);
+
+		SOD_PTR_ENTER(tcp, sodp);
+		if (sodp != NULL) {
+			/* Sodirect, move from rcv_list */
+			ASSERT(!tcp->tcp_fused);
+			while ((mp = tcp->tcp_rcv_list) != NULL) {
+				tcp->tcp_rcv_list = mp->b_next;
+				mp->b_next = NULL;
+				(void) tcp_rcv_sod_enqueue(tcp, sodp, mp,
+				    msgdsize(mp));
+			}
+			tcp->tcp_rcv_last_head = NULL;
+			tcp->tcp_rcv_last_tail = NULL;
+			tcp->tcp_rcv_cnt = 0;
+			(void) tcp_rcv_sod_wakeup(tcp, sodp);
+			/* sod_wakeup() did the mutex_exit() */
+		} else {
+			/* Not sodirect, drain */
+			(void) tcp_rcv_drain(q, tcp);
+		}
 
 		/*
 		 * For fused tcp loopback, back-enable peer endpoint
@@ -18304,6 +18738,21 @@
 		listener = eager->tcp_listener;
 		eager->tcp_issocket = B_TRUE;
 
+		/*
+		 * TCP is _D_SODIRECT and sockfs is directly above so
+		 * save shared sodirect_t pointer (if any).
+		 *
+		 * If tcp_fused and sodirect enabled disable it.
+		 */
+		eager->tcp_sodirect = SOD_QTOSODP(eager->tcp_rq);
+		if (eager->tcp_fused && eager->tcp_sodirect != NULL) {
+			/* Fused, disable sodirect */
+			mutex_enter(eager->tcp_sodirect->sod_lock);
+			SOD_DISABLE(eager->tcp_sodirect);
+			mutex_exit(eager->tcp_sodirect->sod_lock);
+			eager->tcp_sodirect = NULL;
+		}
+
 		econnp->conn_zoneid = listener->tcp_connp->conn_zoneid;
 		econnp->conn_allzones = listener->tcp_connp->conn_allzones;
 		ASSERT(econnp->conn_netstack ==
@@ -22140,6 +22589,7 @@
 				tcp_fuse_disable_pair(tcp, B_FALSE);
 			}
 			tcp->tcp_issocket = B_FALSE;
+			tcp->tcp_sodirect = NULL;
 			TCP_STAT(tcps, tcp_sock_fallback);
 
 			DB_TYPE(mp) = M_IOCACK;
@@ -23420,6 +23870,8 @@
 	conn_t	*connp = (conn_t *)arg;
 	tcp_t *tcp = connp->conn_tcp;
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
+	uint_t		flags;
+	sodirect_t	*sodp;
 
 	TCP_DBGSTAT(tcps, tcp_push_timer_cnt);
 
@@ -23431,9 +23883,17 @@
 	 */
 	TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp);
 	tcp->tcp_push_tid = 0;
-	if ((tcp->tcp_rcv_list != NULL) &&
-	    (tcp_rcv_drain(tcp->tcp_rq, tcp) == TH_ACK_NEEDED))
+
+	SOD_PTR_ENTER(tcp, sodp);
+	if (sodp != NULL) {
+		flags = tcp_rcv_sod_wakeup(tcp, sodp);
+		/* sod_wakeup() does the mutex_exit() */
+	} else if (tcp->tcp_rcv_list != NULL) {
+		flags = tcp_rcv_drain(tcp->tcp_rq, tcp);
+	}
+	if (flags == TH_ACK_NEEDED)
 		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
+
 	TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp);
 }
 
--- a/usr/src/uts/common/inet/tcp/tcp6ddi.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/inet/tcp/tcp6ddi.c	Fri May 23 20:14:10 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -40,7 +40,7 @@
  * for TCP Fusion (loopback); this is why we don't define
  * D_SYNCSTR here.
  */
-#define	INET_DEVMTFLAGS	(D_MP|_D_DIRECT)
+#define	INET_DEVMTFLAGS	(D_MP|_D_DIRECT|_D_SODIRECT)
 
 #include "../inetddi.c"
 
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c	Fri May 23 20:14:10 2008 -0700
@@ -287,6 +287,20 @@
 		if ((mp = allocb(sizeof (*stropt), BPRI_HI)) == NULL)
 			goto failed;
 
+		/* If either tcp or peer_tcp sodirect enabled then disable */
+		if (tcp->tcp_sodirect != NULL) {
+			mutex_enter(tcp->tcp_sodirect->sod_lock);
+			SOD_DISABLE(tcp->tcp_sodirect);
+			mutex_exit(tcp->tcp_sodirect->sod_lock);
+			tcp->tcp_sodirect = NULL;
+		}
+		if (peer_tcp->tcp_sodirect != NULL) {
+			mutex_enter(peer_tcp->tcp_sodirect->sod_lock);
+			SOD_DISABLE(peer_tcp->tcp_sodirect);
+			mutex_exit(peer_tcp->tcp_sodirect->sod_lock);
+			peer_tcp->tcp_sodirect = NULL;
+		}
+
 		/* Fuse both endpoints */
 		peer_tcp->tcp_loopback_peer = tcp;
 		tcp->tcp_loopback_peer = peer_tcp;
--- a/usr/src/uts/common/inet/tcp/tcpddi.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/inet/tcp/tcpddi.c	Fri May 23 20:14:10 2008 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /* Copyright (c) 1990 Mentat Inc. */
@@ -44,7 +44,7 @@
  * for TCP Fusion (loopback); this is why we don't define
  * D_SYNCSTR here.
  */
-#define	INET_DEVMTFLAGS	(D_MP|_D_DIRECT)
+#define	INET_DEVMTFLAGS	(D_MP|_D_DIRECT|_D_SODIRECT)
 
 #include "../inetddi.c"
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/io/dcopy.c	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,938 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * dcopy.c
+ *    dcopy misc module
+ */
+
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/sysmacros.h>
+#include <sys/atomic.h>
+
+
+#include <sys/dcopy.h>
+#include <sys/dcopy_device.h>
+
+
+/* Number of entries per channel to allocate */
+uint_t dcopy_channel_size = 1024;
+
+
+typedef struct dcopy_list_s {
+	list_t			dl_list;
+	kmutex_t		dl_mutex;
+	uint_t			dl_cnt; /* num entries on list */
+} dcopy_list_t;
+
+/* device state for register/unregister */
+struct dcopy_device_s {
+	/* DMA device drivers private pointer */
+	void			*dc_device_private;
+
+	/* to track list of channels from this DMA device */
+	dcopy_list_t		dc_devchan_list;
+	list_node_t		dc_device_list_node;
+
+	/*
+	 * dc_removing_cnt track how many channels still have to be freed up
+	 * before it's safe to allow the DMA device driver to detach.
+	 */
+	uint_t			dc_removing_cnt;
+	dcopy_device_cb_t	*dc_cb;
+
+	dcopy_device_info_t	dc_info;
+
+};
+
+typedef struct dcopy_stats_s {
+	kstat_named_t	cs_bytes_xfer;
+	kstat_named_t	cs_cmd_alloc;
+	kstat_named_t	cs_cmd_post;
+	kstat_named_t	cs_cmd_poll;
+	kstat_named_t	cs_notify_poll;
+	kstat_named_t	cs_notify_pending;
+	kstat_named_t	cs_id;
+	kstat_named_t	cs_capabilities;
+} dcopy_stats_t;
+
+/* DMA channel state */
+struct dcopy_channel_s {
+	/* DMA driver channel private pointer */
+	void			*ch_channel_private;
+
+	/* shortcut to device callbacks */
+	dcopy_device_cb_t	*ch_cb;
+
+	/*
+	 * number of outstanding allocs for this channel. used to track when
+	 * it's safe to free up this channel so the DMA device driver can
+	 * detach.
+	 */
+	uint64_t		ch_ref_cnt;
+
+	/* state for if channel needs to be removed when ch_ref_cnt gets to 0 */
+	boolean_t		ch_removing;
+
+	list_node_t		ch_devchan_list_node;
+	list_node_t		ch_globalchan_list_node;
+
+	/*
+	 * per channel list of commands actively blocking waiting for
+	 * completion.
+	 */
+	dcopy_list_t		ch_poll_list;
+
+	/* pointer back to our device */
+	struct dcopy_device_s	*ch_device;
+
+	dcopy_query_channel_t	ch_info;
+
+	kstat_t			*ch_kstat;
+	dcopy_stats_t		ch_stat;
+};
+
+/*
+ * If grabbing both device_list mutex & globalchan_list mutex,
+ * Always grab globalchan_list mutex before device_list mutex
+ */
+typedef struct dcopy_state_s {
+	dcopy_list_t		d_device_list;
+	dcopy_list_t		d_globalchan_list;
+} dcopy_state_t;
+dcopy_state_t *dcopy_statep;
+
+
+/* Module Driver Info */
+static struct modlmisc dcopy_modlmisc = {
+	&mod_miscops,
+	"dcopy kernel module"
+};
+
+/* Module Linkage */
+static struct modlinkage dcopy_modlinkage = {
+	MODREV_1,
+	&dcopy_modlmisc,
+	NULL
+};
+
+static int dcopy_init();
+static void dcopy_fini();
+
+static int dcopy_list_init(dcopy_list_t *list, size_t node_size,
+    offset_t link_offset);
+static void dcopy_list_fini(dcopy_list_t *list);
+static void dcopy_list_push(dcopy_list_t *list, void *list_node);
+static void *dcopy_list_pop(dcopy_list_t *list);
+
+static void dcopy_device_cleanup(dcopy_device_handle_t device,
+    boolean_t do_callback);
+
+static int dcopy_stats_init(dcopy_handle_t channel);
+static void dcopy_stats_fini(dcopy_handle_t channel);
+
+
+/*
+ * _init()
+ */
+int
+_init()
+{
+	int e;
+
+	e = dcopy_init();
+	if (e != 0) {
+		return (e);
+	}
+
+	return (mod_install(&dcopy_modlinkage));
+}
+
+
+/*
+ * _info()
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&dcopy_modlinkage, modinfop));
+}
+
+
+/*
+ * _fini()
+ */
+int
+_fini()
+{
+	int e;
+
+	e = mod_remove(&dcopy_modlinkage);
+	if (e != 0) {
+		return (e);
+	}
+
+	dcopy_fini();
+
+	return (e);
+}
+
+/*
+ * dcopy_init()
+ */
+static int
+dcopy_init()
+{
+	int e;
+
+
+	dcopy_statep = kmem_zalloc(sizeof (*dcopy_statep), KM_SLEEP);
+
+	/* Initialize the list we use to track device register/unregister */
+	e = dcopy_list_init(&dcopy_statep->d_device_list,
+	    sizeof (struct dcopy_device_s),
+	    offsetof(struct dcopy_device_s, dc_device_list_node));
+	if (e != DCOPY_SUCCESS) {
+		goto dcopyinitfail_device;
+	}
+
+	/* Initialize the list we use to track all DMA channels */
+	e = dcopy_list_init(&dcopy_statep->d_globalchan_list,
+	    sizeof (struct dcopy_channel_s),
+	    offsetof(struct dcopy_channel_s, ch_globalchan_list_node));
+	if (e != DCOPY_SUCCESS) {
+		goto dcopyinitfail_global;
+	}
+
+	return (0);
+
+dcopyinitfail_cback:
+	dcopy_list_fini(&dcopy_statep->d_globalchan_list);
+dcopyinitfail_global:
+	dcopy_list_fini(&dcopy_statep->d_device_list);
+dcopyinitfail_device:
+	kmem_free(dcopy_statep, sizeof (*dcopy_statep));
+
+	return (-1);
+}
+
+
+/*
+ * dcopy_fini()
+ */
+static void
+dcopy_fini()
+{
+	/*
+	 * if mod_remove was successful, we shouldn't have any
+	 * devices/channels to worry about.
+	 */
+	ASSERT(list_head(&dcopy_statep->d_globalchan_list.dl_list) == NULL);
+	ASSERT(list_head(&dcopy_statep->d_device_list.dl_list) == NULL);
+
+	dcopy_list_fini(&dcopy_statep->d_globalchan_list);
+	dcopy_list_fini(&dcopy_statep->d_device_list);
+	kmem_free(dcopy_statep, sizeof (*dcopy_statep));
+}
+
+
+/* *** EXTERNAL INTERFACE *** */
+/*
+ * dcopy_query()
+ */
+void
+dcopy_query(dcopy_query_t *query)
+{
+	query->dq_version = DCOPY_QUERY_V0;
+	query->dq_num_channels = dcopy_statep->d_globalchan_list.dl_cnt;
+}
+
+
+/*
+ * dcopy_alloc()
+ */
+/*ARGSUSED*/
+int
+dcopy_alloc(int flags, dcopy_handle_t *handle)
+{
+	dcopy_handle_t channel;
+	dcopy_list_t *list;
+
+
+	/*
+	 * we don't use the dcopy_list_* code here because we need to do
+	 * some non-standard stuff.
+	 */
+
+	list = &dcopy_statep->d_globalchan_list;
+
+	/*
+	 * if nothing is on the channel list, return DCOPY_NORESOURCES. This
+	 * can happen if there aren't any DMA devices registered.
+	 */
+	mutex_enter(&list->dl_mutex);
+	channel = list_head(&list->dl_list);
+	if (channel == NULL) {
+		mutex_exit(&list->dl_mutex);
+		return (DCOPY_NORESOURCES);
+	}
+
+	/*
+	 * increment the reference count, and pop the channel off the head and
+	 * push it on the tail. This ensures we rotate through the channels.
+	 * DMA channels are shared.
+	 */
+	channel->ch_ref_cnt++;
+	list_remove(&list->dl_list, channel);
+	list_insert_tail(&list->dl_list, channel);
+	mutex_exit(&list->dl_mutex);
+
+	*handle = (dcopy_handle_t)channel;
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * dcopy_free()
+ */
+void
+dcopy_free(dcopy_handle_t *channel)
+{
+	dcopy_device_handle_t device;
+	dcopy_list_t *list;
+	boolean_t cleanup;
+
+
+	ASSERT(*channel != NULL);
+
+	/*
+	 * we don't need to add the channel back to the list since we never
+	 * removed it. decrement the reference count.
+	 */
+	list = &dcopy_statep->d_globalchan_list;
+	mutex_enter(&list->dl_mutex);
+	(*channel)->ch_ref_cnt--;
+
+	/*
+	 * if we need to remove this channel, and the reference count is down
+	 * to 0, decrement the number of channels which still need to be
+	 * removed on the device.
+	 */
+	if ((*channel)->ch_removing && ((*channel)->ch_ref_cnt == 0)) {
+		cleanup = B_FALSE;
+		device = (*channel)->ch_device;
+		mutex_enter(&device->dc_devchan_list.dl_mutex);
+		device->dc_removing_cnt--;
+		if (device->dc_removing_cnt == 0) {
+			cleanup = B_TRUE;
+		}
+		mutex_exit(&device->dc_devchan_list.dl_mutex);
+	}
+	mutex_exit(&list->dl_mutex);
+
+	/*
+	 * if there are no channels which still need to be removed, cleanup the
+	 * device state and call back into the DMA device driver to tell them
+	 * the device is free.
+	 */
+	if (cleanup) {
+		dcopy_device_cleanup(device, B_TRUE);
+	}
+
+	*channel = NULL;
+}
+
+
+/*
+ * dcopy_query_channel()
+ */
+void
+dcopy_query_channel(dcopy_handle_t channel, dcopy_query_channel_t *query)
+{
+	*query = channel->ch_info;
+}
+
+
+/*
+ * dcopy_cmd_alloc()
+ */
+int
+dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd)
+{
+	dcopy_handle_t channel;
+	dcopy_cmd_priv_t priv;
+	int e;
+
+
+	channel = handle;
+
+	atomic_inc_64(&channel->ch_stat.cs_cmd_alloc.value.ui64);
+	e = channel->ch_cb->cb_cmd_alloc(channel->ch_channel_private, flags,
+	    cmd);
+	if (e == DCOPY_SUCCESS) {
+		priv = (*cmd)->dp_private;
+		priv->pr_channel = channel;
+		/*
+		 * we won't initialize the blocking state until we actually
+		 * need to block.
+		 */
+		priv->pr_block_init = B_FALSE;
+	}
+
+	return (e);
+}
+
+
+/*
+ * dcopy_cmd_free()
+ */
+void
+dcopy_cmd_free(dcopy_cmd_t *cmd)
+{
+	dcopy_handle_t channel;
+	dcopy_cmd_priv_t priv;
+
+
+	ASSERT(*cmd != NULL);
+
+	priv = (*cmd)->dp_private;
+	channel = priv->pr_channel;
+
+	/* if we initialized the blocking state, clean it up too */
+	if (priv->pr_block_init) {
+		cv_destroy(&priv->pr_cv);
+		mutex_destroy(&priv->pr_mutex);
+	}
+
+	channel->ch_cb->cb_cmd_free(channel->ch_channel_private, cmd);
+}
+
+
+/*
+ * dcopy_cmd_post()
+ */
+int
+dcopy_cmd_post(dcopy_cmd_t cmd)
+{
+	dcopy_handle_t channel;
+	int e;
+
+
+	channel = cmd->dp_private->pr_channel;
+
+	atomic_inc_64(&channel->ch_stat.cs_cmd_post.value.ui64);
+	if (cmd->dp_cmd == DCOPY_CMD_COPY) {
+		atomic_add_64(&channel->ch_stat.cs_bytes_xfer.value.ui64,
+		    cmd->dp.copy.cc_size);
+	}
+	e = channel->ch_cb->cb_cmd_post(channel->ch_channel_private, cmd);
+	if (e != DCOPY_SUCCESS) {
+		return (e);
+	}
+
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * dcopy_cmd_poll()
+ */
+int
+dcopy_cmd_poll(dcopy_cmd_t cmd, int flags)
+{
+	dcopy_handle_t channel;
+	dcopy_cmd_priv_t priv;
+	int e;
+
+
+	priv = cmd->dp_private;
+	channel = priv->pr_channel;
+
+	/*
+	 * if the caller is trying to block, they needed to post the
+	 * command with DCOPY_CMD_INTR set.
+	 */
+	if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) {
+		return (DCOPY_FAILURE);
+	}
+
+	atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64);
+
+repoll:
+	e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd);
+	if (e == DCOPY_PENDING) {
+		/*
+		 * if the command is still active, and the blocking flag
+		 * is set.
+		 */
+		if (flags & DCOPY_POLL_BLOCK) {
+
+			/*
+			 * if we haven't initialized the state, do it now. A
+			 * command can be re-used, so it's possible it's
+			 * already been initialized.
+			 */
+			if (!priv->pr_block_init) {
+				priv->pr_block_init = B_TRUE;
+				mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER,
+				    NULL);
+				cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL);
+				priv->pr_cmd = cmd;
+			}
+
+			/* push it on the list for blocking commands */
+			priv->pr_wait = B_TRUE;
+			dcopy_list_push(&channel->ch_poll_list, priv);
+
+			mutex_enter(&priv->pr_mutex);
+			/*
+			 * it's possible we already cleared pr_wait before we
+			 * grabbed the mutex.
+			 */
+			if (priv->pr_wait) {
+				cv_wait(&priv->pr_cv, &priv->pr_mutex);
+			}
+			mutex_exit(&priv->pr_mutex);
+
+			/*
+			 * the command has completed, go back and poll so we
+			 * get the status.
+			 */
+			goto repoll;
+		}
+	}
+
+	return (e);
+}
+
+/* *** END OF EXTERNAL INTERFACE *** */
+
+/*
+ * dcopy_list_init()
+ */
+static int
+dcopy_list_init(dcopy_list_t *list, size_t node_size, offset_t link_offset)
+{
+	mutex_init(&list->dl_mutex, NULL, MUTEX_DRIVER, NULL);
+	list_create(&list->dl_list, node_size, link_offset);
+	list->dl_cnt = 0;
+
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * dcopy_list_fini()
+ */
+static void
+dcopy_list_fini(dcopy_list_t *list)
+{
+	list_destroy(&list->dl_list);
+	mutex_destroy(&list->dl_mutex);
+}
+
+
+/*
+ * dcopy_list_push()
+ */
+static void
+dcopy_list_push(dcopy_list_t *list, void *list_node)
+{
+	mutex_enter(&list->dl_mutex);
+	list_insert_tail(&list->dl_list, list_node);
+	list->dl_cnt++;
+	mutex_exit(&list->dl_mutex);
+}
+
+
+/*
+ * dcopy_list_pop()
+ */
+static void *
+dcopy_list_pop(dcopy_list_t *list)
+{
+	list_node_t *list_node;
+
+	mutex_enter(&list->dl_mutex);
+	list_node = list_head(&list->dl_list);
+	if (list_node == NULL) {
+		mutex_exit(&list->dl_mutex);
+		return (list_node);
+	}
+	list->dl_cnt--;
+	list_remove(&list->dl_list, list_node);
+	mutex_exit(&list->dl_mutex);
+
+	return (list_node);
+}
+
+
+/* *** DEVICE INTERFACE *** */
+/*
+ * dcopy_device_register()
+ */
+int
+dcopy_device_register(void *device_private, dcopy_device_info_t *info,
+    dcopy_device_handle_t *handle)
+{
+	struct dcopy_channel_s *channel;
+	struct dcopy_device_s *device;
+	int e;
+	int i;
+
+
+	/* initialize the per device state */
+	device = kmem_zalloc(sizeof (*device), KM_SLEEP);
+	device->dc_device_private = device_private;
+	device->dc_info = *info;
+	device->dc_removing_cnt = 0;
+	device->dc_cb = info->di_cb;
+
+	/*
+	 * we have a per device channel list so we can remove a device in the
+	 * future.
+	 */
+	e = dcopy_list_init(&device->dc_devchan_list,
+	    sizeof (struct dcopy_channel_s),
+	    offsetof(struct dcopy_channel_s, ch_devchan_list_node));
+	if (e != DCOPY_SUCCESS) {
+		goto registerfail_devchan;
+	}
+
+	/*
+	 * allocate state for each channel, allocate the channel, and then add
+	 * the device's dma channels to the device's channel list.
+	 */
+	for (i = 0; i < info->di_num_dma; i++) {
+		channel = kmem_zalloc(sizeof (*channel), KM_SLEEP);
+		channel->ch_device = device;
+		channel->ch_removing = B_FALSE;
+		channel->ch_ref_cnt = 0;
+		channel->ch_cb = info->di_cb;
+
+		e = info->di_cb->cb_channel_alloc(device_private, channel,
+		    DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info,
+		    &channel->ch_channel_private);
+		if (e != DCOPY_SUCCESS) {
+			kmem_free(channel, sizeof (*channel));
+			goto registerfail_alloc;
+		}
+
+		e = dcopy_stats_init(channel);
+		if (e != DCOPY_SUCCESS) {
+			info->di_cb->cb_channel_free(
+			    &channel->ch_channel_private);
+			kmem_free(channel, sizeof (*channel));
+			goto registerfail_alloc;
+		}
+
+		e = dcopy_list_init(&channel->ch_poll_list,
+		    sizeof (struct dcopy_cmd_priv_s),
+		    offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node));
+		if (e != DCOPY_SUCCESS) {
+			dcopy_stats_fini(channel);
+			info->di_cb->cb_channel_free(
+			    &channel->ch_channel_private);
+			kmem_free(channel, sizeof (*channel));
+			goto registerfail_alloc;
+		}
+
+		dcopy_list_push(&device->dc_devchan_list, channel);
+	}
+
+	/* add the device to device list */
+	dcopy_list_push(&dcopy_statep->d_device_list, device);
+
+	/*
+	 * add the device's dma channels to the global channel list (where
+	 * dcopy_alloc's come from)
+	 */
+	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
+	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
+	channel = list_head(&device->dc_devchan_list.dl_list);
+	while (channel != NULL) {
+		list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list,
+		    channel);
+		dcopy_statep->d_globalchan_list.dl_cnt++;
+		channel = list_next(&device->dc_devchan_list.dl_list, channel);
+	}
+	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
+	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);
+
+	*handle = device;
+
+	/* last call-back into kernel for dcopy KAPI enabled */
+	uioa_dcopy_enable();
+
+	return (DCOPY_SUCCESS);
+
+registerfail_alloc:
+	channel = list_head(&device->dc_devchan_list.dl_list);
+	while (channel != NULL) {
+		/* remove from the list */
+		channel = dcopy_list_pop(&device->dc_devchan_list);
+		ASSERT(channel != NULL);
+
+		dcopy_list_fini(&channel->ch_poll_list);
+		dcopy_stats_fini(channel);
+		info->di_cb->cb_channel_free(&channel->ch_channel_private);
+		kmem_free(channel, sizeof (*channel));
+	}
+
+	dcopy_list_fini(&device->dc_devchan_list);
+registerfail_devchan:
+	kmem_free(device, sizeof (*device));
+
+	return (DCOPY_FAILURE);
+}
+
+
+/*
+ * dcopy_device_unregister()
+ */
+/*ARGSUSED*/
+int
+dcopy_device_unregister(dcopy_device_handle_t *handle)
+{
+	struct dcopy_channel_s *channel;
+	dcopy_device_handle_t device;
+	boolean_t device_busy;
+
+	/* first call-back into kernel for dcopy KAPI disable */
+	uioa_dcopy_disable();
+
+	device = *handle;
+	device_busy = B_FALSE;
+
+	/*
+	 * remove the device's dma channels from the global channel list (where
+	 * dcopy_alloc's come from)
+	 */
+	mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex);
+	mutex_enter(&device->dc_devchan_list.dl_mutex);
+	channel = list_head(&device->dc_devchan_list.dl_list);
+	while (channel != NULL) {
+		/*
+		 * if the channel has outstanding allocs, mark it as having
+		 * to be removed and increment the number of channels which
+		 * need to be removed in the device state too.
+		 */
+		if (channel->ch_ref_cnt != 0) {
+			channel->ch_removing = B_TRUE;
+			device_busy = B_TRUE;
+			device->dc_removing_cnt++;
+		}
+		dcopy_statep->d_globalchan_list.dl_cnt--;
+		list_remove(&dcopy_statep->d_globalchan_list.dl_list, channel);
+		channel = list_next(&device->dc_devchan_list.dl_list, channel);
+	}
+	mutex_exit(&device->dc_devchan_list.dl_mutex);
+	mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex);
+
+	/*
+	 * if there are channels which still need to be removed, we will clean
+	 * up the device state after they are freed up.
+	 */
+	if (device_busy) {
+		return (DCOPY_PENDING);
+	}
+
+	dcopy_device_cleanup(device, B_FALSE);
+
+	*handle = NULL;
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * dcopy_device_cleanup()
+ */
+static void
+dcopy_device_cleanup(dcopy_device_handle_t device, boolean_t do_callback)
+{
+	struct dcopy_channel_s *channel;
+
+	/*
+	 * remove all the channels in the device list, free them, and clean up
+	 * the state.
+	 */
+	mutex_enter(&dcopy_statep->d_device_list.dl_mutex);
+	channel = list_head(&device->dc_devchan_list.dl_list);
+	while (channel != NULL) {
+		device->dc_devchan_list.dl_cnt--;
+		list_remove(&device->dc_devchan_list.dl_list, channel);
+		dcopy_list_fini(&channel->ch_poll_list);
+		dcopy_stats_fini(channel);
+		channel->ch_cb->cb_channel_free(&channel->ch_channel_private);
+		kmem_free(channel, sizeof (*channel));
+		channel = list_head(&device->dc_devchan_list.dl_list);
+	}
+
+	/* remove it from the list of devices */
+	list_remove(&dcopy_statep->d_device_list.dl_list, device);
+
+	mutex_exit(&dcopy_statep->d_device_list.dl_mutex);
+
+	/*
+	 * notify the DMA device driver that the device is free to be
+	 * detached.
+	 */
+	if (do_callback) {
+		device->dc_cb->cb_unregister_complete(
+		    device->dc_device_private, DCOPY_SUCCESS);
+	}
+
+	dcopy_list_fini(&device->dc_devchan_list);
+	kmem_free(device, sizeof (*device));
+}
+
+
+/*
+ * dcopy_device_channel_notify()
+ */
+/*ARGSUSED*/
+void
+dcopy_device_channel_notify(dcopy_handle_t handle, int status)
+{
+	struct dcopy_channel_s *channel;
+	dcopy_list_t *poll_list;
+	dcopy_cmd_priv_t priv;
+	int e;
+
+
+	ASSERT(status == DCOPY_COMPLETION);
+	channel = handle;
+
+	poll_list = &channel->ch_poll_list;
+
+	/*
+	 * when we get a completion notification from the device, go through
+	 * all of the commands blocking on this channel and see if they have
+	 * completed. Remove the command and wake up the block thread if they
+	 * have. Once we hit a command which is still pending, we are done
+	 * polling since commands in a channel complete in order.
+	 */
+	mutex_enter(&poll_list->dl_mutex);
+	if (poll_list->dl_cnt != 0) {
+		priv = list_head(&poll_list->dl_list);
+		while (priv != NULL) {
+			atomic_inc_64(&channel->
+			    ch_stat.cs_notify_poll.value.ui64);
+			e = channel->ch_cb->cb_cmd_poll(
+			    channel->ch_channel_private,
+			    priv->pr_cmd);
+			if (e == DCOPY_PENDING) {
+				atomic_inc_64(&channel->
+				    ch_stat.cs_notify_pending.value.ui64);
+				break;
+			}
+
+			poll_list->dl_cnt--;
+			list_remove(&poll_list->dl_list, priv);
+
+			mutex_enter(&priv->pr_mutex);
+			priv->pr_wait = B_FALSE;
+			cv_signal(&priv->pr_cv);
+			mutex_exit(&priv->pr_mutex);
+
+			priv = list_head(&poll_list->dl_list);
+		}
+	}
+
+	mutex_exit(&poll_list->dl_mutex);
+}
+
+
+/*
+ * dcopy_stats_init()
+ */
+static int
+dcopy_stats_init(dcopy_handle_t channel)
+{
+#define	CHANSTRSIZE	20
+	char chanstr[CHANSTRSIZE];
+	dcopy_stats_t *stats;
+	int instance;
+	char *name;
+
+
+	stats = &channel->ch_stat;
+	name = (char *)ddi_driver_name(channel->ch_device->dc_info.di_dip);
+	instance = ddi_get_instance(channel->ch_device->dc_info.di_dip);
+
+	(void) snprintf(chanstr, CHANSTRSIZE, "channel%d",
+	    (uint32_t)channel->ch_info.qc_chan_num);
+
+	channel->ch_kstat = kstat_create(name, instance, chanstr, "misc",
+	    KSTAT_TYPE_NAMED, sizeof (dcopy_stats_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (channel->ch_kstat == NULL) {
+		return (DCOPY_FAILURE);
+	}
+	channel->ch_kstat->ks_data = stats;
+
+	kstat_named_init(&stats->cs_bytes_xfer, "bytes_xfer",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_cmd_alloc, "cmd_alloc",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_cmd_post, "cmd_post",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_cmd_poll, "cmd_poll",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_notify_poll, "notify_poll",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_notify_pending, "notify_pending",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_id, "id",
+	    KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->cs_capabilities, "capabilities",
+	    KSTAT_DATA_UINT64);
+
+	kstat_install(channel->ch_kstat);
+
+	channel->ch_stat.cs_id.value.ui64 = channel->ch_info.qc_id;
+	channel->ch_stat.cs_capabilities.value.ui64 =
+	    channel->ch_info.qc_capabilities;
+
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * dcopy_stats_fini()
+ */
+static void
+dcopy_stats_fini(dcopy_handle_t channel)
+{
+	kstat_delete(channel->ch_kstat);
+}
+/* *** END OF DEVICE INTERFACE *** */
--- a/usr/src/uts/common/io/stream.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/io/stream.c	Fri May 23 20:14:10 2008 -0700
@@ -23,7 +23,7 @@
 
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -318,8 +318,8 @@
 	int offset;
 
 	mblk_cache = kmem_cache_create("streams_mblk",
-		sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
-		mblk_kmem_flags);
+	    sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL,
+	    mblk_kmem_flags);
 
 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
 
@@ -330,7 +330,7 @@
 			 */
 			tot_size = size + sizeof (dblk_t);
 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
-								< PAGESIZE);
+			    < PAGESIZE);
 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
 
 		} else {
@@ -346,9 +346,9 @@
 
 		(void) sprintf(name, "streams_dblk_%ld", size);
 		cp = kmem_cache_create(name, tot_size,
-			DBLK_CACHE_ALIGN, dblk_constructor,
-			dblk_destructor, NULL,
-			(void *)(size), NULL, dblk_kmem_flags);
+		    DBLK_CACHE_ALIGN, dblk_constructor,
+		    dblk_destructor, NULL,
+		    (void *)(size), NULL, dblk_kmem_flags);
 
 		while (lastsize <= size) {
 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
@@ -357,13 +357,13 @@
 	}
 
 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb",
-			sizeof (dblk_t), DBLK_CACHE_ALIGN,
-			dblk_esb_constructor, dblk_destructor, NULL,
-			(void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
+	    sizeof (dblk_t), DBLK_CACHE_ALIGN,
+	    dblk_esb_constructor, dblk_destructor, NULL,
+	    (void *) sizeof (dblk_t), NULL, dblk_kmem_flags);
 	fthdr_cache = kmem_cache_create("streams_fthdr",
-		sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
+	    sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
 	ftblk_cache = kmem_cache_create("streams_ftblk",
-		sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
+	    sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0);
 
 	/* Initialize Multidata caches */
 	mmd_init();
@@ -545,8 +545,8 @@
 	dbp->db_struioflag = 0;
 	dbp->db_struioun.cksum.flags = 0;
 
-	/* and the COOKED flag */
-	dbp->db_flags &= ~DBLK_COOKED;
+	/* and the COOKED and/or UIOA flag(s) */
+	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
 
 	kmem_cache_free(dbp->db_cache, dbp);
 }
@@ -739,7 +739,7 @@
 	 */
 	if (!str_ftnever) {
 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
-			frp, dblk_lastfree_desb, KM_NOSLEEP);
+		    frp, dblk_lastfree_desb, KM_NOSLEEP);
 
 		if (mp != NULL)
 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
@@ -857,7 +857,7 @@
 	(void) sprintf(buffer, "%s_dblk_cache", name);
 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
-						NULL, (void *)bcp, NULL, 0);
+	    NULL, (void *)bcp, NULL, 0);
 
 	return (bcp);
 }
@@ -1584,7 +1584,7 @@
 			 */
 
 			if ((save_bp != mp) &&
-				(save_bp->b_wptr == save_bp->b_rptr)) {
+			    (save_bp->b_wptr == save_bp->b_rptr)) {
 				bcont = save_bp->b_cont;
 				freeb(save_bp);
 				prev_bp->b_cont = bcont;
@@ -2129,8 +2129,8 @@
 			nmp = mp->b_next;
 			mp->b_next = mp->b_prev = NULL;
 			if ((mp->b_band == 0) &&
-				((flag == FLUSHALL) ||
-				datamsg(mp->b_datap->db_type)))
+			    ((flag == FLUSHALL) ||
+			    datamsg(mp->b_datap->db_type)))
 				freemsg(mp);
 			else
 				(void) putq(q, mp);
@@ -2242,7 +2242,7 @@
 			q->q_flag |= QWANTW;
 			mutex_exit(QLOCK(q));
 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
-				"bcanput:%p %X %d", q, pri, 0);
+			    "bcanput:%p %X %d", q, pri, 0);
 			return (0);
 		}
 	} else {	/* pri != 0 */
@@ -2252,7 +2252,7 @@
 			 */
 			mutex_exit(QLOCK(q));
 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
-				"bcanput:%p %X %d", q, pri, 1);
+			    "bcanput:%p %X %d", q, pri, 1);
 			return (1);
 		}
 		qbp = q->q_bandp;
@@ -2262,13 +2262,13 @@
 			qbp->qb_flag |= QB_WANTW;
 			mutex_exit(QLOCK(q));
 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
-				"bcanput:%p %X %d", q, pri, 0);
+			    "bcanput:%p %X %d", q, pri, 0);
 			return (0);
 		}
 	}
 	mutex_exit(QLOCK(q));
 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
-		"bcanput:%p %X %d", q, pri, 1);
+	    "bcanput:%p %X %d", q, pri, 1);
 	return (1);
 }
 
@@ -2847,7 +2847,7 @@
 	mblk_t *bp;
 
 	if ((datamsg(type) && (type != M_DELAY)) ||
-		((bp = allocb_tryhard(1)) == NULL))
+	    ((bp = allocb_tryhard(1)) == NULL))
 		return (0);
 
 	bp->b_datap->db_type = (unsigned char)type;
@@ -2864,7 +2864,7 @@
 	mblk_t *bp;
 
 	if ((datamsg(type) && (type != M_DELAY)) ||
-		((bp = allocb_tryhard(0)) == NULL))
+	    ((bp = allocb_tryhard(0)) == NULL))
 		return (0);
 	bp->b_datap->db_type = (unsigned char)type;
 
--- a/usr/src/uts/common/os/move.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/os/move.c	Fri May 23 20:14:10 2008 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -45,6 +44,16 @@
 #include <sys/systm.h>
 #include <sys/uio.h>
 #include <sys/errno.h>
+#include <sys/vmsystm.h>
+#include <sys/cmn_err.h>
+#include <vm/as.h>
+#include <vm/page.h>
+
+#include <sys/dcopy.h>
+
+int64_t uioa_maxpoll = -1;	/* <0 = noblock, 0 = block, >0 = block after */
+#define	UIO_DCOPY_CHANNEL	0
+#define	UIO_DCOPY_CMD		1
 
 /*
  * Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -277,3 +286,386 @@
 	duio->uio_iov = diov;
 	return (0);
 }
+
+/*
+ * Shadow state for checking if a platform has hardware asynchronous
+ * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine,
+ *
+ * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls
+ * into dcopy to register and uioa_dcopy_disable() when the device calls
+ * into dcopy to unregister.
+ */
+uioasync_t uioasync = {B_FALSE, 1024};
+
+void
+uioa_dcopy_enable()
+{
+	uioasync.enabled = B_TRUE;
+}
+
+void
+uioa_dcopy_disable()
+{
+	uioasync.enabled = B_FALSE;
+}
+
+/*
+ * Schedule an asynchronous move of "n" bytes at byte address "p",
+ * "rw" indicates the direction of the move, I/O parameters and
+ * async state are provided in "uioa" which is update to reflect
+ * the data which is to be moved.
+ *
+ * Returns 0 on success or a non-zero errno on failure.
+ *
+ * Note, while the uioasync APIs are general purpose in design
+ * the current implementation is Intel I/OAT specific.
+ */
+int
+uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
+{
+	int		soff, doff;
+	uint64_t	pa;
+	int		cnt;
+	iovec_t		*iov;
+	dcopy_handle_t	channel;
+	dcopy_cmd_t	cmd;
+	int		ret = 0;
+	int		dcopy_flags;
+
+	if (!(uioa->uioa_state & UIOA_ENABLED)) {
+		/* The uioa_t isn't enabled */
+		return (ENXIO);
+	}
+
+	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
+		/* Only support to user-land from kernel */
+		return (ENOTSUP);
+	}
+
+
+	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
+	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
+	dcopy_flags = DCOPY_NOSLEEP;
+
+	/*
+	 * While source bytes and destination bytes.
+	 */
+	while (n > 0 && uioa->uio_resid > 0) {
+		iov = uioa->uio_iov;
+		if (iov->iov_len == 0l) {
+			uioa->uio_iov++;
+			uioa->uio_iovcnt--;
+			uioa->uioa_lcur++;
+			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
+			continue;
+		}
+		/*
+		 * While source bytes schedule an async
+		 * dma for destination page by page.
+		 */
+		while (n > 0) {
+			/* Addr offset in page src/dst */
+			soff = (uintptr_t)p & PAGEOFFSET;
+			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
+			/* Min copy count src and dst and page sized */
+			cnt = MIN(n, iov->iov_len);
+			cnt = MIN(cnt, PAGESIZE - soff);
+			cnt = MIN(cnt, PAGESIZE - doff);
+			/* XXX if next page(s) contiguous could use multipage */
+
+			/*
+			 * if we have an old command, we want to link all
+			 * other commands to the next command we alloced so
+			 * we only need to track the last command but can
+			 * still free them all.
+			 */
+			if (cmd != NULL) {
+				dcopy_flags |= DCOPY_ALLOC_LINK;
+			}
+			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
+			if (ret != DCOPY_SUCCESS) {
+				/* Error of some sort */
+				return (EIO);
+			}
+			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;
+
+			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
+			if (uioa_maxpoll >= 0) {
+				/* Blocking (>0 may be) used in uioafini() */
+				cmd->dp_flags = DCOPY_CMD_INTR;
+			} else {
+				/* Non blocking uioafini() so no intr */
+				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
+			}
+			cmd->dp_cmd = DCOPY_CMD_COPY;
+			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
+			cmd->dp.copy.cc_source = pa + soff;
+			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
+				/* Have a (page_t **) */
+				pa = ptob((uint64_t)(
+				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
+			} else {
+				/* Have a (pfn_t *) */
+				pa = ptob((uint64_t)(
+				    *(pfn_t *)uioa->uioa_lppp));
+			}
+			cmd->dp.copy.cc_dest = pa + doff;
+			cmd->dp.copy.cc_size = cnt;
+			ret = dcopy_cmd_post(cmd);
+			if (ret != DCOPY_SUCCESS) {
+				/* Error of some sort */
+				return (EIO);
+			}
+			ret = 0;
+
+			/* If UIOA_POLL not set, set it */
+			if (!(uioa->uioa_state & UIOA_POLL))
+				uioa->uioa_state |= UIOA_POLL;
+
+			/* Update iov, uio, and local pointers/counters */
+			iov->iov_base += cnt;
+			iov->iov_len -= cnt;
+			uioa->uio_resid -= cnt;
+			uioa->uio_loffset += cnt;
+			p = (caddr_t)p + cnt;
+			n -= cnt;
+
+			/* End of iovec? */
+			if (iov->iov_len == 0) {
+				/* Yup, next iovec */
+				break;
+			}
+
+			/* Next dst addr page? */
+			if (doff + cnt == PAGESIZE) {
+				/* Yup, next page_t */
+				uioa->uioa_lppp++;
+			}
+		}
+	}
+
+	return (ret);
+}
+
+/*
+ * Initialize a uioa_t for a given uio_t for the current user context,
+ * copy the common uio_t to the uioa_t, walk the shared iovec_t and
+ * lock down the user-land page(s) containing iovec_t data, then mapin
+ * user-land pages using segkpm.
+ */
+int
+uioainit(uio_t *uiop, uioa_t *uioap)
+{
+	caddr_t	addr;
+	page_t		**pages;
+	int		off;
+	int		len;
+	proc_t		*procp = ttoproc(curthread);
+	struct as	*as = procp->p_as;
+	iovec_t		*iov = uiop->uio_iov;
+	int32_t		iovcnt = uiop->uio_iovcnt;
+	uioa_page_t	*locked = uioap->uioa_locked;
+	dcopy_handle_t	channel;
+	int		error;
+
+	if (! (uioap->uioa_state & UIOA_ALLOC)) {
+		/* Can only init() a freshly allocated uioa_t */
+		return (EINVAL);
+	}
+
+	error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
+	if (error == DCOPY_NORESOURCES) {
+		/* Turn off uioa */
+		uioasync.enabled = B_FALSE;
+		return (ENODEV);
+	}
+	if (error != DCOPY_SUCCESS) {
+		/* Alloc failed */
+		return (EIO);
+	}
+
+	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
+	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
+
+	/* Indicate uioa_t (will be) initialized */
+	uioap->uioa_state = UIOA_INIT;
+
+	/* uio_t/uioa_t uio_t common struct copy */
+	*((uio_t *)uioap) = *uiop;
+
+	/* initialize *uiop->uio_iov */
+	if (iovcnt > UIOA_IOV_MAX) {
+		/* Too big? */
+		return (E2BIG);
+	}
+	uioap->uio_iov = iov;
+	uioap->uio_iovcnt = iovcnt;
+
+	/* Mark the uioap as such */
+	uioap->uio_extflg |= UIO_ASYNC;
+
+	/*
+	 * For each iovec_t, lock-down the page(s) backing the iovec_t
+	 * and save the page_t list for phys addr use in uioamove().
+	 */
+	iov = uiop->uio_iov;
+	iovcnt = uiop->uio_iovcnt;
+	while (iovcnt > 0) {
+		addr = iov->iov_base;
+		off = (uintptr_t)addr & PAGEOFFSET;
+		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
+		len = iov->iov_len + off;
+
+		/* Lock down page(s) for the iov span */
+		if ((error = as_pagelock(as, &pages,
+		    iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
+			/* Error */
+			goto cleanup;
+		}
+
+		if (pages == NULL) {
+			/*
+			 * Need page_t list, really only need
+			 * a pfn list so build one.
+			 */
+			pfn_t   *pfnp;
+			int	pcnt = len >> PAGESHIFT;
+
+			if (off)
+				pcnt++;
+			if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp),
+			    KM_NOSLEEP)) == NULL) {
+				error = ENOMEM;
+				goto cleanup;
+			}
+			locked->uioa_ppp = (void **)pfnp;
+			locked->uioa_pfncnt = pcnt;
+			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+			while (pcnt-- > 0) {
+				*pfnp++ = hat_getpfnum(as->a_hat, addr);
+				addr += PAGESIZE;
+			}
+			AS_LOCK_EXIT(as, &as->a_lock);
+		} else {
+			/* Have a page_t list, save it */
+			locked->uioa_ppp = (void **)pages;
+			locked->uioa_pfncnt = 0;
+		}
+		/* Save for as_pageunlock() in uioafini() */
+		locked->uioa_base = iov->iov_base;
+		locked->uioa_len = iov->iov_len;
+		locked++;
+
+		/* Next iovec_t */
+		iov++;
+		iovcnt--;
+	}
+	/* Initialize current pointer into uioa_locked[] and its uioa_ppp */
+	uioap->uioa_lcur = uioap->uioa_locked;
+	uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
+	return (0);
+
+cleanup:
+	/* Unlock any previously locked page_t(s) */
+	while (locked > uioap->uioa_locked) {
+		locked--;
+		as_pageunlock(as, (page_t **)locked->uioa_ppp,
+		    locked->uioa_base, locked->uioa_len, S_WRITE);
+	}
+
+	/* Last indicate uioa_t still in alloc state */
+	uioap->uioa_state = UIOA_ALLOC;
+
+	return (error);
+}
+
+/*
+ * Finish processing of a uioa_t by cleanup any pending "uioap" actions.
+ */
+int
+uioafini(uio_t *uiop, uioa_t *uioap)
+{
+	int32_t		iovcnt = uiop->uio_iovcnt;
+	uioa_page_t	*locked = uioap->uioa_locked;
+	struct as	*as = ttoproc(curthread)->p_as;
+	dcopy_handle_t	channel;
+	dcopy_cmd_t	cmd;
+	int		ret = 0;
+
+	ASSERT(uioap->uio_extflg & UIO_ASYNC);
+
+	if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
+		/* Must be an active uioa_t */
+		return (EINVAL);
+	}
+
+	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
+	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];
+
+	/* XXX - why do we get cmd == NULL sometimes? */
+	if (cmd != NULL) {
+		if (uioap->uioa_state & UIOA_POLL) {
+			/* Wait for last dcopy() to finish */
+			int64_t poll = 1;
+			int poll_flag = DCOPY_POLL_NOFLAGS;
+
+			do {
+				if (uioa_maxpoll == 0 ||
+				    (uioa_maxpoll > 0 &&
+				    poll >= uioa_maxpoll)) {
+					/* Always block or after maxpoll */
+					poll_flag = DCOPY_POLL_BLOCK;
+				} else {
+					/* No block, poll */
+					poll++;
+				}
+				ret = dcopy_cmd_poll(cmd, poll_flag);
+			} while (ret == DCOPY_PENDING);
+
+			if (ret == DCOPY_COMPLETED) {
+				/* Poll/block succeeded */
+				ret = 0;
+			} else {
+				/* Poll/block failed */
+				ret = EIO;
+			}
+		}
+		dcopy_cmd_free(&cmd);
+	}
+
+	dcopy_free(&channel);
+
+	/* Unlock all page(s) iovec_t by iovec_t */
+	while (iovcnt-- > 0) {
+		page_t **pages;
+
+		if (locked->uioa_pfncnt == 0) {
+			/* An as_pagelock() returned (page_t **) */
+			pages = (page_t **)locked->uioa_ppp;
+		} else {
+			/* Our pfn_t array */
+			pages = NULL;
+			kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
+			    sizeof (pfn_t *));
+		}
+		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
+		    S_WRITE);
+
+		locked++;
+	}
+	/* uioa_t->uio_t common struct copy */
+	*uiop = *((uio_t *)uioap);
+
+	/*
+	 * Last, reset uioa state to alloc.
+	 *
+	 * Note, we only initialize the state here, all other members
+	 * will be initialized in a subsequent uioainit().
+	 */
+	uioap->uioa_state = UIOA_ALLOC;
+
+	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
+	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
+
+	return (ret);
+}
--- a/usr/src/uts/common/os/streamio.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/os/streamio.c	Fri May 23 20:14:10 2008 -0700
@@ -144,6 +144,7 @@
 static void strcleanall(struct vnode *);
 static int strwsrv(queue_t *);
 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
+static void struioainit(queue_t *, sodirect_t *, uio_t *);
 
 /*
  * qinit and module_info structures for stream head read and write queues
@@ -189,6 +190,11 @@
  *		mirror this.
  *	4. ioctl monitor: sd_lock is gotten to ensure that only one
  *		thread is doing an ioctl at a time.
+ *
+ * Note, for sodirect case 3. is extended to (*sodirect_t.sod_enqueue)()
+ * call-back from below, further the sodirect support is for code paths
+ * called via kstgetmsg(), all other code paths ASSERT() that sodirect
+ * uioa generated mblk_t's (i.e. DBLK_UIOA) aren't processed.
  */
 
 static int
@@ -397,6 +403,7 @@
 	stp->sd_qn_minpsz = 0;
 	stp->sd_qn_maxpsz = INFPSZ - 1;	/* used to check for initialization */
 	stp->sd_maxblk = INFPSZ;
+	stp->sd_sodirect = NULL;
 	qp->q_ptr = _WR(qp)->q_ptr = stp;
 	STREAM(qp) = STREAM(_WR(qp)) = stp;
 	vp->v_stream = stp;
@@ -970,11 +977,14 @@
  * It is the callers responsibility to call qbackenable after
  * it is finished with the message. The caller should not call
  * qbackenable until after any putback calls to avoid spurious backenabling.
+ *
+ * Also, handle uioa initialization and process any DBLK_UIOA flagged messages.
  */
 mblk_t *
 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
     int *errorp)
 {
+	sodirect_t *sodp = stp->sd_sodirect;
 	mblk_t *bp;
 	int error;
 
@@ -1063,7 +1073,67 @@
 	}
 	*errorp = 0;
 	ASSERT(MUTEX_HELD(&stp->sd_lock));
-	return (getq_noenab(q));
+	if (sodp != NULL && (sodp->sod_state & SOD_ENABLED) &&
+	    (sodp->sod_uioa.uioa_state & UIOA_INIT)) {
+		/*
+		 * First kstrgetmsg() call for an uioa_t, so if there are
+		 * any queued mblk_t's we need to consume them before uioa
+		 * from below can occur.
+		 */
+		sodp->sod_uioa.uioa_state &= UIOA_CLR;
+		sodp->sod_uioa.uioa_state |= UIOA_ENABLED;
+		if (q->q_first != NULL) {
+			struioainit(q, sodp, uiop);
+		}
+	}
+
+	bp = getq_noenab(q);
+
+	if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) {
+		/*
+		 * A uioa flagged mblk_t chain, already uio processed,
+		 * add it to the sodirect uioa pending free list.
+		 *
+		 * Note, a b_cont chain headed by a DBLK_UIOA enable
+		 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
+		 */
+		mblk_t	*bpt = sodp->sod_uioaft;
+
+		ASSERT(sodp != NULL);
+
+		/*
+		 * Add first mblk_t of "bp" chain to current sodirect uioa
+		 * free list tail mblk_t, if any, else empty list so new head.
+		 */
+		if (bpt == NULL)
+			sodp->sod_uioafh = bp;
+		else
+			bpt->b_cont = bp;
+
+		/*
+		 * Walk mblk_t "bp" chain to find tail and adjust rptr of
+		 * each to reflect that uioamove() has consumed all data.
+		 */
+		bpt = bp;
+		for (;;) {
+			bpt->b_rptr = bpt->b_wptr;
+			if (bpt->b_cont == NULL)
+				break;
+			bpt = bpt->b_cont;
+
+			ASSERT(bpt->b_datap->db_flags & DBLK_UIOA);
+		}
+		/* New sodirect uioa free list tail */
+		sodp->sod_uioaft = bpt;
+
+		/* Only 1 strget() with data returned per uioa_t */
+		if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) {
+			sodp->sod_uioa.uioa_state &= UIOA_CLR;
+			sodp->sod_uioa.uioa_state |= UIOA_FINI;
+		}
+	}
+
+	return (bp);
 }
 
 /*
@@ -1083,6 +1153,8 @@
 	ASSERT(bp->b_wptr >= bp->b_rptr);
 
 	do {
+		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
+
 		if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
 			ASSERT(n > 0);
 
@@ -1229,8 +1301,10 @@
 			}
 			first = 0;
 		}
+
 		ASSERT(MUTEX_HELD(&stp->sd_lock));
 		ASSERT(bp);
+		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
 		pri = bp->b_band;
 		/*
 		 * Extract any mark information. If the message is not
@@ -6650,6 +6724,7 @@
 			bp = strget(stp, q, uiop, first, &error);
 			ASSERT(MUTEX_HELD(&stp->sd_lock));
 			if (bp != NULL) {
+				ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
 				if (bp->b_datap->db_type == M_SIG) {
 					strsignal_nolock(stp, *bp->b_rptr,
 					    (int32_t)bp->b_band);
@@ -7288,7 +7363,7 @@
 		    "kstrgetmsg calls strwaitq:%p, %p",
 		    vp, uiop);
 		if (((error = strwaitq(stp, waitflag, (ssize_t)0,
-		    fmode, timout, &done)) != 0) || done) {
+		    fmode, timout, &done))) != 0 || done) {
 			TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
 			    "kstrgetmsg error or done:%p, %p",
 			    vp, uiop);
@@ -7360,6 +7435,8 @@
 		 * there is indeed a shortage of memory.  dupmsg() may fail
 		 * if db_ref in any of the messages reaches its limit.
 		 */
+
+		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
 		if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
 			/*
 			 * Restore the state of the stream head since we
@@ -7418,6 +7495,7 @@
 			}
 		}
 
+		ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA));
 		bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
 		    NULL, NULL, NULL, NULL);
 
@@ -7468,6 +7546,8 @@
 	 */
 	if (uiop == NULL) {
 		/* Append data to tail of mctlp */
+
+		ASSERT(bp == NULL || !(bp->b_datap->db_flags & DBLK_UIOA));
 		if (mctlp != NULL) {
 			mblk_t **mpp = mctlp;
 
@@ -7476,6 +7556,14 @@
 			*mpp = bp;
 			bp = NULL;
 		}
+	} else if (bp && (bp->b_datap->db_flags & DBLK_UIOA)) {
+		/*
+		 * A uioa mblk_t chain, as uio processing has already
+		 * been done we simply skip over processing.
+		 */
+		bp = NULL;
+		pr = 0;
+
 	} else if (uiop->uio_resid >= 0 && bp) {
 		size_t oldresid = uiop->uio_resid;
 
@@ -7564,6 +7652,8 @@
 			 * again since the flush logic in strrput_nondata()
 			 * may have cleared it while we had sd_lock dropped.
 			 */
+
+			ASSERT(!(savemp->b_datap->db_flags & DBLK_UIOA));
 			if (type >= QPCTL) {
 				ASSERT(type == M_PCPROTO);
 				if (queclass(savemp) < QPCTL)
@@ -8635,3 +8725,85 @@
 		}
 	return (B_FALSE);
 }
+
+/*
+ * Called on the first strget() of a sodirect/uioa enabled streamhead,
+ * if any mblk_t(s) enqueued they must first be uioamove()d before uioa
+ * can be enabled for the underlying transport's use.
+ */
+void
+struioainit(queue_t *q, sodirect_t *sodp, uio_t *uiop)
+{
+	uioa_t	*uioap = (uioa_t *)uiop;
+	mblk_t	*bp = q->q_first;
+	mblk_t	*lbp = NULL;
+	mblk_t	*nbp, *wbp;
+	int	len;
+	int	error;
+
+	ASSERT(MUTEX_HELD(sodp->sod_lock));
+	ASSERT(&sodp->sod_uioa == uioap);
+
+	/*
+	 * Walk the b_next/b_prev doubly linked list of b_cont chain(s)
+	 * and schedule any M_DATA mblk_t's for uio asynchronous move.
+	 */
+	do {
+		/* Next mblk_t chain */
+		nbp = bp->b_next;
+		/* Walk the chain */
+		wbp = bp;
+		do {
+			if (wbp->b_datap->db_type != M_DATA) {
+				/* Not M_DATA, no more uioa */
+				goto nouioa;
+			}
+			if ((len = wbp->b_wptr - wbp->b_rptr) > 0) {
+				/* Have a M_DATA mblk_t with data */
+				if (len > uioap->uio_resid) {
+					/* Not enough uio space */
+					goto nouioa;
+				}
+				error = uioamove(wbp->b_rptr, len,
+				    UIO_READ, uioap);
+				if (!error) {
+					/* Scheduled, mark dblk_t as such */
+					wbp->b_datap->db_flags |= DBLK_UIOA;
+				} else {
+					/* Error of some sort, no more uioa */
+					uioap->uioa_state &= UIOA_CLR;
+					uioap->uioa_state |= UIOA_FINI;
+					return;
+				}
+			}
+			/* Save last wbp processed */
+			lbp = wbp;
+		} while ((wbp = wbp->b_cont) != NULL);
+	} while ((bp = nbp) != NULL);
+
+	return;
+
+nouioa:
+	/* No more uioa */
+	uioap->uioa_state &= UIOA_CLR;
+	uioap->uioa_state |= UIOA_FINI;
+
+	/*
+	 * If we processed 1 or more mblk_t(s) then we need to split the
+	 * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s)
+	 * are in the current chain and the rest are in the following new
+	 * chain.
+	 */
+	if (lbp != NULL) {
+		/* New end of current chain */
+		lbp->b_cont = NULL;
+
+		/* Insert new chain wbp after bp */
+		if ((wbp->b_next = nbp) != NULL)
+			nbp->b_prev = wbp;
+		else
+			q->q_last = wbp;
+		wbp->b_prev = bp;
+		bp->b_next = wbp;
+	}
+}
--- a/usr/src/uts/common/os/strsubr.c	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/os/strsubr.c	Fri May 23 20:14:10 2008 -0700
@@ -2467,13 +2467,18 @@
 	/*
 	 * Private flag used by a transport module to indicate
 	 * to sockfs that it supports direct-access mode without
-	 * having to go through STREAMS.
-	 */
-	if (devflag & _D_DIRECT) {
+	 * having to go through STREAMS or the transport can use
+	 * sodirect_t sharing to bypass STREAMS for receive-side
+	 * M_DATA processing.
+	 */
+	if (devflag & (_D_DIRECT|_D_SODIRECT)) {
 		/* Reject unless the module is fully-MT (no perimeter) */
 		if ((qflag & QMT_TYPEMASK) != QMTSAFE)
 			goto bad;
-		qflag |= _QDIRECT;
+		if (devflag & _D_DIRECT)
+			qflag |= _QDIRECT;
+		if (devflag & _D_SODIRECT)
+			qflag |= _QSODIRECT;
 	}
 
 	*qflagp = qflag;
--- a/usr/src/uts/common/sys/Makefile	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -487,6 +487,7 @@
 	socket_impl.h		\
 	socketvar.h		\
 	sockio.h		\
+	sodirect.h		\
 	squeue.h		\
 	squeue_impl.h		\
 	srn.h			\
--- a/usr/src/uts/common/sys/conf.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/conf.h	Fri May 23 20:14:10 2008 -0700
@@ -22,7 +22,7 @@
 /*	  All Rights Reserved  	*/
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -221,6 +221,9 @@
 
 #define	D_OPEN_RETURNS_EINTR	0x100000 /* EINTR expected from open(9E) */
 
+#define	_D_SODIRECT	0x200000 /* Private flag for transport modules used */
+				/* to enable _QSODIRECT for a STREAMS Q */
+
 #endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
 
 #ifdef	__cplusplus
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/dcopy.h	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,239 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DCOPY_H
+#define	_SYS_DCOPY_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+/*
+ * *** This interface is for private use by the IP stack only ***
+ */
+
+/* Private dcopy/uioa interface for dcopy to enable/disable dcopy KAPI */
+extern void uioa_dcopy_enable();
+extern void uioa_dcopy_disable();
+
+/* Function return status */
+#define	DCOPY_FAILURE		(-1)
+#define	DCOPY_SUCCESS		(0)
+#define	DCOPY_NORESOURCES	(1) /* _alloc & _cmd_alloc, _cmd_post only */
+#define	DCOPY_PENDING		(0x10) /* dcopy_poll(), dcopy_unregister() */
+#define	DCOPY_COMPLETED		(0x20) /* dcopy_poll() only */
+
+
+/* dq_version */
+#define	DCOPY_QUERY_V0	0
+
+typedef struct dcopy_query_s {
+	int		dq_version; /* DCOPY_QUERY_V0 */
+	uint_t		dq_num_channels; /* number of dma channels */
+} dcopy_query_t;
+
+/*
+ * dcopy_query()
+ *   query for the number of DMA engines usable in the system.
+ */
+void dcopy_query(dcopy_query_t *query);
+
+
+typedef struct dcopy_channel_s *dcopy_handle_t;
+
+/* dcopy_alloc() and dcopy_cmd_alloc() common flags */
+#define	DCOPY_SLEEP	(0)
+#define	DCOPY_NOSLEEP	(1 << 0)
+
+/*
+ * dcopy_alloc()
+ *   Allocate a DMA channel which is used for posting DMA requests. Note: this
+ *   does not give the caller exclusive access to the DMA engine. Commands
+ *   posted to a channel will complete in order.
+ *     flags - (DCOPY_SLEEP, DCOPY_NOSLEEP)
+ *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
+ */
+int dcopy_alloc(int flags, dcopy_handle_t *handle);
+
+/*
+ * dcopy_free()
+ *   Free the DMA channel. The client can no longer use the handle to post or
+ *   poll for status on posts which were previously done on this channel.
+ */
+void dcopy_free(dcopy_handle_t *handle);
+
+/* dq_version */
+#define	DCOPY_QUERY_CHANNEL_V0	0
+
+/* Per DMA channel info */
+typedef struct dcopy_query_channel_s {
+	int		qc_version; /* DCOPY_QUERY_CHANNEL_V0 */
+
+	/* Does DMA channel support DCA */
+	boolean_t	qc_dca_supported;
+
+	/* device id and device specific capabilities */
+	uint64_t	qc_id;
+	uint64_t	qc_capabilities;
+
+	/*
+	 * DMA channel size. This may not be the same as the number of posts
+	 * that the DMA channel can handle since a post may consume 1 or more
+	 * entries.
+	 */
+	uint64_t	qc_channel_size;
+
+	/* DMA channel number within the device. Not unique across devices */
+	uint64_t	qc_chan_num;
+} dcopy_query_channel_t;
+
+/*
+ * dcopy_query_channel()
+ *   query DMA engines capabilities
+ */
+void dcopy_query_channel(dcopy_handle_t handle, dcopy_query_channel_t *query);
+
+
+/* dp_version */
+#define	DCOPY_CMD_V0	0
+
+/* dp_cmd */
+#define	DCOPY_CMD_COPY	0x1
+
+/* dp_flags */
+/*
+ * DCOPY_CMD_QUEUE
+ *    Hint to queue up the post but don't notify the DMA engine. This can be
+ *    used as an optimization when multiple posts are going to be queued up and
+ *    you only want to notify the DMA engine after the last post. Note, this does
+ *    not mean the DMA engine won't process the request since it could notice
+ *    it anyway.
+ * DCOPY_CMD_NOSTAT
+ *    Don't generate a status. If this flag is used, you cannot poll for
+ *    completion status on this command. This can be a useful performance
+ *    optimization if you're posting multiple commands and just want to poll on
+ *    the last command.
+ * DCOPY_CMD_DCA
+ *    If DCA is supported, direct this and all future command data (until the
+ *    next command with DCOPY_CMD_DCA set) to the processor specified in
+ *    dp_dca_id. This flag is ignored if DCA is not supported.
+ * DCOPY_CMD_INTR
+ *    Generate an interrupt when command completes. This flag is required if
+ *    the caller is going to call dcopy_cmd_poll() with DCOPY_POLL_BLOCK set
+ *    for this command.
+ */
+#define	DCOPY_CMD_NOFLAGS	(0)
+#define	DCOPY_CMD_QUEUE		(1 << 0)
+#define	DCOPY_CMD_NOSTAT	(1 << 1)
+#define	DCOPY_CMD_DCA		(1 << 2)
+#define	DCOPY_CMD_INTR		(1 << 3)
+
+typedef struct dcopy_cmd_copy_s {
+	uint64_t	cc_source; /* Source physical address */
+	uint64_t	cc_dest; /* Destination physical address */
+	size_t		cc_size;
+} dcopy_cmd_copy_t;
+
+typedef union dcopy_cmd_u {
+	dcopy_cmd_copy_t	copy;
+} dcopy_cmd_u_t;
+
+typedef struct dcopy_cmd_priv_s *dcopy_cmd_priv_t;
+
+struct dcopy_cmd_s {
+	uint_t			dp_version; /* DCOPY_CMD_V0 */
+	uint_t			dp_flags;
+	uint64_t		dp_cmd;
+	dcopy_cmd_u_t   	dp;
+	uint32_t		dp_dca_id;
+	dcopy_cmd_priv_t	dp_private;
+};
+typedef struct dcopy_cmd_s *dcopy_cmd_t;
+
+
+/*
+ * dcopy_cmd_alloc() specific flags
+ *   DCOPY_ALLOC_LINK - when set, the caller passes in a previously alloced
+ *     command in cmd. dcopy_cmd_alloc() will allocate a new command and
+ *     link it to the old command. The caller can use this to build a
+ *     chain of commands, keeping only the last cmd alloced. calling
+ *     dcopy_cmd_free() with the last cmd alloced in the chain will free all of
+ *     the commands in the chain. dcopy_cmd_post() and dcopy_cmd_poll() have
+ *     no knowledge of a chain of commands.  It's only used for alloc/free.
+ */
+#define	DCOPY_ALLOC_LINK	(1 << 16)
+
+/*
+ * dcopy_cmd_alloc()
+ *   allocate a command. A command can be re-used after it completes.
+ *     flags - (DCOPY_SLEEP || DCOPY_NOSLEEP), DCOPY_ALLOC_LINK
+ *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
+ */
+int dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd);
+
+/*
+ * dcopy_cmd_free()
+ *   free the command. This call cannot be called after dcopy_free().
+ */
+void dcopy_cmd_free(dcopy_cmd_t *cmd);
+
+/*
+ * dcopy_cmd_post()
+ *   post a command (allocated from dcopy_cmd_alloc()) to the DMA channel
+ *     returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES
+ */
+int dcopy_cmd_post(dcopy_cmd_t cmd);
+
+/* dcopy_cmd_poll() flags */
+#define	DCOPY_POLL_NOFLAGS	(0)
+#define	DCOPY_POLL_BLOCK	(1 << 0)
+
+/*
+ * dcopy_cmd_poll()
+ *   poll on completion status of a previous post. This call cannot be called
+ *   after dcopy_free().
+ *
+ *   if flags == DCOPY_POLL_NOFLAGS, return status can be DCOPY_FAILURE,
+ *   DCOPY_PENDING, or DCOPY_COMPLETED.
+ *
+ *   if flags & DCOPY_POLL_BLOCK, return status can be DCOPY_FAILURE or
+ *   DCOPY_COMPLETED. DCOPY_POLL_BLOCK can only be set in base context.
+ *
+ *   The command cannot be re-used or freed until the command has completed
+ *   (e.g. DCOPY_FAILURE or DCOPY_COMPLETED).
+ */
+int dcopy_cmd_poll(dcopy_cmd_t cmd, int flags);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DCOPY_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/dcopy_device.h	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DCOPY_DEVICE_H
+#define	_SYS_DCOPY_DEVICE_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/dcopy.h>
+
+/*
+ * private command state. Space for this structure should be allocated during
+ * (*cb_cmd_alloc). The DMA driver must set dp_private in dcopy_cmd_t to point
+ * to the memory it allocated. Other than pr_device_cmd_private, the DMA driver
+ * should not touch any of the fields in this structure. pr_device_cmd_private
+ * is a private pointer for the DMA engine to use.
+ */
+struct dcopy_cmd_priv_s {
+	/*
+	 * we only init the state used to track a command which blocks when it
+	 * actually blocks. pr_block_init tells us when we need to clean it
+	 * up during a cmd_free.
+	 */
+	boolean_t		pr_block_init;
+
+	/* dcopy_poll blocking state */
+	list_node_t		pr_poll_list_node;
+	volatile boolean_t	pr_wait;
+	kmutex_t		pr_mutex;
+	kcondvar_t		pr_cv;
+
+	/* back pointer to the command */
+	dcopy_cmd_t		pr_cmd;
+
+	/* shortcut to the channel we're on */
+	struct dcopy_channel_s	*pr_channel;
+
+	/* DMA driver private pointer */
+	void			*pr_device_cmd_private;
+};
+
+/* cb_version */
+#define	DCOPY_DEVICECB_V0	0
+
+typedef struct dcopy_device_chaninfo_s {
+	uint_t	di_chan_num;
+} dcopy_device_chaninfo_t;
+
+typedef struct dcopy_device_cb_s {
+	int	cb_version;
+	int	cb_res1;
+
+	/* allocate/free a DMA channel. See dcopy.h for return status  */
+	int	(*cb_channel_alloc)(void *device_private,
+		    dcopy_handle_t handle, int flags, uint_t size,
+		    dcopy_query_channel_t *info, void *channel_private);
+	void	(*cb_channel_free)(void *channel_private);
+
+	/* allocate/free a command. See dcopy.h for return status  */
+	int	(*cb_cmd_alloc)(void *channel_private, int flags,
+		    dcopy_cmd_t *cmd);
+	void	(*cb_cmd_free)(void *channel_private, dcopy_cmd_t *cmd);
+
+	/*
+	 * post a command/poll for command status. See dcopy.h for return
+	 * status
+	 */
+	int	(*cb_cmd_post)(void *channel_private, dcopy_cmd_t cmd);
+	int	(*cb_cmd_poll)(void *channel_private, dcopy_cmd_t cmd);
+
+	/*
+	 * if dcopy_device_unregister() returns DCOPY_PENDING, dcopy will
+	 * call this routine when all the channels are no longer being
+	 * used and have been free'd up. e.g. it's safe for the DMA driver
+	 * to detach.
+	 *   status = DCOPY_SUCCESS || DCOPY_FAILURE
+	 */
+	void	(*cb_unregister_complete)(void *device_private, int status);
+} dcopy_device_cb_t;
+
+
+typedef struct dcopy_device_info_s {
+	dev_info_t		*di_dip;
+	dcopy_device_cb_t	*di_cb; /* must be a static array */
+	uint_t			di_num_dma;
+	uint_t			di_maxxfer;
+	uint_t			di_capabilities;
+	uint64_t		di_id;
+} dcopy_device_info_t;
+
+typedef struct dcopy_device_s *dcopy_device_handle_t;
+
+/* dcopy_device_notify() status */
+#define	DCOPY_COMPLETION	0
+
+/*
+ * dcopy_device_register()
+ *   register the DMA device with dcopy.
+ *    return status => DCOPY_FAILURE, DCOPY_SUCCESS
+ */
+int dcopy_device_register(void *device_private, dcopy_device_info_t *info,
+    dcopy_device_handle_t *handle);
+
+/*
+ * dcopy_device_unregister()
+ *   try to unregister the DMA device with dcopy. If the DMA engines are
+ *   still being used by upper layer modules, DCOPY_PENDING will be returned.
+ *    return status => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_PENDING
+ *      if DCOPY_PENDING, (*cb_unregister_complete)() will be called when
+ *      completed.
+ */
+int dcopy_device_unregister(dcopy_device_handle_t *handle);
+
+/*
+ * dcopy_device_channel_notify()
+ *   Notify dcopy of an event.
+ *     dcopy_handle_t handle => what was passed into (*cb_alloc)()
+ *     status => DCOPY_COMPLETION
+ */
+void dcopy_device_channel_notify(dcopy_handle_t handle, int status);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DCOPY_DEVICE_H */
--- a/usr/src/uts/common/sys/socketvar.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/socketvar.h	Fri May 23 20:14:10 2008 -0700
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -50,14 +50,13 @@
 #include <sys/file.h>
 #include <sys/param.h>
 #include <sys/zone.h>
+#include <sys/sodirect.h>
 #include <inet/kssl/ksslapi.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
-
-
 /*
  * Internal representation used for addresses.
  */
@@ -333,6 +332,9 @@
 	kssl_endpt_type_t	so_kssl_type;	/* is proxy/is proxied/none */
 	kssl_ent_t		so_kssl_ent;	/* SSL config entry */
 	kssl_ctx_t		so_kssl_ctx;	/* SSL session context */
+
+	/* != NULL for sodirect_t enabled socket */
+	sodirect_t	*so_direct;
 };
 
 /* flags */
@@ -375,6 +377,7 @@
 #define	SS_MOREDATA		0x00100000 /* NCAfs: NCA has more data */
 
 #define	SS_DIRECT		0x00200000 /* transport is directly below */
+#define	SS_SODIRECT		0x00400000 /* transport supports sodirect */
 
 #define	SS_LADDR_VALID		0x01000000	/* so_laddr valid for user */
 #define	SS_FADDR_VALID		0x02000000	/* so_faddr valid for user */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/common/sys/sodirect.h	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
+/*	  All Rights Reserved  	*/
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+#ifndef _SYS_SODIRECT_H
+#define	_SYS_SODIRECT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * Sodirect ...
+ *
+ * Currently the sodirect_t uses the sockfs streamhead STREAMS Q directly,
+ * in the future when we have STREAMless sockets a sonode Q will have to
+ * be implemented however the sodirect KPI shouldn't need to change.
+ */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+typedef struct sodirect_s {
+	uint32_t	sod_state;	/* State bits */
+	uint32_t	sod_want;	/* Pending read byte count or 0 */
+	queue_t		*sod_q;		/* Socket Q */
+	int		(*sod_enqueue)(); /* Call to enqueue an mblk_t */
+	void		(*sod_wakeup)(); /* Call to wake a read()er, if any */
+	mblk_t		*sod_uioafh;	/* To be freed list head, or NULL */
+	mblk_t		*sod_uioaft;	/* To be freed list tail */
+	kmutex_t	*sod_lock;	/* Lock needed to protect all members */
+	uioa_t		sod_uioa;	/* Pending uio_t for uioa_t use */
+} sodirect_t;
+
+/*
+ * sod_state bits:
+ */
+
+#define	SOD_DISABLED	0		/* No more sodirect */
+
+#define	SOD_ENABLED	0x0001		/* sodirect_t enabled */
+
+#define	SOD_WAKE_NOT	0x0010		/* Wakeup not needed */
+#define	SOD_WAKE_NEED   0x0020		/* Wakeup needed */
+#define	SOD_WAKE_DONE	0x0040		/* Wakeup done */
+#define	SOD_WAKE_CLR	~(SOD_WAKE_NOT|SOD_WAKE_NEED|SOD_WAKE_DONE)
+
+/*
+ * Useful macros:
+ */
+
+#define	SOD_QSETBE(p) ((p)->sod_q->q_flag |= QWANTW)
+#define	SOD_QCLRBE(p) ((p)->sod_q->q_flag &= ~QWANTW)
+#define	SOD_QEMPTY(p) ((p)->sod_q->q_first == NULL)
+#define	SOD_QFULL(p) ((p)->sod_q->q_flag & QFULL)
+#define	SOD_QCNT(p) ((p)->sod_q->q_count)
+
+#define	SOD_DISABLE(p) (p)->sod_state &= ~SOD_ENABLED
+
+#define	SOD_QTOSODP(q) (q)->q_stream->sd_sodirect
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_SODIRECT_H */
--- a/usr/src/uts/common/sys/stream.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/stream.h	Fri May 23 20:14:10 2008 -0700
@@ -190,6 +190,8 @@
 #define	_QASSOCIATED	0x10000000	/* queue is associated with a device */
 #define	_QDIRECT	0x20000000	/* Private; transport module uses */
 					/* direct interface to/from sockfs */
+#define	_QSODIRECT	0x40000000	/* Private, transport module shares */
+					/* an sodirect_t with sockfs */
 
 /* queue sqflags (protected by SQLOCK). */
 #define	Q_SQQUEUED	0x01		/* Queue is in the syncq list */
@@ -400,6 +402,7 @@
  */
 #define	DBLK_REFMIN		0x01	/* min refcnt stored in low bit */
 #define	DBLK_COOKED		0x02	/* message has been processed once */
+#define	DBLK_UIOA		0x04	/* uioamove() is pending */
 
 /*
  * db_struioflag values:
--- a/usr/src/uts/common/sys/strsubr.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/strsubr.h	Fri May 23 20:14:10 2008 -0700
@@ -46,6 +46,7 @@
 #include <sys/proc.h>
 #include <sys/netstack.h>
 #include <sys/modhash.h>
+#include <sys/sodirect.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -94,9 +95,8 @@
  *	sd_mark
  *	sd_closetime
  *	sd_wakeq
- *	sd_uiordq
- *	sd_uiowrq
  *	sd_maxblk
+ *	sd_sodirect
  *
  * The following fields are modified only by the allocator, which
  * has exclusive access to them at that time:
@@ -245,6 +245,10 @@
 	uint_t		sd_copyflag;	/* copy-related flags */
 	zoneid_t	sd_anchorzone;	/* Allow removal from same zone only */
 	struct msgb	*sd_cmdblk;	/* reply from _I_CMD */
+	/*
+	 * Support for socket direct.
+	 */
+	sodirect_t	*sd_sodirect;	/* pointer to shared sodirect_t */
 } stdata_t;
 
 /*
--- a/usr/src/uts/common/sys/uio.h	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/common/sys/uio.h	Fri May 23 20:14:10 2008 -0700
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -101,6 +100,49 @@
 	ssize_t		uio_resid;	/* residual count */
 } uio_t;
 
+/*
+ * Extended uio_t uioa_t used for asynchronous uio.
+ *
+ * Note: UIOA_IOV_MAX is defined and used as it is in "fs/vncalls.c"
+ *	 as there isn't a formal definition of IOV_MAX for the kernel.
+ */
+#define	UIOA_IOV_MAX	16
+
+typedef struct uioa_page_s {		/* locked uio_iov state */
+	int	uioa_pfncnt;		/* count of pfn_t(s) in *uioa_ppp */
+	void	**uioa_ppp;		/* page_t or pfn_t array */
+	caddr_t	uioa_base;		/* address base */
+	size_t	uioa_len;		/* span length */
+} uioa_page_t;
+
+typedef struct uioa_s {
+	iovec_t		*uio_iov;	/* pointer to array of iovecs */
+	int		uio_iovcnt;	/* number of iovecs */
+	lloff_t		_uio_offset;	/* file offset */
+	uio_seg_t	uio_segflg;	/* address space (kernel or user) */
+	uint16_t	uio_fmode;	/* file mode flags */
+	uint16_t	uio_extflg;	/* extended flags */
+	lloff_t		_uio_limit;	/* u-limit (maximum byte offset) */
+	ssize_t		uio_resid;	/* residual count */
+	/*
+	 * uioa extended members.
+	 */
+	uint32_t	uioa_state;	/* state of asynch i/o */
+	uioa_page_t	*uioa_lcur;	/* pointer into uioa_locked[] */
+	void		**uioa_lppp;	/* pointer into lcur->uioa_ppp[] */
+	void		*uioa_hwst[4];	/* opaque hardware state */
+	uioa_page_t	uioa_locked[UIOA_IOV_MAX]; /* Per iov locked pages */
+} uioa_t;
+
+#define	UIOA_ALLOC	0x0001		/* allocated but not yet initialized */
+#define	UIOA_INIT	0x0002		/* initialized but not yet enabled */
+#define	UIOA_ENABLED	0x0004		/* enabled, asynch i/o active */
+#define	UIOA_FINI	0x0008		/* finished waiting for uioafini() */
+
+#define	UIOA_CLR	(~0x000F)	/* clear mutually exclusive bits */
+
+#define	UIOA_POLL	0x0010		/* need dcopy_poll() */
+
 #define	uio_loffset	_uio_offset._f
 #if !defined(_LP64)
 #define	uio_offset	_uio_offset._p._l
@@ -127,10 +169,24 @@
  * access, ie, access bypassing caches, should be used.  Filesystems that
  * don't initialize this field could experience suboptimal performance due to
  * the random data the field contains.
+ *
+ * NOTE: This flag is also used by uioasync callers to pass an extended
+ * uio_t (uioa_t), to uioasync enabled consumers. Unlike above all
+ * consumers of a uioa_t require the uio_extflg to be initialized.
  */
 #define	UIO_COPY_DEFAULT	0x0000	/* no special options to copy */
 #define	UIO_COPY_CACHED		0x0001	/* copy should not bypass caches */
 
+#define	UIO_ASYNC		0x0002	/* uio_t is really a uioa_t */
+
+/*
+ * Global uioasync capability shadow state.
+ */
+typedef struct uioasync_s {
+	boolean_t	enabled;	/* Is uioasync enabled? */
+	size_t		mincnt;		/* Minimum byte count for use of */
+} uioasync_t;
+
 #endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */
 
 #if	defined(_KERNEL)
@@ -141,6 +197,11 @@
 void	uioskip(uio_t *, size_t);
 int	uiodup(uio_t *, uio_t *, iovec_t *, int);
 
+int	uioamove(void *, size_t, enum uio_rw, uioa_t *);
+int	uioainit(uio_t *, uioa_t *);
+int	uioafini(uio_t *, uioa_t *);
+extern	uioasync_t uioasync;
+
 #else	/* defined(_KERNEL) */
 
 #if 	defined(__STDC__)
--- a/usr/src/uts/i86pc/Makefile.files	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86pc/Makefile.files	Fri May 23 20:14:10 2008 -0700
@@ -161,6 +161,7 @@
 #
 GFX_PRIVATE_OBJS	+= gfx_private.o gfxp_pci.o gfxp_segmap.o \
 			   gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o
+IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o
 ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
 PCI_E_MISC_OBJS += pcie.o pcie_fault.o
 PCI_E_NEXUS_OBJS += npe.o npe_misc.o
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared	Fri May 23 20:14:10 2008 -0700
@@ -257,6 +257,7 @@
 DRV_KMODS	+= mc-amd
 DRV_KMODS	+= tzmon
 DRV_KMODS	+= acpi_drv 
+DRV_KMODS	+= ioat
 
 DRV_KMODS	+= cpudrv
 
--- a/usr/src/uts/i86pc/Makefile.rules	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86pc/Makefile.rules	Fri May 23 20:14:10 2008 -0700
@@ -73,6 +73,10 @@
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
 
+$(OBJS_DIR)/%.o:		$(UTSBASE)/i86pc/io/ioat/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
 $(OBJS_DIR)/%.o:		$(UTSBASE)/i86pc/io/mc/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
@@ -259,6 +263,9 @@
 $(LINTS_DIR)/%.ln:              $(UTSBASE)/i86pc/io/acpi_drv/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
+$(LINTS_DIR)/%.ln:		$(UTSBASE)/i86pc/io/ioat/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/i86pc/io/mc/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/io/ioat/ioat.c	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,665 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/file.h>
+#include <sys/open.h>
+#include <sys/modctl.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/sysmacros.h>
+
+#include <sys/ioat.h>
+
+static int ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred);
+static int ioat_close(dev_t devp, int flag, int otyp, cred_t *cred);
+static int ioat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
+static int ioat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
+static int ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
+    void **result);
+
+static 	struct cb_ops ioat_cb_ops = {
+	ioat_open,		/* cb_open */
+	ioat_close,		/* cb_close */
+	nodev,			/* cb_strategy */
+	nodev,			/* cb_print */
+	nodev,			/* cb_dump */
+	nodev,			/* cb_read */
+	nodev,			/* cb_write */
+	ioat_ioctl,		/* cb_ioctl */
+	nodev,			/* cb_devmap */
+	nodev,			/* cb_mmap */
+	nodev,			/* cb_segmap */
+	nochpoll,		/* cb_chpoll */
+	ddi_prop_op,		/* cb_prop_op */
+	NULL,			/* cb_stream */
+	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
+	CB_REV
+};
+
+static struct dev_ops ioat_dev_ops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* devo_refcnt */
+	ioat_getinfo,		/* devo_getinfo */
+	nulldev,		/* devo_identify */
+	nulldev,		/* devo_probe */
+	ioat_attach,		/* devo_attach */
+	ioat_detach,		/* devo_detach */
+	nodev,			/* devo_reset */
+	&ioat_cb_ops,		/* devo_cb_ops */
+	NULL,			/* devo_bus_ops */
+	NULL			/* power */
+};
+
+static struct modldrv ioat_modldrv = {
+	&mod_driverops,		/* Type of module.  This one is a driver */
+	"ioat driver v%I%",	/* Name of the module. */
+	&ioat_dev_ops,		/* driver ops */
+};
+
+static struct modlinkage ioat_modlinkage = {
+	MODREV_1,
+	(void *) &ioat_modldrv,
+	NULL
+};
+
+
+void *ioat_statep;
+
+static int ioat_chip_init(ioat_state_t *state);
+static void ioat_chip_fini(ioat_state_t *state);
+static int ioat_drv_init(ioat_state_t *state);
+static void ioat_drv_fini(ioat_state_t *state);
+static uint_t ioat_isr(caddr_t parm);
+static void ioat_intr_enable(ioat_state_t *state);
+static void ioat_intr_disable(ioat_state_t *state);
+void ioat_detach_finish(ioat_state_t *state);
+
+
+ddi_device_acc_attr_t ioat_acc_attr = {
+	DDI_DEVICE_ATTR_V0,		/* devacc_attr_version */
+	DDI_NEVERSWAP_ACC,		/* devacc_attr_endian_flags */
+	DDI_STORECACHING_OK_ACC,	/* devacc_attr_dataorder */
+	DDI_DEFAULT_ACC			/* devacc_attr_access */
+};
+
+/* dcopy callback interface */
+dcopy_device_cb_t ioat_cb = {
+	DCOPY_DEVICECB_V0,
+	0,		/* reserved */
+	ioat_channel_alloc,
+	ioat_channel_free,
+	ioat_cmd_alloc,
+	ioat_cmd_free,
+	ioat_cmd_post,
+	ioat_cmd_poll,
+	ioat_unregister_complete
+};
+
+/*
+ * _init()
+ */
+int
+_init(void)
+{
+	int e;
+
+	e = ddi_soft_state_init(&ioat_statep, sizeof (ioat_state_t), 1);
+	if (e != 0) {
+		return (e);
+	}
+
+	e = mod_install(&ioat_modlinkage);
+	if (e != 0) {
+		ddi_soft_state_fini(&ioat_statep);
+		return (e);
+	}
+
+	return (0);
+}
+
+/*
+ * _info()
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&ioat_modlinkage, modinfop));
+}
+
+/*
+ * _fini()
+ */
+int
+_fini(void)
+{
+	int e;
+
+	e = mod_remove(&ioat_modlinkage);
+	if (e != 0) {
+		return (e);
+	}
+
+	ddi_soft_state_fini(&ioat_statep);
+
+	return (0);
+}
+
+/*
+ * ioat_attach()
+ */
+static int
+ioat_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	ioat_state_t *state;
+	int instance;
+	int e;
+
+
+	switch (cmd) {
+	case DDI_ATTACH:
+		break;
+
+	case DDI_RESUME:
+		instance = ddi_get_instance(dip);
+		state = ddi_get_soft_state(ioat_statep, instance);
+		if (state == NULL) {
+			return (DDI_FAILURE);
+		}
+		e = ioat_channel_resume(state);
+		if (e != DDI_SUCCESS) {
+			return (DDI_FAILURE);
+		}
+		ioat_intr_enable(state);
+		return (DDI_SUCCESS);
+
+	default:
+		return (DDI_FAILURE);
+	}
+
+	instance = ddi_get_instance(dip);
+	e = ddi_soft_state_zalloc(ioat_statep, instance);
+	if (e != DDI_SUCCESS) {
+		return (DDI_FAILURE);
+	}
+	state = ddi_get_soft_state(ioat_statep, instance);
+	if (state == NULL) {
+		goto attachfail_get_soft_state;
+	}
+
+	state->is_dip = dip;
+	state->is_instance = instance;
+
+	/* setup the registers, save away some device info */
+	e = ioat_chip_init(state);
+	if (e != DDI_SUCCESS) {
+		goto attachfail_chip_init;
+	}
+
+	/* initialize driver state, must be after chip init */
+	e = ioat_drv_init(state);
+	if (e != DDI_SUCCESS) {
+		goto attachfail_drv_init;
+	}
+
+	/* create the minor node (for the ioctl) */
+	e = ddi_create_minor_node(dip, "ioat", S_IFCHR, instance, DDI_PSEUDO,
+	    0);
+	if (e != DDI_SUCCESS) {
+		goto attachfail_minor_node;
+	}
+
+	/* Enable device interrupts */
+	ioat_intr_enable(state);
+
+	/* Report that driver was loaded */
+	ddi_report_dev(dip);
+
+	/* register with dcopy */
+	e = dcopy_device_register(state, &state->is_deviceinfo,
+	    &state->is_device_handle);
+	if (e != DCOPY_SUCCESS) {
+		goto attachfail_register;
+	}
+
+	return (DDI_SUCCESS);
+
+attachfail_register:
+	ioat_intr_disable(state);
+	ddi_remove_minor_node(dip, NULL);
+attachfail_minor_node:
+	ioat_drv_fini(state);
+attachfail_drv_init:
+	ioat_chip_fini(state);
+attachfail_chip_init:
+attachfail_get_soft_state:
+	(void) ddi_soft_state_free(ioat_statep, instance);
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * ioat_detach()
+ */
+static int
+ioat_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	ioat_state_t *state;
+	int instance;
+	int e;
+
+
+	instance = ddi_get_instance(dip);
+	state = ddi_get_soft_state(ioat_statep, instance);
+	if (state == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+
+	case DDI_SUSPEND:
+		ioat_channel_suspend(state);
+		return (DDI_SUCCESS);
+
+	default:
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * try to unregister from dcopy.  Since this driver doesn't follow the
+	 * traditional parent/child model, we may still be in use so we can't
+	 * detach yet.
+	 */
+	e = dcopy_device_unregister(&state->is_device_handle);
+	if (e != DCOPY_SUCCESS) {
+		if (e == DCOPY_PENDING) {
+			cmn_err(CE_NOTE, "device busy, performing asynchronous"
+			    " detach\n");
+		}
+		return (DDI_FAILURE);
+	}
+
+	ioat_detach_finish(state);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * ioat_getinfo()
+ */
+/*ARGSUSED*/
+static int
+ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+	ioat_state_t *state;
+	int instance;
+	dev_t dev;
+	int e;
+
+
+	dev = (dev_t)arg;
+	instance = getminor(dev);
+
+	switch (cmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		state = ddi_get_soft_state(ioat_statep, instance);
+		if (state == NULL) {
+			return (DDI_FAILURE);
+		}
+		*result = (void *)state->is_dip;
+		e = DDI_SUCCESS;
+		break;
+
+	case DDI_INFO_DEVT2INSTANCE:
+		*result = (void *)(uintptr_t)instance;
+		e = DDI_SUCCESS;
+		break;
+
+	default:
+		e = DDI_FAILURE;
+		break;
+	}
+
+	return (e);
+}
+
+
+/*
+ * ioat_open()
+ */
+/*ARGSUSED*/
+static int
+ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred)
+{
+	ioat_state_t *state;
+	int instance;
+
+	instance = getminor(*devp);
+	state = ddi_get_soft_state(ioat_statep, instance);
+	if (state == NULL) {
+		return (ENXIO);
+	}
+
+	return (0);
+}
+
+
+/*
+ * ioat_close()
+ */
+/*ARGSUSED*/
+static int
+ioat_close(dev_t devp, int flag, int otyp, cred_t *cred)
+{
+	return (0);
+}
+
+
+/*
+ * ioat_chip_init()
+ */
+static int
+ioat_chip_init(ioat_state_t *state)
+{
+	ddi_device_acc_attr_t attr;
+	int e;
+
+
+	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
+	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
+	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
+
+	e =  ddi_regs_map_setup(state->is_dip, 1, (caddr_t *)&state->is_genregs,
+	    0, 0, &attr, &state->is_reg_handle);
+	if (e != DDI_SUCCESS) {
+		goto chipinitfail_regsmap;
+	}
+
+	/* save away ioat chip info */
+	state->is_num_channels = (uint_t)ddi_get8(state->is_reg_handle,
+	    &state->is_genregs[IOAT_CHANCNT]);
+	state->is_maxxfer = (uint_t)ddi_get8(state->is_reg_handle,
+	    &state->is_genregs[IOAT_XFERCAP]);
+	state->is_chanoff = (uintptr_t)ddi_get16(state->is_reg_handle,
+	    (uint16_t *)&state->is_genregs[IOAT_PERPORT_OFF]);
+	state->is_cbver = (uint_t)ddi_get8(state->is_reg_handle,
+	    &state->is_genregs[IOAT_CBVER]);
+	state->is_intrdelay = (uint_t)ddi_get16(state->is_reg_handle,
+	    (uint16_t *)&state->is_genregs[IOAT_INTRDELAY]);
+	state->is_status = (uint_t)ddi_get16(state->is_reg_handle,
+	    (uint16_t *)&state->is_genregs[IOAT_CSSTATUS]);
+	state->is_capabilities = (uint_t)ddi_get32(state->is_reg_handle,
+	    (uint32_t *)&state->is_genregs[IOAT_DMACAPABILITY]);
+
+	if (state->is_cbver & 0x10) {
+		state->is_ver = IOAT_CBv1;
+	} else if (state->is_cbver & 0x20) {
+		state->is_ver = IOAT_CBv2;
+	} else {
+		goto chipinitfail_version;
+	}
+
+	return (DDI_SUCCESS);
+
+chipinitfail_version:
+	ddi_regs_map_free(&state->is_reg_handle);
+chipinitfail_regsmap:
+	return (DDI_FAILURE);
+}
+
+
+/*
+ * ioat_chip_fini()
+ */
+static void
+ioat_chip_fini(ioat_state_t *state)
+{
+	ddi_regs_map_free(&state->is_reg_handle);
+}
+
+
+/*
+ * ioat_drv_init()
+ */
+static int
+ioat_drv_init(ioat_state_t *state)
+{
+	ddi_acc_handle_t handle;
+	int e;
+
+
+	mutex_init(&state->is_mutex, NULL, MUTEX_DRIVER, NULL);
+
+	state->is_deviceinfo.di_dip = state->is_dip;
+	state->is_deviceinfo.di_num_dma = state->is_num_channels;
+	state->is_deviceinfo.di_maxxfer = state->is_maxxfer;
+	state->is_deviceinfo.di_capabilities = state->is_capabilities;
+	state->is_deviceinfo.di_cb = &ioat_cb;
+
+	e = pci_config_setup(state->is_dip, &handle);
+	if (e != DDI_SUCCESS) {
+		goto drvinitfail_config_setup;
+	}
+
+	/* read in Vendor ID */
+	state->is_deviceinfo.di_id = (uint64_t)pci_config_get16(handle, 0);
+	state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 16;
+
+	/* read in Device ID */
+	state->is_deviceinfo.di_id |= (uint64_t)pci_config_get16(handle, 2);
+	state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 32;
+
+	/* Add in chipset version */
+	state->is_deviceinfo.di_id |= (uint64_t)state->is_cbver;
+	pci_config_teardown(&handle);
+
+	e = ddi_intr_hilevel(state->is_dip, 0);
+	if (e != 0) {
+		cmn_err(CE_WARN, "hilevel interrupt not supported\n");
+		goto drvinitfail_hilevel;
+	}
+
+	/* we don't support MSIs for v2 yet */
+	e = ddi_add_intr(state->is_dip, 0, NULL, NULL, ioat_isr,
+	    (caddr_t)state);
+	if (e != DDI_SUCCESS) {
+		goto drvinitfail_add_intr;
+	}
+
+	e = ddi_get_iblock_cookie(state->is_dip, 0, &state->is_iblock_cookie);
+	if (e != DDI_SUCCESS) {
+		goto drvinitfail_iblock_cookie;
+	}
+
+	e = ioat_channel_init(state);
+	if (e != DDI_SUCCESS) {
+		goto drvinitfail_channel_init;
+	}
+
+	return (DDI_SUCCESS);
+
+drvinitfail_channel_init:
+drvinitfail_iblock_cookie:
+	ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie);
+drvinitfail_add_intr:
+drvinitfail_hilevel:
+drvinitfail_config_setup:
+	mutex_destroy(&state->is_mutex);
+
+	return (DDI_FAILURE);
+}
+
+
+/*
+ * ioat_drv_fini()
+ */
+static void
+ioat_drv_fini(ioat_state_t *state)
+{
+	ioat_channel_fini(state);
+	ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie);
+	mutex_destroy(&state->is_mutex);
+}
+
+
+/*
+ * ioat_unregister_complete()
+ */
+void
+ioat_unregister_complete(void *device_private, int status)
+{
+	ioat_state_t *state;
+
+
+	state = device_private;
+
+	if (status != DCOPY_SUCCESS) {
+		cmn_err(CE_WARN, "asynchronous detach aborted\n");
+		return;
+	}
+
+	cmn_err(CE_CONT, "detach completing\n");
+	ioat_detach_finish(state);
+}
+
+
+/*
+ * ioat_detach_finish()
+ */
+void
+ioat_detach_finish(ioat_state_t *state)
+{
+	ioat_intr_disable(state);
+	ddi_remove_minor_node(state->is_dip, NULL);
+	ioat_drv_fini(state);
+	ioat_chip_fini(state);
+	(void) ddi_soft_state_free(ioat_statep, state->is_instance);
+}
+
+
+/*
+ * ioat_intr_enable()
+ */
+static void
+ioat_intr_enable(ioat_state_t *state)
+{
+	uint32_t intr_status;
+
+
+	/* Clear any pending interrupts */
+	intr_status = ddi_get32(state->is_reg_handle,
+	    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]);
+	if (intr_status != 0) {
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS],
+		    intr_status);
+	}
+
+	/* Enable interrupts on the device */
+	ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL],
+	    IOAT_INTRCTL_MASTER_EN);
+}
+
+
+/*
+ * ioat_intr_disable()
+ */
+static void
+ioat_intr_disable(ioat_state_t *state)
+{
+	/*
+	 * disable interrupts on the device. A read of the interrupt control
+	 * register clears the enable bit.
+	 */
+	(void) ddi_get8(state->is_reg_handle,
+	    &state->is_genregs[IOAT_INTRCTL]);
+}
+
+
+/*
+ * ioat_isr()
+ */
+static uint_t
+ioat_isr(caddr_t parm)
+{
+	uint32_t intr_status;
+	ioat_state_t *state;
+	uint8_t intrctrl;
+	uint32_t chan;
+	uint_t r;
+	int i;
+
+	state = (ioat_state_t *)parm;
+
+	intrctrl = ddi_get8(state->is_reg_handle,
+	    &state->is_genregs[IOAT_INTRCTL]);
+	/* master interrupt enable should always be set */
+	ASSERT(intrctrl & IOAT_INTRCTL_MASTER_EN);
+
+	/* If the interrupt status bit isn't set, it's not ours */
+	if (!(intrctrl & IOAT_INTRCTL_INTR_STAT)) {
+		/* re-set master interrupt enable (since it clears on read) */
+		ddi_put8(state->is_reg_handle,
+		    &state->is_genregs[IOAT_INTRCTL], intrctrl);
+		return (DDI_INTR_UNCLAIMED);
+	}
+
+	/* see which channels generated the interrupt */
+	intr_status = ddi_get32(state->is_reg_handle,
+	    (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]);
+
+	/* call the intr handler for the channels */
+	r = DDI_INTR_UNCLAIMED;
+	chan = 1;
+	for (i = 0; i < state->is_num_channels; i++) {
+		if (intr_status & chan) {
+			ioat_channel_intr(&state->is_channel[i]);
+			r = DDI_INTR_CLAIMED;
+		}
+		chan = chan << 1;
+	}
+
+	/*
+	 * if interrupt status bit was set, there should have been an
+	 * attention status bit set too.
+	 */
+	ASSERT(r == DDI_INTR_CLAIMED);
+
+	/* re-set master interrupt enable (since it clears on read) */
+	ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL],
+	    intrctrl);
+
+	return (r);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/io/ioat/ioat.conf	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+#
+# force attach this driver to support misc/driver
+ddi-forceattach=1;
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/io/ioat/ioat_chan.c	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,1319 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/file.h>
+#include <sys/open.h>
+#include <sys/modctl.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/sysmacros.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <sys/mach_mmu.h>
+#ifdef __xpv
+#include <sys/hypervisor.h>
+#endif
+
+#include <sys/ioat.h>
+
+
+extern ddi_device_acc_attr_t ioat_acc_attr;
+
+/* dma attr for the descriptor rings */
+ddi_dma_attr_t ioat_desc_dma_attr = {
+	DMA_ATTR_V0,		/* dma_attr_version */
+	0x0,			/* dma_attr_addr_lo */
+	0xffffffffffffffff,	/* dma_attr_addr_hi */
+	0xffffffff,		/* dma_attr_count_max */
+	0x1000,			/* dma_attr_align */
+	0x1,			/* dma_attr_burstsizes */
+	0x1,			/* dma_attr_minxfer */
+	0xffffffff,		/* dma_attr_maxxfer */
+	0xffffffff,		/* dma_attr_seg */
+	0x1,			/* dma_attr_sgllen */
+	0x1,			/* dma_attr_granular */
+	0x0,			/* dma_attr_flags */
+};
+
+/* dma attr for the completion buffers */
+ddi_dma_attr_t ioat_cmpl_dma_attr = {
+	DMA_ATTR_V0,		/* dma_attr_version */
+	0x0,			/* dma_attr_addr_lo */
+	0xffffffffffffffff,	/* dma_attr_addr_hi */
+	0xffffffff,		/* dma_attr_count_max */
+	0x40,			/* dma_attr_align */
+	0x1,			/* dma_attr_burstsizes */
+	0x1,			/* dma_attr_minxfer */
+	0xffffffff,		/* dma_attr_maxxfer */
+	0xffffffff,		/* dma_attr_seg */
+	0x1,			/* dma_attr_sgllen */
+	0x1,			/* dma_attr_granular */
+	0x0,			/* dma_attr_flags */
+};
+
+static int ioat_completion_alloc(ioat_channel_t channel);
+static void ioat_completion_free(ioat_channel_t channel);
+static void ioat_channel_start(ioat_channel_t channel);
+static void ioat_channel_reset(ioat_channel_t channel);
+
+int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
+void ioat_ring_free(ioat_channel_t channel);
+void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
+int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
+    dcopy_cmd_t cmd);
+
+static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
+    uint64_t dest_addr, uint32_t size, uint32_t ctrl);
+static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
+
+
+/*
+ * ioat_channel_init()
+ */
+int
+ioat_channel_init(ioat_state_t *state)
+{
+	int i;
+
+	/*
+	 * initialize each dma channel's state which doesn't change across
+	 * channel alloc/free.
+	 */
+	state->is_chansize = sizeof (struct ioat_channel_s) *
+	    state->is_num_channels;
+	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
+	for (i = 0; i < state->is_num_channels; i++) {
+		state->is_channel[i].ic_state = state;
+		state->is_channel[i].ic_regs = (uint8_t *)
+		    ((uintptr_t)state->is_genregs +
+		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
+	}
+
+	/* initialize the allocator (from 0 to state->is_num_channels) */
+	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
+
+	return (DDI_SUCCESS);
+}
+
+
+/*
+ * ioat_channel_fini()
+ */
+void
+ioat_channel_fini(ioat_state_t *state)
+{
+	ioat_rs_fini(&state->is_channel_rs);
+	kmem_free(state->is_channel, state->is_chansize);
+}
+
+
+/*
+ * ioat_channel_alloc()
+ *   NOTE: We intentionally don't handle DCOPY_SLEEP (if no channels are
+ *	available)
+ */
+/*ARGSUSED*/
+int
+ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
+    uint_t size, dcopy_query_channel_t *info, void *channel_private)
+{
+#define	CHANSTRSIZE	20
+	struct ioat_channel_s *channel;
+	char chanstr[CHANSTRSIZE];
+	ioat_channel_t *chan;
+	ioat_state_t *state;
+	size_t cmd_size;
+	uint_t chan_num;
+	uint32_t estat;
+	int e;
+
+
+	state = (ioat_state_t *)device_private;
+	chan = (ioat_channel_t *)channel_private;
+
+	/* allocate a H/W channel */
+	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
+	if (e != DDI_SUCCESS) {
+		return (DCOPY_NORESOURCES);
+	}
+
+	channel = &state->is_channel[chan_num];
+	channel->ic_inuse = B_TRUE;
+	channel->ic_chan_num = chan_num;
+	channel->ic_ver = state->is_ver;
+	channel->ic_dca_active = B_FALSE;
+	channel->ic_channel_state = IOAT_CHANNEL_OK;
+	channel->ic_dcopy_handle = handle;
+
+#ifdef	DEBUG
+	{
+		/* if we're cbv2, verify that the V2 compatibility bit is set */
+		uint16_t reg;
+		if (channel->ic_ver == IOAT_CBv2) {
+			reg = ddi_get16(state->is_reg_handle,
+			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
+			ASSERT(reg & 0x2);
+		}
+	}
+#endif
+
+	/*
+	 * Configure DMA channel
+	 *   Channel In Use
+	 *   Error Interrupt Enable
+	 *   Any Error Abort Enable
+	 *   Error Completion Enable
+	 */
+	ddi_put16(state->is_reg_handle,
+	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
+
+	/* check channel error register, clear any errors */
+	estat = ddi_get32(state->is_reg_handle,
+	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
+	if (estat != 0) {
+#ifdef	DEBUG
+		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
+		    "enable\n", estat, channel->ic_chan_num);
+#endif
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
+	}
+
+	/* allocate and initialize the descriptor buf */
+	e = ioat_ring_alloc(channel, size);
+	if (e != DDI_SUCCESS) {
+		goto chinitfail_desc_alloc;
+	}
+
+	/* allocate and initialize the completion space */
+	e = ioat_completion_alloc(channel);
+	if (e != DDI_SUCCESS) {
+		goto chinitfail_completion_alloc;
+	}
+
+	/* setup kmem_cache for commands */
+	cmd_size = sizeof (struct dcopy_cmd_s) +
+	    sizeof (struct dcopy_cmd_priv_s) +
+	    sizeof (struct ioat_cmd_private_s);
+	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
+	    state->is_instance, channel->ic_chan_num);
+	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	if (channel->ic_cmd_cache == NULL) {
+		goto chinitfail_kmem_cache;
+	}
+
+	/* start-up the channel */
+	ioat_channel_start(channel);
+
+	/* fill in the channel info returned to dcopy */
+	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
+	info->qc_id = state->is_deviceinfo.di_id;
+	info->qc_capabilities = (uint64_t)state->is_capabilities;
+	info->qc_channel_size = (uint64_t)size;
+	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
+	if (channel->ic_ver == IOAT_CBv1) {
+		info->qc_dca_supported = B_FALSE;
+	} else {
+		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
+			info->qc_dca_supported = B_TRUE;
+		} else {
+			info->qc_dca_supported = B_FALSE;
+		}
+	}
+
+	*chan = channel;
+
+	return (DCOPY_SUCCESS);
+
+chinitfail_kmem_cache:
+	ioat_completion_free(channel);
+chinitfail_completion_alloc:
+	ioat_ring_free(channel);
+chinitfail_desc_alloc:
+	return (DCOPY_FAILURE);
+}
+
+
+/*
+ * ioat_channel_suspend()
+ */
+/*ARGSUSED*/
+void
+ioat_channel_suspend(ioat_state_t *state)
+{
+	/*
+	 * normally you would disable interrupts and reset the H/W here. But
+	 * since the suspend framework doesn't know who is using us, it may
+	 * not suspend their I/O before us.  Since we won't actively be doing
+	 * any DMA or interrupts unless someone asks us to, it's safe to not
+	 * do anything here.
+	 */
+}
+
+
+/*
+ * ioat_channel_resume()
+ */
+int
+ioat_channel_resume(ioat_state_t *state)
+{
+	ioat_channel_ring_t *ring;
+	ioat_channel_t channel;
+	uint32_t estat;
+	int i;
+
+
+	for (i = 0; i < state->is_num_channels; i++) {
+		channel = &state->is_channel[i];
+		ring = channel->ic_ring;
+
+		if (!channel->ic_inuse) {
+			continue;
+		}
+
+		/*
+		 * Configure DMA channel
+		 *   Channel In Use
+		 *   Error Interrupt Enable
+		 *   Any Error Abort Enable
+		 *   Error Completion Enable
+		 */
+		ddi_put16(state->is_reg_handle,
+		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
+
+		/* check channel error register, clear any errors */
+		estat = ddi_get32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
+		if (estat != 0) {
+#ifdef	DEBUG
+			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
+			    " (%d) enable\n", estat, channel->ic_chan_num);
+#endif
+			ddi_put32(state->is_reg_handle,
+			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
+			    estat);
+		}
+
+		/* Re-initialize the ring */
+		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
+		/* write the physical address into the chain address register */
+		if (channel->ic_ver == IOAT_CBv1) {
+			ddi_put32(state->is_reg_handle,
+			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
+			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
+			ddi_put32(state->is_reg_handle,
+			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
+			    (uint32_t)(ring->cr_phys_desc >> 32));
+		} else {
+			ASSERT(channel->ic_ver == IOAT_CBv2);
+			ddi_put32(state->is_reg_handle,
+			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
+			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
+			ddi_put32(state->is_reg_handle,
+			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
+			    (uint32_t)(ring->cr_phys_desc >> 32));
+		}
+
+		/* re-initialize the completion buffer */
+		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
+		/* write the phys addr into the completion address register */
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
+		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
+		    (uint32_t)(channel->ic_phys_cmpl >> 32));
+
+		/* start-up the channel */
+		ioat_channel_start(channel);
+
+	}
+
+	return (DDI_SUCCESS);
+}
+
+
+/*
+ * ioat_channel_free()
+ */
+void
+ioat_channel_free(void *channel_private)
+{
+	struct ioat_channel_s *channel;
+	ioat_channel_t *chan;
+	ioat_state_t *state;
+	uint_t chan_num;
+
+
+	chan = (ioat_channel_t *)channel_private;
+	channel = *chan;
+
+	state = channel->ic_state;
+	chan_num = channel->ic_chan_num;
+
+	/* disable the interrupts */
+	ddi_put16(state->is_reg_handle,
+	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
+
+	ioat_channel_reset(channel);
+
+	/* cleanup command cache */
+	kmem_cache_destroy(channel->ic_cmd_cache);
+
+	/* clean-up/free-up the completion space and descriptors */
+	ioat_completion_free(channel);
+	ioat_ring_free(channel);
+
+	channel->ic_inuse = B_FALSE;
+
+	/* free the H/W DMA engine */
+	ioat_rs_free(state->is_channel_rs, chan_num);
+
+	*chan = NULL;
+}
+
+
+/*
+ * ioat_channel_intr()
+ */
+void
+ioat_channel_intr(ioat_channel_t channel)
+{
+	ioat_state_t *state;
+	uint16_t chanctrl;
+	uint32_t chanerr;
+	uint32_t status;
+
+
+	state = channel->ic_state;
+
+	if (channel->ic_ver == IOAT_CBv1) {
+		status = ddi_get32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
+	} else {
+		ASSERT(channel->ic_ver == IOAT_CBv2);
+		status = ddi_get32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
+	}
+
+	/* if that status isn't ACTIVE or IDLE, the channel has failed */
+	if (status & IOAT_CHAN_STS_FAIL_MASK) {
+		chanerr = ddi_get32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
+		cmn_err(CE_WARN, "channel(%d) fatal failure! "
+		    "chanstat_lo=0x%X; chanerr=0x%X\n",
+		    channel->ic_chan_num, status, chanerr);
+		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
+		ioat_channel_reset(channel);
+
+		return;
+	}
+
+	/*
+	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
+	 * ensure the write completes.
+	 */
+	chanctrl = ddi_get16(state->is_reg_handle,
+	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
+	ddi_put16(state->is_reg_handle,
+	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
+	(void) ddi_get16(state->is_reg_handle,
+	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
+
+	/* tell dcopy we have seen a completion on this channel */
+	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
+}
+
+
+/*
+ * ioat_channel_start()
+ */
+void
+ioat_channel_start(ioat_channel_t channel)
+{
+	ioat_chan_dma_desc_t desc;
+
+	/* set the first descriptor up as a NULL descriptor */
+	bzero(&desc, sizeof (desc));
+	desc.dd_size = 0;
+	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
+	    IOAT_DESC_CTRL_CMPL;
+	desc.dd_next_desc = 0x0;
+
+	/* setup the very first descriptor */
+	ioat_ring_seed(channel, &desc);
+}
+
+
+/*
+ * ioat_channel_reset()
+ */
+void
+ioat_channel_reset(ioat_channel_t channel)
+{
+	ioat_state_t *state;
+
+	state = channel->ic_state;
+
+	/* hit the reset bit */
+	if (channel->ic_ver == IOAT_CBv1) {
+		ddi_put8(state->is_reg_handle,
+		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
+	} else {
+		ASSERT(channel->ic_ver == IOAT_CBv2);
+		ddi_put8(state->is_reg_handle,
+		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
+	}
+}
+
+
+/*
+ * ioat_completion_alloc()
+ */
+int
+ioat_completion_alloc(ioat_channel_t channel)
+{
+	ioat_state_t *state;
+	size_t real_length;
+	uint_t cookie_cnt;
+	int e;
+
+
+	state = channel->ic_state;
+
+	/*
+	 * allocate memory for the completion status, zero it out, and get
+	 * the paddr. We'll allocate a physically contiguous cache line.
+	 */
+	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
+	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
+	if (e != DDI_SUCCESS) {
+		goto cmplallocfail_alloc_handle;
+	}
+	channel->ic_cmpl_alloc_size = 64;
+	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
+	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
+	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
+	    (caddr_t *)&channel->ic_cmpl, &real_length,
+	    &channel->ic_cmpl_handle);
+	if (e != DDI_SUCCESS) {
+		goto cmplallocfail_mem_alloc;
+	}
+	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
+	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
+	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
+	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
+	    &channel->ic_cmpl_cookie, &cookie_cnt);
+	if (e != DDI_SUCCESS) {
+		goto cmplallocfail_addr_bind;
+	}
+	ASSERT(cookie_cnt == 1);
+	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
+	    channel->ic_cmpl_alloc_size);
+	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
+
+	/* write the physical address into the completion address register */
+	ddi_put32(state->is_reg_handle,
+	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
+	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
+	ddi_put32(state->is_reg_handle,
+	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
+	    (uint32_t)(channel->ic_phys_cmpl >> 32));
+
+	return (DDI_SUCCESS);
+
+cmplallocfail_addr_bind:
+	ddi_dma_mem_free(&channel->ic_cmpl_handle);
+cmplallocfail_mem_alloc:
+	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
+cmplallocfail_alloc_handle:
+	return (DDI_FAILURE);
+}
+
+
+/*
+ * ioat_completion_free()
+ *    Undo ioat_completion_alloc(): clear the channel's completion address
+ *    registers, then unbind and free the completion DMA memory and handle.
+ */
+void
+ioat_completion_free(ioat_channel_t channel)
+{
+	ioat_state_t *state;
+
+	state = channel->ic_state;
+
+	/* reset the completion address register */
+	ddi_put32(state->is_reg_handle,
+	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
+	ddi_put32(state->is_reg_handle,
+	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
+
+	/* unbind, then free up the memory, dma handle */
+	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
+	ddi_dma_mem_free(&channel->ic_cmpl_handle);
+	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
+}
+
+/*
+ * ioat_ring_alloc()
+ *    Allocate the ring state and a physically contiguous descriptor ring of
+ *    at least desc_cnt entries, bind it for DMA, and program its physical
+ *    address into the channel's chain address registers (v1 or v2 layout).
+ *    Returns DCOPY_SUCCESS or DCOPY_FAILURE.
+ */
+int
+ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
+{
+	ioat_channel_ring_t *ring;
+	ioat_state_t *state;
+	size_t real_length;
+	uint_t cookie_cnt;
+	int e;
+
+
+	state = channel->ic_state;
+
+	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
+	channel->ic_ring = ring;
+	ring->cr_chan = channel;
+	ring->cr_post_cnt = 0;
+
+	/* both mutexes interlock with the channel's interrupt handler */
+	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
+	    channel->ic_state->is_iblock_cookie);
+	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
+	    channel->ic_state->is_iblock_cookie);
+
+	/*
+	 * allocate memory for the ring, zero it out, and get the paddr.
+	 * We'll allocate a physically contiguous chunk of memory, which
+	 * simplifies the completion logic.
+	 */
+	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
+	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
+	if (e != DDI_SUCCESS) {
+		goto ringallocfail_alloc_handle;
+	}
+	/*
+	 * allocate one extra descriptor so we can simplify the empty/full
+	 * logic. Then round that number up to a whole multiple of 4.
+	 */
+	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
+	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
+	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
+	    sizeof (ioat_chan_desc_t);
+	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
+	    channel->ic_desc_alloc_size, &ioat_acc_attr,
+	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
+	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
+	if (e != DDI_SUCCESS) {
+		goto ringallocfail_mem_alloc;
+	}
+	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
+	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
+	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
+	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
+	    &channel->ic_desc_cookies, &cookie_cnt);
+	if (e != DDI_SUCCESS) {
+		goto ringallocfail_addr_bind;
+	}
+	/* one cookie is guaranteed by ioat_desc_dma_attr (contiguous) */
+	ASSERT(cookie_cnt == 1);
+	ASSERT(channel->ic_desc_cookies.dmac_size ==
+	    channel->ic_desc_alloc_size);
+	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
+
+	/* write the physical address into the chain address register */
+	if (channel->ic_ver == IOAT_CBv1) {
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
+		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
+		    (uint32_t)(ring->cr_phys_desc >> 32));
+	} else {
+		ASSERT(channel->ic_ver == IOAT_CBv2);
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
+		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
+		    (uint32_t)(ring->cr_phys_desc >> 32));
+	}
+
+	return (DCOPY_SUCCESS);
+
+ringallocfail_addr_bind:
+	ddi_dma_mem_free(&channel->ic_desc_handle);
+ringallocfail_mem_alloc:
+	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
+ringallocfail_alloc_handle:
+	mutex_destroy(&ring->cr_desc_mutex);
+	mutex_destroy(&ring->cr_cmpl_mutex);
+	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
+
+	return (DCOPY_FAILURE);
+}
+
+
+/*
+ * ioat_ring_free()
+ *    Undo ioat_ring_alloc(): clear the channel's chain address registers
+ *    (v1 or v2 layout), unbind and free the descriptor ring DMA resources,
+ *    destroy the ring mutexes, and free the ring state.
+ */
+void
+ioat_ring_free(ioat_channel_t channel)
+{
+	ioat_state_t *state;
+
+
+	state = channel->ic_state;
+
+	/* reset the chain address register */
+	if (channel->ic_ver == IOAT_CBv1) {
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
+	} else {
+		ASSERT(channel->ic_ver == IOAT_CBv2);
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
+	}
+
+	/* unbind, then free up the memory, dma handle */
+	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
+	ddi_dma_mem_free(&channel->ic_desc_handle);
+	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
+
+	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
+	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
+	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
+
+}
+
+
+/*
+ * ioat_ring_seed()
+ *    write the first descriptor in the ring and reset the ring's
+ *    completion/descriptor bookkeeping.  For CBv1, kick the start bit;
+ *    for CBv2, link descriptor 0 to an empty follow-on descriptor and
+ *    bump the hardware descriptor count to 1.
+ */
+void
+ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
+{
+	ioat_channel_ring_t *ring;
+	ioat_chan_dma_desc_t *desc;
+	ioat_chan_dma_desc_t *prev;
+	ioat_state_t *state;
+
+
+	state = channel->ic_state;
+	ring = channel->ic_ring;
+
+	/* init the completion state */
+	ring->cr_cmpl_gen = 0x0;
+	ring->cr_cmpl_last = 0x0;
+
+	/* write in the descriptor and init the descriptor state */
+	ring->cr_post_cnt++;
+	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
+	ring->cr_desc_gen = 0;
+	ring->cr_desc_prev = 0;
+	ring->cr_desc_next = 1;
+
+	if (channel->ic_ver == IOAT_CBv1) {
+		/* hit the start bit */
+		ddi_put8(state->is_reg_handle,
+		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
+	} else {
+		/*
+		 * if this is CBv2, link the descriptor to an empty
+		 * descriptor
+		 */
+		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
+		desc = (ioat_chan_dma_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_next];
+		prev = (ioat_chan_dma_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_prev];
+
+		desc->dd_ctrl = 0;
+		desc->dd_next_desc = 0x0;
+
+		/* descriptors are 64 bytes, hence the << 6 offset math */
+		prev->dd_next_desc = ring->cr_phys_desc +
+		    (ring->cr_desc_next << 6);
+
+		ddi_put16(state->is_reg_handle,
+		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
+		    (uint16_t)1);
+	}
+
+}
+
+
+/*
+ * ioat_cmd_alloc()
+ *    Allocate a dcopy command from the channel's kmem cache.  The cache
+ *    object is laid out as dcopy_cmd_s, then dcopy_cmd_priv_s, then
+ *    ioat_cmd_private_s; the private pointers are carved out of that single
+ *    allocation.  With DCOPY_ALLOC_LINK, the command previously in *cmd is
+ *    chained off the new one so a single free releases the whole chain.
+ *    Returns DCOPY_SUCCESS or DCOPY_NORESOURCES.
+ */
+int
+ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
+{
+	ioat_cmd_private_t *priv;
+	ioat_channel_t channel;
+	dcopy_cmd_t oldcmd;
+	int kmflag;
+
+
+	channel = (ioat_channel_t)private;
+
+	if (flags & DCOPY_NOSLEEP) {
+		kmflag = KM_NOSLEEP;
+	} else {
+		kmflag = KM_SLEEP;
+	}
+
+	/* save the command passed in, in case DCOPY_ALLOC_LINK is set */
+	oldcmd = *cmd;
+
+	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
+	if (*cmd == NULL) {
+		return (DCOPY_NORESOURCES);
+	}
+
+	/* setup the dcopy and ioat private state pointers */
+	(*cmd)->dp_version = DCOPY_CMD_V0;
+	(*cmd)->dp_cmd = 0;
+	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
+	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
+	(*cmd)->dp_private->pr_device_cmd_private =
+	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
+	    sizeof (struct dcopy_cmd_priv_s));
+
+	/*
+	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
+	 * just allocated.
+	 */
+	priv = (*cmd)->dp_private->pr_device_cmd_private;
+	if (flags & DCOPY_ALLOC_LINK) {
+		priv->ip_next = oldcmd;
+	} else {
+		priv->ip_next = NULL;
+	}
+
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * ioat_cmd_free()
+ *    Free a command and every command chained behind it via ip_next, then
+ *    NULL out the caller's pointer.  See DCOPY_ALLOC_LINK in
+ *    ioat_cmd_alloc() for how the chain is built.
+ */
+void
+ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
+{
+	ioat_cmd_private_t *priv;
+	ioat_channel_t channel;
+	dcopy_cmd_t next;
+	dcopy_cmd_t cmd;
+
+
+	channel = (ioat_channel_t)private;
+	cmd = *(cmdp);
+
+	/*
+	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
+	 * ioat_cmd_alloc() for more info).
+	 */
+	while (cmd != NULL) {
+		/* grab the next link before freeing the current command */
+		priv = cmd->dp_private->pr_device_cmd_private;
+		next = priv->ip_next;
+		kmem_cache_free(channel->ic_cmd_cache, cmd);
+		cmd = next;
+	}
+	*cmdp = NULL;
+}
+
+
+/*
+ * ioat_cmd_post()
+ *    Post a DCOPY_CMD_COPY command to the channel's descriptor ring.  The
+ *    copy is broken into one descriptor per page span since a descriptor
+ *    must not cross a page boundary.  Optionally posts a leading DCA
+ *    descriptor (CBv2 only), records completion-tracking state unless
+ *    DCOPY_CMD_NOSTAT, and notifies the engine unless DCOPY_CMD_QUEUE.
+ *    Returns DCOPY_SUCCESS, DCOPY_FAILURE (channel dead), or
+ *    DCOPY_NORESOURCES (ring full).
+ */
+int
+ioat_cmd_post(void *private, dcopy_cmd_t cmd)
+{
+	ioat_channel_ring_t *ring;
+	ioat_cmd_private_t *priv;
+	ioat_channel_t channel;
+	ioat_state_t *state;
+	uint64_t dest_paddr;
+	uint64_t src_paddr;
+	uint64_t dest_addr;
+	uint32_t dest_size;
+	uint64_t src_addr;
+	uint32_t src_size;
+	size_t xfer_size;
+	uint32_t ctrl;
+	size_t size;
+	int e;
+
+
+	channel = (ioat_channel_t)private;
+	priv = cmd->dp_private->pr_device_cmd_private;
+
+	state = channel->ic_state;
+	ring = channel->ic_ring;
+
+	mutex_enter(&ring->cr_desc_mutex);
+
+	/* if the channel has had a fatal failure, return failure */
+	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
+		/*
+		 * BUGFIX: release the mutex we actually hold
+		 * (cr_desc_mutex); this path used to exit cr_cmpl_mutex,
+		 * which is not held here.
+		 */
+		mutex_exit(&ring->cr_desc_mutex);
+		return (DCOPY_FAILURE);
+	}
+
+	/* make sure we have space for the descriptors */
+	e = ioat_ring_reserve(channel, ring, cmd);
+	if (e != DCOPY_SUCCESS) {
+		/* BUGFIX: likewise, exit cr_desc_mutex, not cr_cmpl_mutex */
+		mutex_exit(&ring->cr_desc_mutex);
+		return (DCOPY_NORESOURCES);
+	}
+
+	/* if we support DCA, and the DCA flag is set, post a DCA desc */
+	if ((channel->ic_ver == IOAT_CBv2) &&
+	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
+		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
+	}
+
+	/*
+	 * the dma copy may have to be broken up into multiple descriptors
+	 * since we can't cross a page boundary.
+	 */
+	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
+	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
+	src_addr = cmd->dp.copy.cc_source;
+	dest_addr = cmd->dp.copy.cc_dest;
+	size = cmd->dp.copy.cc_size;
+	while (size > 0) {
+		src_paddr = pa_to_ma(src_addr);
+		dest_paddr = pa_to_ma(dest_addr);
+
+		/* adjust for any offset into the page */
+		if ((src_addr & PAGEOFFSET) == 0) {
+			src_size = PAGESIZE;
+		} else {
+			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
+		}
+		if ((dest_addr & PAGEOFFSET) == 0) {
+			dest_size = PAGESIZE;
+		} else {
+			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
+		}
+
+		/* take the smallest of the three */
+		xfer_size = MIN(src_size, dest_size);
+		xfer_size = MIN(xfer_size, size);
+
+		/*
+		 * if this is the last descriptor, and we are supposed to
+		 * generate a completion, generate a completion. same logic
+		 * for interrupt.
+		 */
+		ctrl = 0;
+		if (xfer_size == size) {
+			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
+				ctrl |= IOAT_DESC_CTRL_CMPL;
+			}
+			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
+				ctrl |= IOAT_DESC_CTRL_INTR;
+			}
+		}
+
+		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
+		    ctrl);
+
+		/* go to the next page */
+		src_addr += xfer_size;
+		dest_addr += xfer_size;
+		size -= xfer_size;
+	}
+
+	/*
+	 * if we are going to create a completion, save away the state so we
+	 * can poll on it.
+	 */
+	if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
+		priv->ip_generation = ring->cr_desc_gen_prev;
+		priv->ip_index = ring->cr_desc_prev;
+	}
+
+	/* if queue not defined, tell the DMA engine about it */
+	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
+		if (channel->ic_ver == IOAT_CBv1) {
+			ddi_put8(state->is_reg_handle,
+			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
+			    0x2);
+		} else {
+			ASSERT(channel->ic_ver == IOAT_CBv2);
+			ddi_put16(state->is_reg_handle,
+			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
+			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
+		}
+	}
+
+	mutex_exit(&ring->cr_desc_mutex);
+
+	return (DCOPY_SUCCESS);
+}
+
+
+/*
+ * ioat_cmd_post_dca()
+ *    Post a DCA (direct cache access) context descriptor into the ring.
+ *    Caller must hold cr_desc_mutex (called from ioat_cmd_post()).
+ */
+static void
+ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
+{
+	ioat_chan_dca_desc_t *desc;
+	ioat_chan_dca_desc_t *prev;
+	ioat_channel_t channel;
+
+
+	channel = ring->cr_chan;
+	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
+	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
+
+	/* keep track of the number of descs posted for cbv2 */
+	ring->cr_post_cnt++;
+
+	/*
+	 * post a context change descriptor. If dca has never been used on
+	 * this channel, or if the id doesn't match the last id used on this
+	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
+	 * and save away the id we're using.
+	 */
+	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
+	desc->dd_next_desc = 0x0;
+	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
+		channel->ic_dca_active = B_TRUE;
+		channel->ic_dca_current = dca_id;
+		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
+		desc->dd_cntx = dca_id;
+	}
+
+	/* Put the descriptors physical address in the previous descriptor */
+	/*LINTED:E_TRUE_LOGICAL_EXPR*/
+	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
+
+	/* sync the current desc (64-byte descriptors, hence << 6) */
+	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
+	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
+
+	/* update the previous desc and sync it too */
+	prev->dd_next_desc = ring->cr_phys_desc +
+	    (ring->cr_desc_next << 6);
+	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
+	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
+
+	/* save the current desc_next and desc_last for the completion */
+	ring->cr_desc_prev = ring->cr_desc_next;
+	ring->cr_desc_gen_prev = ring->cr_desc_gen;
+
+	/* increment next/gen so it points to the next free desc */
+	ring->cr_desc_next++;
+	if (ring->cr_desc_next > ring->cr_desc_last) {
+		ring->cr_desc_next = 0;
+		ring->cr_desc_gen++;
+	}
+
+	/*
+	 * if this is CBv2, link the descriptor to an empty descriptor. Since
+	 * we always leave one desc empty to detect full, this works out.
+	 */
+	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
+		desc = (ioat_chan_dca_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_next];
+		prev = (ioat_chan_dca_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_prev];
+		desc->dd_ctrl = 0;
+		desc->dd_next_desc = 0x0;
+
+		prev->dd_next_desc = ring->cr_phys_desc +
+		    (ring->cr_desc_next << 6);
+	}
+}
+
+
+/*
+ * ioat_cmd_post_copy()
+ *    Write one DMA copy descriptor (src/dest physical addresses, size, and
+ *    control bits) into the ring and chain it off the previous descriptor.
+ *    Caller must hold cr_desc_mutex (called from ioat_cmd_post()).
+ */
+static void
+ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
+    uint64_t dest_addr, uint32_t size, uint32_t ctrl)
+{
+	ioat_chan_dma_desc_t *desc;
+	ioat_chan_dma_desc_t *prev;
+	ioat_channel_t channel;
+
+
+	channel = ring->cr_chan;
+	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
+	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
+
+	/* keep track of the number of descs posted for cbv2 */
+	ring->cr_post_cnt++;
+
+	/* write in the DMA desc */
+	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
+	desc->dd_size = size;
+	desc->dd_src_paddr = src_addr;
+	desc->dd_dest_paddr = dest_addr;
+	desc->dd_next_desc = 0x0;
+
+	/* Put the descriptors physical address in the previous descriptor */
+	/*LINTED:E_TRUE_LOGICAL_EXPR*/
+	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
+
+	/* sync the current desc (64-byte descriptors, hence << 6) */
+	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
+	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
+
+	/* update the previous desc and sync it too */
+	prev->dd_next_desc = ring->cr_phys_desc +
+	    (ring->cr_desc_next << 6);
+	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
+	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
+
+	/* save the current desc_next and desc_gen for the completion */
+	ring->cr_desc_prev = ring->cr_desc_next;
+	ring->cr_desc_gen_prev = ring->cr_desc_gen;
+
+	/* increment next/gen so it points to the next free desc */
+	ring->cr_desc_next++;
+	if (ring->cr_desc_next > ring->cr_desc_last) {
+		ring->cr_desc_next = 0;
+		ring->cr_desc_gen++;
+	}
+
+	/*
+	 * if this is CBv2, link the descriptor to an empty descriptor. Since
+	 * we always leave one desc empty to detect full, this works out.
+	 */
+	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
+		desc = (ioat_chan_dma_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_next];
+		prev = (ioat_chan_dma_desc_t *)
+		    &ring->cr_desc[ring->cr_desc_prev];
+		desc->dd_size = 0;
+		desc->dd_ctrl = 0;
+		desc->dd_next_desc = 0x0;
+
+		prev->dd_next_desc = ring->cr_phys_desc +
+		    (ring->cr_desc_next << 6);
+	}
+}
+
+
+/*
+ * ioat_cmd_poll()
+ *    Check whether a previously posted command has completed by comparing
+ *    its saved (generation, ring index) against the hardware's completion
+ *    write-back.  cmd may be NULL just to advance the consumer state.
+ *    Returns DCOPY_COMPLETED, DCOPY_PENDING, or DCOPY_FAILURE.
+ */
+int
+ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
+{
+	ioat_channel_ring_t *ring;
+	ioat_cmd_private_t *priv;
+	ioat_channel_t channel;
+	uint64_t generation;
+	uint64_t last_cmpl;
+
+
+	channel = (ioat_channel_t)private;
+	priv = cmd->dp_private->pr_device_cmd_private;
+
+	ring = channel->ic_ring;
+	ASSERT(ring != NULL);
+
+	mutex_enter(&ring->cr_cmpl_mutex);
+
+	/* if the channel had a fatal failure, fail all polls */
+	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
+	    IOAT_CMPL_FAILED(channel)) {
+		mutex_exit(&ring->cr_cmpl_mutex);
+		return (DCOPY_FAILURE);
+	}
+
+	/*
+	 * if the current completion is the same as the last time we read one,
+	 * post is still pending, nothing further to do. We track completions
+	 * as indexes into the ring since post uses VAs and the H/W returns
+	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
+	 */
+	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
+	    DDI_DMA_SYNC_FORCPU);
+	last_cmpl = IOAT_CMPL_INDEX(channel);
+	if (last_cmpl != ring->cr_cmpl_last) {
+		/*
+		 * if we wrapped the ring, increment the generation. Store
+		 * the last cmpl. This logic assumes a physically contiguous
+		 * ring.
+		 */
+		if (last_cmpl < ring->cr_cmpl_last) {
+			ring->cr_cmpl_gen++;
+		}
+		ring->cr_cmpl_last = last_cmpl;
+		generation = ring->cr_cmpl_gen;
+
+	} else {
+		generation = ring->cr_cmpl_gen;
+	}
+
+	mutex_exit(&ring->cr_cmpl_mutex);
+
+	/*
+	 * if cmd isn't passed in, we'll return.  Useful for updating the
+	 * consumer pointer (ring->cr_cmpl_last).
+	 */
+	if (cmd == NULL) {
+		return (DCOPY_PENDING);
+	}
+
+	/*
+	 * if the post's generation is old, this post has completed. No reason
+	 * to go check the last completion. if the generation is the same
+	 * and if the post is before or = to the last completion processed,
+	 * the post has completed.
+	 */
+	if (priv->ip_generation < generation) {
+		return (DCOPY_COMPLETED);
+	} else if ((priv->ip_generation == generation) &&
+	    (priv->ip_index <= last_cmpl)) {
+		return (DCOPY_COMPLETED);
+	}
+
+	return (DCOPY_PENDING);
+}
+
+
+/*
+ * ioat_ring_reserve()
+ *    Compute how many descriptors cmd will consume (an optional DCA desc
+ *    plus one copy desc per page span) and verify the ring has room,
+ *    re-reading the hardware completion pointer (via ioat_cmd_poll(NULL))
+ *    before declaring the ring full.  Returns DCOPY_SUCCESS or
+ *    DCOPY_NORESOURCES.  Caller holds cr_desc_mutex.
+ */
+int
+ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
+    dcopy_cmd_t cmd)
+{
+	uint64_t dest_addr;
+	uint32_t dest_size;
+	uint64_t src_addr;
+	uint32_t src_size;
+	size_t xfer_size;
+	uint64_t desc;
+	int num_desc;
+	size_t size;
+	int i;
+
+
+	/*
+	 * figure out how many descriptors we need. This can include a dca
+	 * desc and multiple desc for a dma copy.
+	 */
+	num_desc = 0;
+	if ((channel->ic_ver == IOAT_CBv2) &&
+	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
+		num_desc++;
+	}
+	src_addr = cmd->dp.copy.cc_source;
+	dest_addr = cmd->dp.copy.cc_dest;
+	size = cmd->dp.copy.cc_size;
+	/* mirrors the page-span split done in ioat_cmd_post() */
+	while (size > 0) {
+		num_desc++;
+
+		/* adjust for any offset into the page */
+		if ((src_addr & PAGEOFFSET) == 0) {
+			src_size = PAGESIZE;
+		} else {
+			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
+		}
+		if ((dest_addr & PAGEOFFSET) == 0) {
+			dest_size = PAGESIZE;
+		} else {
+			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
+		}
+
+		/* take the smallest of the three */
+		xfer_size = MIN(src_size, dest_size);
+		xfer_size = MIN(xfer_size, size);
+
+		/* go to the next page */
+		src_addr += xfer_size;
+		dest_addr += xfer_size;
+		size -= xfer_size;
+	}
+
+	/* Make sure we have space for these descriptors */
+	desc = ring->cr_desc_next;
+	for (i = 0; i < num_desc; i++) {
+
+		/*
+		 * if this is the last descriptor in the ring, see if the
+		 * last completed descriptor is #0.
+		 */
+		if (desc == ring->cr_desc_last) {
+			if (ring->cr_cmpl_last == 0) {
+				/*
+				 * if we think the ring is full, update where
+				 * the H/W really is and check for full again.
+				 */
+				(void) ioat_cmd_poll(channel, NULL);
+				if (ring->cr_cmpl_last == 0) {
+					return (DCOPY_NORESOURCES);
+				}
+			}
+
+			/*
+			 * go to the next descriptor which is zero in this
+			 * case.
+			 */
+			desc = 0;
+
+		/*
+		 * if this is not the last descriptor in the ring, see if
+		 * the last completion we saw was the next descriptor.
+		 */
+		} else {
+			if ((desc + 1) == ring->cr_cmpl_last) {
+				/*
+				 * if we think the ring is full, update where
+				 * the H/W really is and check for full again.
+				 */
+				(void) ioat_cmd_poll(channel, NULL);
+				if ((desc + 1) == ring->cr_cmpl_last) {
+					return (DCOPY_NORESOURCES);
+				}
+			}
+
+			/* go to the next descriptor */
+			desc++;
+		}
+	}
+
+	return (DCOPY_SUCCESS);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,343 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/file.h>
+#include <sys/open.h>
+#include <sys/modctl.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/sysmacros.h>
+
+#include <vm/hat.h>
+#include <vm/as.h>
+
+#include <sys/ioat.h>
+
+
+extern void *ioat_statep;
+#define	ptob64(x)	(((uint64_t)(x)) << PAGESHIFT)
+
+static int ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode);
+#ifdef	DEBUG
+static int ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode);
+static int ioat_ioctl_test(ioat_state_t *state, void *arg, int mode);
+#endif
+
+/*
+ * ioat_ioctl()
+ *    Driver ioctl entry point.  Requires sufficient privilege (drv_priv);
+ *    the minor number selects the per-instance soft state.  Only the
+ *    register read ioctl is available in non-DEBUG builds; register write
+ *    and self-test are DEBUG-only.  Returns 0 or errno.
+ */
+/*ARGSUSED*/
+int
+ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
+{
+	ioat_state_t *state;
+	int instance;
+	int e;
+
+
+	/* reject callers without sufficient privilege */
+	e = drv_priv(cred);
+	if (e != 0) {
+		return (EPERM);
+	}
+	instance = getminor(dev);
+	if (instance == -1) {
+		return (EBADF);
+	}
+	state = ddi_get_soft_state(ioat_statep, instance);
+	if (state == NULL) {
+		return (EBADF);
+	}
+
+	switch (cmd) {
+	case IOAT_IOCTL_READ_REG:
+		e = ioat_ioctl_rdreg(state, (void *)arg, mode);
+		break;
+#ifdef	DEBUG
+	case IOAT_IOCTL_WRITE_REG:
+		e = ioat_ioctl_wrreg(state, (void *)arg, mode);
+		break;
+	case IOAT_IOCTL_TEST:
+		e = ioat_ioctl_test(state, (void *)arg, mode);
+		break;
+#endif
+
+	default:
+		e = ENXIO;
+	}
+
+	return (e);
+}
+
+
+/*
+ * ioat_ioctl_rdreg()
+ *    Read a device MMIO register on behalf of userland.  Copies in an
+ *    ioat_ioctl_rdreg_t (size = access width in bits, addr = offset into
+ *    the general registers), performs the read, and copies the result
+ *    back out.  Returns 0 or EFAULT.
+ *    NOTE(review): rdreg.addr is used unchecked as a register offset --
+ *    presumably bounded by the privileged-caller check in ioat_ioctl();
+ *    confirm no range check is needed.
+ */
+static int
+ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode)
+{
+	ioat_ioctl_rdreg_t rdreg;
+	int e;
+
+
+	e = ddi_copyin(arg, &rdreg, sizeof (ioat_ioctl_rdreg_t), mode);
+	if (e != 0) {
+		return (EFAULT);
+	}
+
+	/*
+	 * read a device register, where size is read size in bits, addr is
+	 * the offset into MMIO registers.
+	 */
+	switch (rdreg.size) {
+	case 8:
+		rdreg.data = (uint64_t)ddi_get8(state->is_reg_handle,
+		    (uint8_t *)&state->is_genregs[rdreg.addr]);
+		break;
+	case 16:
+		rdreg.data = (uint64_t)ddi_get16(state->is_reg_handle,
+		    (uint16_t *)&state->is_genregs[rdreg.addr]);
+		break;
+	case 32:
+		rdreg.data = (uint64_t)ddi_get32(state->is_reg_handle,
+		    (uint32_t *)&state->is_genregs[rdreg.addr]);
+		break;
+	case 64:
+		rdreg.data = (uint64_t)ddi_get64(state->is_reg_handle,
+		    (uint64_t *)&state->is_genregs[rdreg.addr]);
+		break;
+	default:
+		return (EFAULT);
+	}
+
+	e = ddi_copyout(&rdreg, arg, sizeof (ioat_ioctl_rdreg_t), mode);
+	if (e != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+
+#ifdef	DEBUG
+/*
+ * ioat_ioctl_wrreg()
+ *    DEBUG-only: write a device MMIO register on behalf of userland.
+ *    Copies in an ioat_ioctl_wrreg_t (size = access width in bits,
+ *    addr = offset into the general registers) and performs the write.
+ *    Returns 0 or EFAULT.
+ */
+static int
+ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode)
+{
+	ioat_ioctl_wrreg_t wrreg;
+	int e;
+
+
+	e = ddi_copyin(arg, &wrreg, sizeof (ioat_ioctl_wrreg_t), mode);
+	if (e != 0) {
+		return (EFAULT);
+	}
+
+	/*
+	 * write a device register, where size is write size in bits, addr is
+	 * the offset into MMIO registers.
+	 */
+	switch (wrreg.size) {
+	case 8:
+		ddi_put8(state->is_reg_handle,
+		    (uint8_t *)&state->is_genregs[wrreg.addr],
+		    (uint8_t)wrreg.data);
+		break;
+	case 16:
+		ddi_put16(state->is_reg_handle,
+		    (uint16_t *)&state->is_genregs[wrreg.addr],
+		    (uint16_t)wrreg.data);
+		break;
+	case 32:
+		ddi_put32(state->is_reg_handle,
+		    (uint32_t *)&state->is_genregs[wrreg.addr],
+		    (uint32_t)wrreg.data);
+		break;
+	case 64:
+		ddi_put64(state->is_reg_handle,
+		    (uint64_t *)&state->is_genregs[wrreg.addr],
+		    (uint64_t)wrreg.data);
+		break;
+	default:
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+
+/*
+ * ioat_ioctl_test()
+ *    DEBUG-only self-test: posts 32 linked dest-to-dest DMAs followed by a
+ *    real source-to-dest copy, polls the final command to completion, and
+ *    verifies the copied data.  Returns 0 on success, -1 on failure.
+ */
+/*ARGSUSED*/
+static int
+ioat_ioctl_test(ioat_state_t *state, void *arg, int mode)
+{
+	dcopy_handle_t channel;
+	dcopy_cmd_t cmd;
+	uint8_t *source;
+	uint_t buf_size;
+	uint_t poll_cnt;
+	uint8_t *dest;
+	uint8_t *buf;
+	int flags;
+	int i;
+	int e;
+
+
+	/* allocate 2 paged aligned 4k pages */
+	buf_size = 0x1000;
+	buf = kmem_zalloc((buf_size * 2) + 0x1000, KM_SLEEP);
+	source = (uint8_t *)(((uintptr_t)buf + PAGEOFFSET) & PAGEMASK);
+	dest = source + buf_size;
+
+	/* Init source buffer */
+	for (i = 0; i < buf_size; i++) {
+		source[i] = (uint8_t)(i & 0xFF);
+	}
+
+	/* allocate a DMA channel */
+	e = dcopy_alloc(DCOPY_SLEEP, &channel);
+	if (e != DCOPY_SUCCESS) {
+		cmn_err(CE_CONT, "dcopy_alloc() failed\n");
+		goto testfail_alloc;
+	}
+
+	/*
+	 * post 32 DMA copy's from dest to dest.  These will complete in order
+	 * so they won't stomp on each other. We don't care about the data
+	 * right now which is why we go dest to dest.
+	 */
+	flags = DCOPY_SLEEP;
+	for (i = 0; i < 32; i++) {
+		/*
+		 * if this is the second command, link the commands from here
+		 * on out. We only want to keep track of the last command. We
+		 * will poll on the last command completing (which infers that
+		 * the other commands completed). If any of the previous
+		 * commands fail, so will the last one. Linking the commands
+		 * also allows us to only call free for the last command. free
+		 * will free up the entire chain of commands.
+		 */
+		if (i == 1) {
+			flags |= DCOPY_ALLOC_LINK;
+		}
+		/*
+		 * NOTE(review): on dcopy_cmd_alloc() failure below the
+		 * channel (and any already-linked commands) are leaked --
+		 * this is debug-only test code; confirm whether a
+		 * channel-freeing unwind path is wanted here.
+		 */
+		e = dcopy_cmd_alloc(channel, flags, &cmd);
+		if (e != DCOPY_SUCCESS) {
+			cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n");
+			goto testfail_alloc;
+		}
+
+		ASSERT(cmd->dp_version == DCOPY_CMD_V0);
+		cmd->dp_cmd = DCOPY_CMD_COPY;
+		cmd->dp_flags = DCOPY_CMD_NOFLAGS;
+
+		/* do a bunch of dest to dest DMA's */
+		/*
+		 * BUGFIX: the source physical address used the *dest* page
+		 * offset; use the source's own offset (both are page
+		 * aligned here, so no behavior change, but it was wrong
+		 * as written).
+		 */
+		cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat,
+		    (caddr_t)source)) + ((uintptr_t)source & PAGEOFFSET);
+		cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat,
+		    (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET);
+		cmd->dp.copy.cc_size = PAGESIZE;
+
+		e = dcopy_cmd_post(cmd);
+		if (e != DCOPY_SUCCESS) {
+			cmn_err(CE_CONT, "dcopy_post() failed\n");
+			goto testfail_post;
+		}
+	}
+
+	e = dcopy_cmd_alloc(channel, flags, &cmd);
+	if (e != DCOPY_SUCCESS) {
+		cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n");
+		goto testfail_alloc;
+	}
+
+	/* now queue up the DMA we are going to check status and data for  */
+	cmd->dp_cmd = DCOPY_CMD_COPY;
+	cmd->dp_flags = DCOPY_CMD_INTR;
+	cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat,
+	    (caddr_t)source)) + ((uintptr_t)source & PAGEOFFSET);
+	cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat,
+	    (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET);
+	cmd->dp.copy.cc_size = PAGESIZE;
+	e = dcopy_cmd_post(cmd);
+	if (e != DCOPY_SUCCESS) {
+		cmn_err(CE_CONT, "dcopy_post() failed\n");
+		goto testfail_post;
+	}
+
+	/* check the status of the last command; block after 16 spins */
+	poll_cnt = 0;
+	flags = DCOPY_POLL_NOFLAGS;
+	while ((e = dcopy_cmd_poll(cmd, flags)) == DCOPY_PENDING) {
+		poll_cnt++;
+		if (poll_cnt >= 16) {
+			flags |= DCOPY_POLL_BLOCK;
+		}
+	}
+	if (e != DCOPY_COMPLETED) {
+		cmn_err(CE_CONT, "dcopy_poll() failed\n");
+		goto testfail_poll;
+	}
+
+	/* since the cmd's are linked we only need to pass in the last cmd */
+	dcopy_cmd_free(&cmd);
+	dcopy_free(&channel);
+
+	/* verify the data */
+	for (i = 0; i < PAGESIZE; i++) {
+		if (dest[i] != (uint8_t)(i & 0xFF)) {
+			cmn_err(CE_CONT,
+			    "dcopy_data_compare() failed, %p[%d]: %x, %x\n",
+			    (void *)dest, i, dest[i], i & 0xFF);
+			/*
+			 * BUGFIX: free buf before returning; this path
+			 * previously leaked it.  (cmd and channel were
+			 * already freed above, so the old unused
+			 * testfail_data_compare label could not be used --
+			 * it would double free them.)
+			 */
+			kmem_free(buf, (buf_size * 2) + 0x1000);
+			return (-1);
+		}
+	}
+
+	kmem_free(buf, (buf_size * 2) + 0x1000);
+
+	return (0);
+
+testfail_poll:
+testfail_post:
+	dcopy_cmd_free(&cmd);
+	dcopy_free(&channel);
+testfail_alloc:
+	kmem_free(buf, (buf_size * 2) + 0x1000);
+
+	return (-1);
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/io/ioat/ioat_rs.c	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,246 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#include <sys/ioat.h>
+
+
+/* structure used to keep track of resources */
+typedef struct ioat_rs_s {
+	/*
+	 * Bounds of resource allocation. We will start allocating at rs_min
+	 * and rollover at rs_max+1 (rs_max is included). e.g. for rs_min=0
+	 * and rs_max=7, we will have 8 total resources which can be alloced.
+	 */
+	uint_t rs_min;
+	uint_t rs_max;
+
+	/*
+	 * rs_free points to an array of 64-bit values used to track resource
+	 * allocation. rs_free_size is the free buffer size in bytes.
+	 */
+	uint64_t *rs_free;
+	uint_t rs_free_size;
+
+	/*
+	 * rs_last tracks the last alloc'd resource. This allows us to do a round
+	 * robin allocation.
+	 */
+	uint_t rs_last;
+
+	kmutex_t rs_mutex;
+} ioat_rs_t;
+
+
+/*
+ * ioat_rs_init()
+ *    Initialize the resource structure. This structure will be protected
+ *    by a mutex at the iblock_cookie passed in. init() returns a handle to be
+ *    used for the rest of the resource functions. This code is written assuming
+ *    that min_val will be close to 0. Therefore, we will allocate the free
+ *    buffer only taking max_val into account.
+ */
+void
+ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val,
+    ioat_rs_hdl_t *handle)
+{
+	ioat_rs_t *rstruct;
+	uint_t array_size;
+	uint_t index;
+
+
+	ASSERT(handle != NULL);
+	ASSERT(min_val < max_val);
+
+	/* alloc space for resource structure */
+	rstruct = kmem_alloc(sizeof (ioat_rs_t), KM_SLEEP);
+
+	/*
+	 * Test to see if the max value is 64-bit aligned. If so, we don't need
+	 * to allocate an extra 64-bit word. alloc space for free buffer
+	 * (8 bytes per uint64_t).
+	 */
+	if ((max_val & 0x3F) == 0) {
+		rstruct->rs_free_size = (max_val >> 6) * 8;
+	} else {
+		rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
+	}
+	rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
+
+	/* Initialize resource structure */
+	rstruct->rs_min = min_val;
+	rstruct->rs_last = min_val;
+	rstruct->rs_max = max_val;
+	mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER,
+	    state->is_iblock_cookie);
+
+	/* Mark all resources as free */
+	array_size = rstruct->rs_free_size >> 3;
+	for (index = 0; index < array_size; index++) {
+		rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
+	}
+
+	/* setup handle which is returned from this function */
+	*handle = rstruct;
+}
+
+
+/*
+ * ioat_rs_fini()
+ *    Frees up the space allocated in init().  Notice that a pointer to the
+ *    handle is used for the parameter.  fini() will set the handle to NULL
+ *    before returning.
+ */
+void
+ioat_rs_fini(ioat_rs_hdl_t *handle)
+{
+	ioat_rs_t *rstruct;
+
+
+	ASSERT(handle != NULL);
+
+	rstruct = (ioat_rs_t *)*handle;
+
+	mutex_destroy(&rstruct->rs_mutex);
+	kmem_free(rstruct->rs_free, rstruct->rs_free_size);
+	kmem_free(rstruct, sizeof (ioat_rs_t));
+
+	/* set handle to null.  This helps catch bugs. */
+	*handle = NULL;
+}
+
+
+/*
+ * ioat_rs_alloc()
+ *    alloc a resource. If alloc fails, we are out of resources.
+ */
+int
+ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *resource)
+{
+	ioat_rs_t *rstruct;
+	uint_t array_idx;
+	uint64_t free;
+	uint_t index;
+	uint_t last;
+	uint_t min;
+	uint_t max;
+
+
+	ASSERT(handle != NULL);
+	ASSERT(resource != NULL);
+
+	rstruct = (ioat_rs_t *)handle;
+
+	mutex_enter(&rstruct->rs_mutex);
+	min = rstruct->rs_min;
+	max = rstruct->rs_max;
+
+	/*
+	 * Find a free resource. This will return out of the loop once it finds
+	 * a free resource. There are a total of 'max'-'min'+1 resources.
+	 * Performs a round robin allocation.
+	 */
+	for (index = min; index <= max; index++) {
+
+		array_idx = rstruct->rs_last >> 6;
+		free = rstruct->rs_free[array_idx];
+		last = rstruct->rs_last & 0x3F;
+
+		/* if the next resource to check is free */
+		if ((free & ((uint64_t)1 << last)) != 0) {
+			/* we are using this resource */
+			*resource = rstruct->rs_last;
+
+			/* take it out of the free list */
+			rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
+
+			/*
+			 * increment the last count so we start checking the
+			 * next resource on the next alloc().  Note the rollover
+			 * at 'max'+1.
+			 */
+			rstruct->rs_last++;
+			if (rstruct->rs_last > max) {
+				rstruct->rs_last = rstruct->rs_min;
+			}
+
+			/* unlock the resource structure */
+			mutex_exit(&rstruct->rs_mutex);
+
+			return (DDI_SUCCESS);
+		}
+
+		/*
+		 * This resource is not free, let's go to the next one. Note the
+		 * rollover at 'max'+1.
+		 */
+		rstruct->rs_last++;
+		if (rstruct->rs_last > max) {
+			rstruct->rs_last = rstruct->rs_min;
+		}
+	}
+
+	mutex_exit(&rstruct->rs_mutex);
+
+	return (DDI_FAILURE);
+}
+
+
+/*
+ * ioat_rs_free()
+ *    Free the previously alloc'd resource.  Once a resource has been free'd,
+ *    it can be used again when alloc is called.
+ */
+void
+ioat_rs_free(ioat_rs_hdl_t handle, uint_t resource)
+{
+	ioat_rs_t *rstruct;
+	uint_t array_idx;
+	uint_t offset;
+
+
+	ASSERT(handle != NULL);
+
+	rstruct = (ioat_rs_t *)handle;
+	ASSERT(resource >= rstruct->rs_min);
+	ASSERT(resource <= rstruct->rs_max);
+
+	mutex_enter(&rstruct->rs_mutex);
+
+	/* Put the resource back in the free list */
+	array_idx = resource >> 6;
+	offset = resource & 0x3F;
+	rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
+
+	mutex_exit(&rstruct->rs_mutex);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/ioat/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/ioat/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+#	This makefile drives the production of the ioat driver kernel
+#	module.
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= ioat
+OBJECTS		= $(IOAT_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(ROOT_PSM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR     = $(UTSBASE)/i86pc/io/ioat
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+#	Dependency
+#
+LDFLAGS		+= -dy -Nmisc/dcopy
+
+#
+#	Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR	 = $(OBJS_DIR)
+CLEANFILES	+= $(MODSTUBS_O)
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86pc/sys/ioat.h	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_IOAT_H
+#define	_SYS_IOAT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/dcopy.h>
+#include <sys/dcopy_device.h>
+
+
+/* ioat ioctls */
+#define	IOATIOC			('T'<< 8)
+typedef enum {
+	IOAT_IOCTL_WRITE_REG	= (IOATIOC | 0x0),
+	IOAT_IOCTL_READ_REG	= (IOATIOC | 0x1),
+	IOAT_IOCTL_TEST		= (IOATIOC | 0x2)
+} ioat_ioctl_enum_t;
+
+typedef struct ioat_ioctl_reg_s {
+	uint_t		size;
+	uint_t		addr;
+	uint64_t	data;
+} ioat_ioctl_reg_t;
+typedef ioat_ioctl_reg_t ioat_ioctl_wrreg_t;
+typedef ioat_ioctl_reg_t ioat_ioctl_rdreg_t;
+
+#ifdef _KERNEL
+/* *** Driver Private Below *** */
+
+/* IOAT_DMACAPABILITY flags */
+#define	IOAT_DMACAP_PAGEBREAK	0x1
+#define	IOAT_DMACAP_CRC		0x2
+#define	IOAT_DMACAP_MARKERSKIP	0x4
+#define	IOAT_DMACAP_XOR		0x8
+#define	IOAT_DMACAP_DCA		0x10
+
+/* IOAT_INTRCTL bits */
+#define	IOAT_INTRCTL_MASTER_EN	0x1
+#define	IOAT_INTRCTL_INTR_STAT	0x2
+
+/* MMIO Registers */
+#define	IOAT_CHANCNT		0x0	/* 8-bit */
+#define	IOAT_XFERCAP		0x1	/* 8-bit */
+#define	IOAT_GENCTRL		0x2	/* 8-bit */
+#define	IOAT_INTRCTL		0x3	/* 8-bit */
+#define	IOAT_ATTNSTATUS		0x4	/* 32-bit */
+#define	IOAT_CBVER		0x8	/* 8-bit */
+#define	IOAT_PERPORT_OFF	0xA	/* 16-bit */
+#define	IOAT_INTRDELAY		0xC	/* 16-bit */
+#define	IOAT_CSSTATUS		0xE	/* 16-bit */
+#define	IOAT_DMACAPABILITY	0x10	/* 32-bit */
+
+#define	IOAT_CHANNELREG_OFFSET	0x80
+
+/* Channel Registers */
+#define	IOAT_CHAN_CTL		0x0	/* 16-bit */
+#define	IOAT_CHAN_COMP		0x2	/* 16-bit */
+#define	IOAT_CHAN_CMPL_LO	0x18	/* 32-bit */
+#define	IOAT_CHAN_CMPL_HI	0x1C	/* 32-bit */
+#define	IOAT_CHAN_ERR		0x28	/* 32-bit */
+#define	IOAT_CHAN_ERRMASK	0x2C	/* 32-bit */
+#define	IOAT_CHAN_DCACTRL	0x30	/* 32-bit */
+
+#define	IOAT_V1_CHAN_STS_LO	0x4	/* 32-bit */
+#define	IOAT_V1_CHAN_STS_HI	0x8	/* 32-bit */
+#define	IOAT_V1_CHAN_ADDR_LO	0x0C	/* 32-bit */
+#define	IOAT_V1_CHAN_ADDR_HI	0x10	/* 32-bit */
+#define	IOAT_V1_CHAN_CMD	0x14	/* 8-bit */
+
+#define	IOAT_V2_CHAN_CMD	0x4	/* 8-bit */
+#define	IOAT_V2_CHAN_CNT	0x6	/* 16-bit */
+#define	IOAT_V2_CHAN_STS_LO	0x8	/* 32-bit */
+#define	IOAT_V2_CHAN_STS_HI	0xC	/* 32-bit */
+#define	IOAT_V2_CHAN_ADDR_LO	0x10	/* 32-bit */
+#define	IOAT_V2_CHAN_ADDR_HI	0x14	/* 32-bit */
+
+#define	IOAT_CHAN_STS_ADDR_MASK		0xFFFFFFFFFFFFFFC0
+#define	IOAT_CHAN_STS_XFER_MASK		0x3F
+#define	IOAT_CHAN_STS_FAIL_MASK		0x6
+#define	IOAT_CMPL_INDEX(channel)	\
+	(((*channel->ic_cmpl & IOAT_CHAN_STS_ADDR_MASK) - \
+	ring->cr_phys_desc) >> 6)
+#define	IOAT_CMPL_FAILED(channel)	\
+	(*channel->ic_cmpl & IOAT_CHAN_STS_FAIL_MASK)
+
+
+typedef struct ioat_chan_desc_s {
+	uint32_t	dd_res0;
+	uint32_t	dd_ctrl;
+	uint64_t	dd_res1;
+	uint64_t	dd_res2;
+	uint64_t	dd_next_desc;
+	uint64_t	dd_res4;
+	uint64_t	dd_res5;
+	uint64_t	dd_res6;
+	uint64_t	dd_res7;
+} ioat_chan_desc_t;
+
+/* dca dd_ctrl bits */
+#define	IOAT_DESC_CTRL_OP_CNTX	((uint32_t)0xFF << 24)
+#define	IOAT_DESC_CTRL_CNTX_CHNG	0x1
+typedef struct ioat_chan_dca_desc_s {
+	uint32_t	dd_cntx;
+	uint32_t	dd_ctrl;
+	uint64_t	dd_res1;
+	uint64_t	dd_res2;
+	uint64_t	dd_next_desc;
+	uint64_t	dd_res4;
+	uint64_t	dd_res5;
+	uint64_t	dd_res6;
+	uint64_t	dd_res7;
+} ioat_chan_dca_desc_t;
+
+/* dma dd_ctrl bits */
+#define	IOAT_DESC_CTRL_OP_DMA	(0x0 << 24)
+#define	IOAT_DESC_DMACTRL_NULL	0x20
+#define	IOAT_DESC_CTRL_FENCE	0x10
+#define	IOAT_DESC_CTRL_CMPL	0x8
+#define	IOAT_DESC_CTRL_INTR	0x1
+typedef struct ioat_chan_dma_desc_s {
+	uint32_t	dd_size;
+	uint32_t	dd_ctrl;
+	uint64_t	dd_src_paddr;
+	uint64_t	dd_dest_paddr;
+	uint64_t	dd_next_desc;
+	uint64_t	dd_next_src_paddr;	/* v2 only */
+	uint64_t	dd_next_dest_paddr;	/* v2 only */
+	uint64_t	dd_res6;
+	uint64_t	dd_res7;
+} ioat_chan_dma_desc_t;
+
+
+typedef enum {
+	IOAT_CBv1,
+	IOAT_CBv2
+} ioat_version_t;
+
+/* ioat private data per command */
+typedef struct ioat_cmd_private_s {
+	uint64_t	ip_generation;
+	uint64_t	ip_index;
+	dcopy_cmd_t	ip_next;
+} ioat_cmd_private_t;
+
+/* descriptor ring state */
+typedef struct ioat_channel_ring_s {
+	/* protects cr_cmpl_gen & cr_cmpl_last */
+	kmutex_t		cr_cmpl_mutex;
+
+	/* desc ring generation for the last completion we saw */
+	uint64_t		cr_cmpl_gen;
+
+	/* last descriptor index we saw complete */
+	uint64_t		cr_cmpl_last;
+
+	/* protects cr_desc_* */
+	kmutex_t		cr_desc_mutex;
+
+	/*
+	 * last descriptor posted. used to update its next pointer when we
+	 * add a new desc. Also used to tack the completion (See comment for
+	 * cr_desc_gen_prev).
+	 */
+	uint64_t		cr_desc_prev;
+
+	/* where to put the next descriptor */
+	uint64_t		cr_desc_next;
+
+	/* what the current desc ring generation is */
+	uint64_t		cr_desc_gen;
+
+	/*
+	 * used during cmd_post to track the last desc posted. cr_desc_next
+	 * and cr_desc_gen will be pointing to the next free desc after
+	 * writing the descriptor to the ring. But we want to track the
+	 * completion for the last descriptor posted.
+	 */
+	uint64_t		cr_desc_gen_prev;
+
+	/* the last desc in the ring (for wrap) */
+	uint64_t		cr_desc_last;
+
+	/* pointer to the head of the ring */
+	ioat_chan_desc_t	*cr_desc;
+
+	/* physical address of the head of the ring */
+	uint64_t		cr_phys_desc;
+
+	/* back pointer to the channel state */
+	struct ioat_channel_s	*cr_chan;
+
+	/* for CB v2, number of desc posted (written to IOAT_V2_CHAN_CNT) */
+	uint_t			cr_post_cnt;
+} ioat_channel_ring_t;
+
+/* track channel state so we can handle a failure */
+typedef enum {
+	IOAT_CHANNEL_OK = 0,
+	IOAT_CHANNEL_IN_FAILURE = 1
+} ic_channel_state_t;
+
+typedef struct ioat_channel_s *ioat_channel_t;
+struct ioat_channel_s {
+	/* channel's ring state */
+	ioat_channel_ring_t	*ic_ring;
+
+	/* IOAT_CBv1 || IOAT_CBv2 */
+	ioat_version_t		ic_ver;
+
+	/*
+	 * state to determine if it's OK to post to the channel and if all
+	 * future polls should return failure.
+	 */
+	ic_channel_state_t	ic_channel_state;
+
+	/* channel command cache (*_cmd_alloc, *_cmd_free, etc) */
+	kmem_cache_t		*ic_cmd_cache;
+
+	/* dcopy state for dcopy_device_channel_notify() call */
+	dcopy_handle_t		ic_dcopy_handle;
+
+	/* location in memory where completions are DMA'ed into */
+	volatile uint64_t	*ic_cmpl;
+
+	/* channel specific registers */
+	uint8_t			*ic_regs;
+
+	/* if this channel is using DCA */
+	boolean_t		ic_dca_active;
+
+	/* DCA ID the channel is currently pointing to */
+	uint32_t		ic_dca_current;
+
+	/* device's channel number */
+	uint_t			ic_chan_num;
+
+	/* number of descriptors in ring */
+	uint_t			ic_chan_desc_cnt;
+
+	/* descriptor ring alloc state */
+	ddi_dma_handle_t	ic_desc_dma_handle;
+	size_t			ic_desc_alloc_size;
+	ddi_acc_handle_t	ic_desc_handle;
+	ddi_dma_cookie_t	ic_desc_cookies;
+
+	/* completion buffer alloc state */
+	ddi_dma_handle_t	ic_cmpl_dma_handle;
+	size_t			ic_cmpl_alloc_size;
+	ddi_acc_handle_t	ic_cmpl_handle;
+	ddi_dma_cookie_t	ic_cmpl_cookie;
+	uint64_t		ic_phys_cmpl;
+
+	/* if inuse, we need to re-init the channel during resume */
+	boolean_t		ic_inuse;
+
+	/* backpointer to driver state */
+	struct ioat_state_s	*ic_state;
+};
+
+typedef struct ioat_rs_s *ioat_rs_hdl_t;
+
+/* driver state */
+typedef struct ioat_state_s {
+	dev_info_t		*is_dip;
+	int			is_instance;
+
+	kmutex_t		is_mutex;
+
+	/* register handle and pointer to registers */
+	ddi_acc_handle_t	is_reg_handle;
+	uint8_t			*is_genregs;
+
+	/* IOAT_CBv1 || IOAT_CBv2 */
+	ioat_version_t		is_ver;
+
+	/* channel state */
+	ioat_channel_t		is_channel;
+	size_t			is_chansize;
+	ioat_rs_hdl_t		is_channel_rs;
+
+	ddi_iblock_cookie_t	is_iblock_cookie;
+
+	/* device info */
+	uint_t			is_chanoff;
+	uint_t			is_num_channels;
+	uint_t			is_maxxfer;
+	uint_t			is_cbver;
+	uint_t			is_intrdelay;
+	uint_t			is_status;
+	uint_t			is_capabilities;
+
+	/* dcopy_device_register()/dcopy_device_unregister() state */
+	dcopy_device_handle_t	is_device_handle;
+	dcopy_device_info_t	is_deviceinfo;
+} ioat_state_t;
+
+
+int ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
+    int *rval);
+
+void ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val,
+    ioat_rs_hdl_t *handle);
+void ioat_rs_fini(ioat_rs_hdl_t *handle);
+int ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *rs);
+void ioat_rs_free(ioat_rs_hdl_t handle, uint_t rs);
+
+int ioat_channel_init(ioat_state_t *state);
+void ioat_channel_fini(ioat_state_t *state);
+void ioat_channel_suspend(ioat_state_t *state);
+int ioat_channel_resume(ioat_state_t *state);
+
+int ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
+    uint_t size, dcopy_query_channel_t *info, void *channel_private);
+void ioat_channel_free(void *channel_private);
+void ioat_channel_intr(ioat_channel_t channel);
+int ioat_cmd_alloc(void *channel, int flags, dcopy_cmd_t *cmd);
+void ioat_cmd_free(void *channel, dcopy_cmd_t *cmd);
+int ioat_cmd_post(void *channel, dcopy_cmd_t cmd);
+int ioat_cmd_poll(void *channel, dcopy_cmd_t cmd);
+void ioat_unregister_complete(void *device_private, int status);
+
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_IOAT_H */
--- a/usr/src/uts/i86xpv/Makefile.files	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86xpv/Makefile.files	Fri May 23 20:14:10 2008 -0700
@@ -179,12 +179,13 @@
 #
 #			driver & misc modules
 #
-ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
+BALLOON_OBJS += balloon_drv.o
 DOMCAPS_OBJS += domcaps.o
-BALLOON_OBJS += balloon_drv.o
 EVTCHN_OBJS += evtchn_dev.o
 GFX_PRIVATE_OBJS += gfx_private.o gfxp_pci.o gfxp_segmap.o \
 		    gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o
+IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o
+ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o
 PCI_E_MISC_OBJS += pcie.o pcie_fault.o
 PCI_E_NEXUS_OBJS += npe.o npe_misc.o
 PCI_E_NEXUS_OBJS += pci_common.o pci_kstats.o pci_tools.o
--- a/usr/src/uts/i86xpv/Makefile.i86xpv.shared	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86xpv/Makefile.i86xpv.shared	Fri May 23 20:14:10 2008 -0700
@@ -240,6 +240,7 @@
 #
 
 DRV_KMODS	+= rootnex
+DRV_KMODS	+= ioat
 DRV_KMODS	+= isa
 DRV_KMODS	+= pci
 DRV_KMODS	+= npe
--- a/usr/src/uts/i86xpv/Makefile.rules	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/i86xpv/Makefile.rules	Fri May 23 20:14:10 2008 -0700
@@ -57,6 +57,10 @@
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
 
+$(OBJS_DIR)/%.o:		$(UTSBASE)/i86pc/io/ioat/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
 $(OBJS_DIR)/%.o:		$(UTSBASE)/i86pc/io/pci/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
@@ -215,6 +219,9 @@
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/cpr/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
+$(LINTS_DIR)/%.ln:		$(UTSBASE)/i86pc/io/ioat/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/i86pc/io/pci/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/i86xpv/ioat/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86xpv/ioat/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+#
+#	This makefile drives the production of the ioat driver kernel
+#	module.
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= ioat
+OBJECTS		= $(IOAT_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(ROOT_PSM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR     = $(UTSBASE)/i86pc/io/ioat
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/i86xpv/Makefile.i86xpv
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+#	Dependency
+#
+LDFLAGS		+= -dy -Nmisc/dcopy
+
+#
+#	Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR	 = $(OBJS_DIR)
+CLEANFILES	+= $(MODSTUBS_O)
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/i86xpv/Makefile.targ
+
--- a/usr/src/uts/intel/Makefile.files	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/intel/Makefile.files	Fri May 23 20:14:10 2008 -0700
@@ -138,6 +138,7 @@
 CMLB_OBJS += cmlb.o
 CPUNEX_OBJS += cpunex.o
 DADK_OBJS += dadk.o
+DCOPY_OBJS += dcopy.o
 DNET_OBJS += dnet.o mii.o
 FD_OBJS += fd.o
 GDA_OBJS += gda.o
--- a/usr/src/uts/intel/Makefile.intel.shared	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/intel/Makefile.intel.shared	Fri May 23 20:14:10 2008 -0700
@@ -528,6 +528,7 @@
 MISC_KMODS	+= consconfig
 MISC_KMODS	+= ctf
 MISC_KMODS	+= dadk
+MISC_KMODS	+= dcopy
 MISC_KMODS	+= dls
 MISC_KMODS	+= drm
 MISC_KMODS	+= fssnap_if
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/usr/src/uts/intel/dcopy/Makefile	Fri May 23 20:14:10 2008 -0700
@@ -0,0 +1,84 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/dcopy/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+#	This makefile drives the production of the dcopy
+#	kernel module.
+#
+#	intel architecture dependent
+#
+
+#
+#	Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE	= ../..
+
+#
+#	Define the module and object file sets.
+#
+MODULE		= dcopy
+OBJECTS		= $(DCOPY_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(DCOPY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(ROOT_MISC_DIR)/$(MODULE)
+
+#
+#	Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+#	Define targets
+#
+ALL_TARGET	= $(BINARY)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE)
+
+#
+#	Default build targets.
+#
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+#
+#	Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/intel/ia32/ml/modstubs.s	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s	Fri May 23 20:14:10 2008 -0700
@@ -1313,6 +1313,22 @@
 	END_MODULE(kssl);
 #endif
 
+/*
+ * Stubs for dcopy, for Intel IOAT KAPIs
+ */
+#ifndef DCOPY_MODULE
+	MODULE(dcopy,misc);
+	NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one);
+	END_MODULE(dcopy);
+#endif
+
 / this is just a marker for the area of text that contains stubs 
 
 	ENTRY_NP(stubs_end)
--- a/usr/src/uts/sparc/ml/modstubs.s	Fri May 23 18:47:44 2008 -0700
+++ b/usr/src/uts/sparc/ml/modstubs.s	Fri May 23 20:14:10 2008 -0700
@@ -1265,6 +1265,22 @@
 	END_MODULE(kssl);
 #endif
 
+/*
+ * Stubs for dcopy, for Intel IOAT KAPIs
+ */
+#ifndef DCOPY_MODULE
+	MODULE(dcopy,misc);
+	NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one);
+	NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one);
+	END_MODULE(dcopy);
+#endif
+
 ! this is just a marker for the area of text that contains stubs
 	.seg ".text"
 	.global stubs_end