Mercurial > illumos > illumos-gate
changeset 6707:c3bc7e4da11b
6582335 TCP/IP receive-side zero CPU copy for support of Intel I/OAT hardware
6582330 sodirect - socket direct, for support of Intel I/OAT hardware
6582323 uioa - uio asynchronous, for support of Intel I/OAT hardware
6567008 driver for intel ioat v1 & v2 DMA engine needed
6691281 tcp->tcp_sodirect == 0, file: ../../common/inet/tcp/tcp_fusion .c, line: 291
6693127 putback for 6567008/6582323/6582330/6582335 causes PANIC when using SCTP sockets
6694188 Solaris PIT test net/tcp/tests/win0_urg_processing timed out
6694389 assertion failed: ((tcp)->tcp_sodirect == 0 || !((tcp)->tcp_sodirect->sod_state & 0x0001))
line wrap: on
line diff
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/Makefile Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#pragma ident "%Z%%M% %I% %E% SMI" -# - -include ../Makefile.com - -TMPLFILES += postinstall preremove -DATAFILES += depend - -.KEEP_STATE: - -all: $(FILES) -install: all pkg - -include ../Makefile.targ -include ../Makefile.prtarg
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# - -# -# This required package information file describes characteristics of the -# package, such as package abbreviation, full package name, package version, -# and package architecture. -# -PKG="SUNWdcopy" -NAME="Sun dcopy DMA drivers" -ARCH="i386" -CATEGORY="system" -BASEDIR=/ -SUNW_PKGVERS="1.0" -SUNW_PKGTYPE="root" -CLASSES="none" -DESC="Sun dcopy DMA drivers" -SUNW_PRODNAME="SunOS" -SUNW_PRODVERS="RELEASE/VERSION" -VERSION="ONVERS,REV=0.0.0" -VENDOR="Sun Microsystems, Inc." -HOTLINE="Please contact your local service provider" -EMAIL="" -MAXINST="1000" -SUNW_PKG_ALLZONES="true" -SUNW_PKG_HOLLOW="true" -SUNW_PKG_THISZONE="false"
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#!/bin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# - -include drv_utils - -CB1='"pciex8086,1a38" "pciex8086,360b"' -CB2='"pciex8086,402f"' - -pkg_drvadd -i "'$CB1 $CB2'" ioat || exit 1
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/sbin/sh -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# - -include drv_utils - -pkg_drvrem ioat || exit 1 -
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_com Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,53 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This required package information file contains a list of package contents. -# The 'pkgmk' command uses this file to identify the contents of a package -# and their location on the development machine when building the package. -# Can be created via a text editor or through use of the 'pkgproto' command. - -#!search <pathname pathname ...> # where to find pkg objects -#!include <filename> # include another 'prototype' file -#!default <mode> <owner> <group> # default used if not specified on entry -#!<param>=<value> # puts parameter in pkg environment - -# -# packaging files -i copyright -i depend -i pkginfo -i postinstall -i preremove - -# -# source locations relative to the prototype file -# -# -# SUNWdcopy -# -d none kernel 0755 root sys -d none kernel/misc 0755 root sys -f none kernel/misc/dcopy 0755 root sys
--- a/deleted_files/usr/src/pkgdefs/SUNWdcopy/prototype_i386 Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" -# -# This required package information file contains a list of package contents. -# The 'pkgmk' command uses this file to identify the contents of a package -# and their location on the development machine when building the package. -# Can be created via a text editor or through use of the 'pkgproto' command. 
- -#!search <pathname pathname ...> # where to find pkg objects -#!include <filename> # include another 'prototype' file -#!default <mode> <owner> <group> # default used if not specified on entry -#!<param>=<value> # puts parameter in pkg environment -# -# -# Include ISA independent files (prototype_com) -# -!include prototype_com -# -# -# List files which are i386 specific here -# -# SUNWioat -# -d none kernel/misc/amd64 0755 root sys -f none kernel/misc/amd64/dcopy 0755 root sys -d none platform 0755 root sys -d none platform/i86pc 0755 root sys -d none platform/i86pc/kernel 0755 root sys -d none platform/i86pc/kernel/drv 0755 root sys -f none platform/i86pc/kernel/drv/ioat 755 root sys -f none platform/i86pc/kernel/drv/ioat.conf 644 root sys -d none platform/i86pc/kernel/drv/amd64 0755 root sys -f none platform/i86pc/kernel/drv/amd64/ioat 755 root sys -d none platform/i86xpv 0755 root sys -d none platform/i86xpv/kernel 0755 root sys -d none platform/i86xpv/kernel/drv 0755 root sys -f none platform/i86xpv/kernel/drv/ioat 755 root sys -f none platform/i86xpv/kernel/drv/ioat.conf 644 root sys -d none platform/i86xpv/kernel/drv/amd64 0755 root sys -f none platform/i86xpv/kernel/drv/amd64/ioat 755 root sys
--- a/deleted_files/usr/src/uts/common/io/dcopy.c Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,932 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * dcopy.c - * dcopy misc module - */ - -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/modctl.h> -#include <sys/sysmacros.h> -#include <sys/atomic.h> - - -#include <sys/dcopy.h> -#include <sys/dcopy_device.h> - - -/* Number of entries per channel to allocate */ -uint_t dcopy_channel_size = 1024; - - -typedef struct dcopy_list_s { - list_t dl_list; - kmutex_t dl_mutex; - uint_t dl_cnt; /* num entries on list */ -} dcopy_list_t; - -/* device state for register/unregister */ -struct dcopy_device_s { - /* DMA device drivers private pointer */ - void *dc_device_private; - - /* to track list of channels from this DMA device */ - dcopy_list_t dc_devchan_list; - list_node_t dc_device_list_node; - - /* - * dc_removing_cnt track how many channels still have to be freed up - * before it's safe to allow the DMA device driver to detach. - */ - uint_t dc_removing_cnt; - dcopy_device_cb_t *dc_cb; - - dcopy_device_info_t dc_info; - -}; - -typedef struct dcopy_stats_s { - kstat_named_t cs_bytes_xfer; - kstat_named_t cs_cmd_alloc; - kstat_named_t cs_cmd_post; - kstat_named_t cs_cmd_poll; - kstat_named_t cs_notify_poll; - kstat_named_t cs_notify_pending; - kstat_named_t cs_id; - kstat_named_t cs_capabilities; -} dcopy_stats_t; - -/* DMA channel state */ -struct dcopy_channel_s { - /* DMA driver channel private pointer */ - void *ch_channel_private; - - /* shortcut to device callbacks */ - dcopy_device_cb_t *ch_cb; - - /* - * number of outstanding allocs for this channel. used to track when - * it's safe to free up this channel so the DMA device driver can - * detach. - */ - uint64_t ch_ref_cnt; - - /* state for if channel needs to be removed when ch_ref_cnt gets to 0 */ - boolean_t ch_removing; - - list_node_t ch_devchan_list_node; - list_node_t ch_globalchan_list_node; - - /* - * per channel list of commands actively blocking waiting for - * completion. 
- */ - dcopy_list_t ch_poll_list; - - /* pointer back to our device */ - struct dcopy_device_s *ch_device; - - dcopy_query_channel_t ch_info; - - kstat_t *ch_kstat; - dcopy_stats_t ch_stat; -}; - -/* - * If grabbing both device_list mutex & globalchan_list mutex, - * Always grab globalchan_list mutex before device_list mutex - */ -typedef struct dcopy_state_s { - dcopy_list_t d_device_list; - dcopy_list_t d_globalchan_list; -} dcopy_state_t; -dcopy_state_t *dcopy_statep; - - -/* Module Driver Info */ -static struct modlmisc dcopy_modlmisc = { - &mod_miscops, - "dcopy kernel module" -}; - -/* Module Linkage */ -static struct modlinkage dcopy_modlinkage = { - MODREV_1, - &dcopy_modlmisc, - NULL -}; - -static int dcopy_init(); -static void dcopy_fini(); - -static int dcopy_list_init(dcopy_list_t *list, size_t node_size, - offset_t link_offset); -static void dcopy_list_fini(dcopy_list_t *list); -static void dcopy_list_push(dcopy_list_t *list, void *list_node); -static void *dcopy_list_pop(dcopy_list_t *list); - -static void dcopy_device_cleanup(dcopy_device_handle_t device, - boolean_t do_callback); - -static int dcopy_stats_init(dcopy_handle_t channel); -static void dcopy_stats_fini(dcopy_handle_t channel); - - -/* - * _init() - */ -int -_init() -{ - int e; - - e = dcopy_init(); - if (e != 0) { - return (e); - } - - return (mod_install(&dcopy_modlinkage)); -} - - -/* - * _info() - */ -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&dcopy_modlinkage, modinfop)); -} - - -/* - * _fini() - */ -int -_fini() -{ - int e; - - e = mod_remove(&dcopy_modlinkage); - if (e != 0) { - return (e); - } - - dcopy_fini(); - - return (e); -} - -/* - * dcopy_init() - */ -static int -dcopy_init() -{ - int e; - - - dcopy_statep = kmem_zalloc(sizeof (*dcopy_statep), KM_SLEEP); - - /* Initialize the list we use to track device register/unregister */ - e = dcopy_list_init(&dcopy_statep->d_device_list, - sizeof (struct dcopy_device_s), - offsetof(struct dcopy_device_s, 
dc_device_list_node)); - if (e != DCOPY_SUCCESS) { - goto dcopyinitfail_device; - } - - /* Initialize the list we use to track all DMA channels */ - e = dcopy_list_init(&dcopy_statep->d_globalchan_list, - sizeof (struct dcopy_channel_s), - offsetof(struct dcopy_channel_s, ch_globalchan_list_node)); - if (e != DCOPY_SUCCESS) { - goto dcopyinitfail_global; - } - - return (0); - -dcopyinitfail_cback: - dcopy_list_fini(&dcopy_statep->d_globalchan_list); -dcopyinitfail_global: - dcopy_list_fini(&dcopy_statep->d_device_list); -dcopyinitfail_device: - kmem_free(dcopy_statep, sizeof (*dcopy_statep)); - - return (-1); -} - - -/* - * dcopy_fini() - */ -static void -dcopy_fini() -{ - /* - * if mod_remove was successfull, we shouldn't have any - * devices/channels to worry about. - */ - ASSERT(list_head(&dcopy_statep->d_globalchan_list.dl_list) == NULL); - ASSERT(list_head(&dcopy_statep->d_device_list.dl_list) == NULL); - - dcopy_list_fini(&dcopy_statep->d_globalchan_list); - dcopy_list_fini(&dcopy_statep->d_device_list); - kmem_free(dcopy_statep, sizeof (*dcopy_statep)); -} - - -/* *** EXTERNAL INTERFACE *** */ -/* - * dcopy_query() - */ -void -dcopy_query(dcopy_query_t *query) -{ - query->dq_version = DCOPY_QUERY_V0; - query->dq_num_channels = dcopy_statep->d_globalchan_list.dl_cnt; -} - - -/* - * dcopy_alloc() - */ -/*ARGSUSED*/ -int -dcopy_alloc(int flags, dcopy_handle_t *handle) -{ - dcopy_handle_t channel; - dcopy_list_t *list; - - - /* - * we don't use the dcopy_list_* code here because we need to due - * some non-standard stuff. - */ - - list = &dcopy_statep->d_globalchan_list; - - /* - * if nothing is on the channel list, return DCOPY_NORESOURCES. This - * can happen if there aren't any DMA device registered. 
- */ - mutex_enter(&list->dl_mutex); - channel = list_head(&list->dl_list); - if (channel == NULL) { - mutex_exit(&list->dl_mutex); - return (DCOPY_NORESOURCES); - } - - /* - * increment the reference count, and pop the channel off the head and - * push it on the tail. This ensures we rotate through the channels. - * DMA channels are shared. - */ - channel->ch_ref_cnt++; - list_remove(&list->dl_list, channel); - list_insert_tail(&list->dl_list, channel); - mutex_exit(&list->dl_mutex); - - *handle = (dcopy_handle_t)channel; - return (DCOPY_SUCCESS); -} - - -/* - * dcopy_free() - */ -void -dcopy_free(dcopy_handle_t *channel) -{ - dcopy_device_handle_t device; - dcopy_list_t *list; - boolean_t cleanup; - - - ASSERT(*channel != NULL); - - /* - * we don't need to add the channel back to the list since we never - * removed it. decrement the reference count. - */ - list = &dcopy_statep->d_globalchan_list; - mutex_enter(&list->dl_mutex); - (*channel)->ch_ref_cnt--; - - /* - * if we need to remove this channel, and the reference count is down - * to 0, decrement the number of channels which still need to be - * removed on the device. - */ - if ((*channel)->ch_removing && ((*channel)->ch_ref_cnt == 0)) { - cleanup = B_FALSE; - device = (*channel)->ch_device; - mutex_enter(&device->dc_devchan_list.dl_mutex); - device->dc_removing_cnt--; - if (device->dc_removing_cnt == 0) { - cleanup = B_TRUE; - } - mutex_exit(&device->dc_devchan_list.dl_mutex); - } - mutex_exit(&list->dl_mutex); - - /* - * if there are no channels which still need to be removed, cleanup the - * device state and call back into the DMA device driver to tell them - * the device is free. 
- */ - if (cleanup) { - dcopy_device_cleanup(device, B_TRUE); - } - - *channel = NULL; -} - - -/* - * dcopy_query_channel() - */ -void -dcopy_query_channel(dcopy_handle_t channel, dcopy_query_channel_t *query) -{ - *query = channel->ch_info; -} - - -/* - * dcopy_cmd_alloc() - */ -int -dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd) -{ - dcopy_handle_t channel; - dcopy_cmd_priv_t priv; - int e; - - - channel = handle; - - atomic_inc_64(&channel->ch_stat.cs_cmd_alloc.value.ui64); - e = channel->ch_cb->cb_cmd_alloc(channel->ch_channel_private, flags, - cmd); - if (e == DCOPY_SUCCESS) { - priv = (*cmd)->dp_private; - priv->pr_channel = channel; - /* - * we won't initialize the blocking state until we actually - * need to block. - */ - priv->pr_block_init = B_FALSE; - } - - return (e); -} - - -/* - * dcopy_cmd_free() - */ -void -dcopy_cmd_free(dcopy_cmd_t *cmd) -{ - dcopy_handle_t channel; - dcopy_cmd_priv_t priv; - - - ASSERT(*cmd != NULL); - - priv = (*cmd)->dp_private; - channel = priv->pr_channel; - - /* if we initialized the blocking state, clean it up too */ - if (priv->pr_block_init) { - cv_destroy(&priv->pr_cv); - mutex_destroy(&priv->pr_mutex); - } - - channel->ch_cb->cb_cmd_free(channel->ch_channel_private, cmd); -} - - -/* - * dcopy_cmd_post() - */ -int -dcopy_cmd_post(dcopy_cmd_t cmd) -{ - dcopy_handle_t channel; - int e; - - - channel = cmd->dp_private->pr_channel; - - atomic_inc_64(&channel->ch_stat.cs_cmd_post.value.ui64); - if (cmd->dp_cmd == DCOPY_CMD_COPY) { - atomic_add_64(&channel->ch_stat.cs_bytes_xfer.value.ui64, - cmd->dp.copy.cc_size); - } - e = channel->ch_cb->cb_cmd_post(channel->ch_channel_private, cmd); - if (e != DCOPY_SUCCESS) { - return (e); - } - - return (DCOPY_SUCCESS); -} - - -/* - * dcopy_cmd_poll() - */ -int -dcopy_cmd_poll(dcopy_cmd_t cmd, int flags) -{ - dcopy_handle_t channel; - dcopy_cmd_priv_t priv; - int e; - - - priv = cmd->dp_private; - channel = priv->pr_channel; - - /* - * if the caller is trying to 
block, they needed to post the - * command with DCOPY_CMD_INTR set. - */ - if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) { - return (DCOPY_FAILURE); - } - - atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64); - -repoll: - e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd); - if (e == DCOPY_PENDING) { - /* - * if the command is still active, and the blocking flag - * is set. - */ - if (flags & DCOPY_POLL_BLOCK) { - - /* - * if we haven't initialized the state, do it now. A - * command can be re-used, so it's possible it's - * already been initialized. - */ - if (!priv->pr_block_init) { - priv->pr_block_init = B_TRUE; - mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER, - NULL); - cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL); - priv->pr_cmd = cmd; - } - - /* push it on the list for blocking commands */ - priv->pr_wait = B_TRUE; - dcopy_list_push(&channel->ch_poll_list, priv); - - mutex_enter(&priv->pr_mutex); - /* - * it's possible we already cleared pr_wait before we - * grabbed the mutex. - */ - if (priv->pr_wait) { - cv_wait(&priv->pr_cv, &priv->pr_mutex); - } - mutex_exit(&priv->pr_mutex); - - /* - * the command has completed, go back and poll so we - * get the status. 
- */ - goto repoll; - } - } - - return (e); -} - -/* *** END OF EXTERNAL INTERFACE *** */ - -/* - * dcopy_list_init() - */ -static int -dcopy_list_init(dcopy_list_t *list, size_t node_size, offset_t link_offset) -{ - mutex_init(&list->dl_mutex, NULL, MUTEX_DRIVER, NULL); - list_create(&list->dl_list, node_size, link_offset); - list->dl_cnt = 0; - - return (DCOPY_SUCCESS); -} - - -/* - * dcopy_list_fini() - */ -static void -dcopy_list_fini(dcopy_list_t *list) -{ - list_destroy(&list->dl_list); - mutex_destroy(&list->dl_mutex); -} - - -/* - * dcopy_list_push() - */ -static void -dcopy_list_push(dcopy_list_t *list, void *list_node) -{ - mutex_enter(&list->dl_mutex); - list_insert_tail(&list->dl_list, list_node); - list->dl_cnt++; - mutex_exit(&list->dl_mutex); -} - - -/* - * dcopy_list_pop() - */ -static void * -dcopy_list_pop(dcopy_list_t *list) -{ - list_node_t *list_node; - - mutex_enter(&list->dl_mutex); - list_node = list_head(&list->dl_list); - if (list_node == NULL) { - mutex_exit(&list->dl_mutex); - return (list_node); - } - list->dl_cnt--; - list_remove(&list->dl_list, list_node); - mutex_exit(&list->dl_mutex); - - return (list_node); -} - - -/* *** DEVICE INTERFACE *** */ -/* - * dcopy_device_register() - */ -int -dcopy_device_register(void *device_private, dcopy_device_info_t *info, - dcopy_device_handle_t *handle) -{ - struct dcopy_channel_s *channel; - struct dcopy_device_s *device; - int e; - int i; - - - /* initialize the per device state */ - device = kmem_zalloc(sizeof (*device), KM_SLEEP); - device->dc_device_private = device_private; - device->dc_info = *info; - device->dc_removing_cnt = 0; - device->dc_cb = info->di_cb; - - /* - * we have a per device channel list so we can remove a device in the - * future. 
- */ - e = dcopy_list_init(&device->dc_devchan_list, - sizeof (struct dcopy_channel_s), - offsetof(struct dcopy_channel_s, ch_devchan_list_node)); - if (e != DCOPY_SUCCESS) { - goto registerfail_devchan; - } - - /* - * allocate state for each channel, allocate the channel, and then add - * the devices dma channels to the devices channel list. - */ - for (i = 0; i < info->di_num_dma; i++) { - channel = kmem_zalloc(sizeof (*channel), KM_SLEEP); - channel->ch_device = device; - channel->ch_removing = B_FALSE; - channel->ch_ref_cnt = 0; - channel->ch_cb = info->di_cb; - - e = info->di_cb->cb_channel_alloc(device_private, channel, - DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info, - &channel->ch_channel_private); - if (e != DCOPY_SUCCESS) { - kmem_free(channel, sizeof (*channel)); - goto registerfail_alloc; - } - - e = dcopy_stats_init(channel); - if (e != DCOPY_SUCCESS) { - info->di_cb->cb_channel_free( - &channel->ch_channel_private); - kmem_free(channel, sizeof (*channel)); - goto registerfail_alloc; - } - - e = dcopy_list_init(&channel->ch_poll_list, - sizeof (struct dcopy_cmd_priv_s), - offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node)); - if (e != DCOPY_SUCCESS) { - dcopy_stats_fini(channel); - info->di_cb->cb_channel_free( - &channel->ch_channel_private); - kmem_free(channel, sizeof (*channel)); - goto registerfail_alloc; - } - - dcopy_list_push(&device->dc_devchan_list, channel); - } - - /* add the device to device list */ - dcopy_list_push(&dcopy_statep->d_device_list, device); - - /* - * add the device's dma channels to the global channel list (where - * dcopy_alloc's come from) - */ - mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex); - mutex_enter(&dcopy_statep->d_device_list.dl_mutex); - channel = list_head(&device->dc_devchan_list.dl_list); - while (channel != NULL) { - list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list, - channel); - dcopy_statep->d_globalchan_list.dl_cnt++; - channel = list_next(&device->dc_devchan_list.dl_list, 
channel); - } - mutex_exit(&dcopy_statep->d_device_list.dl_mutex); - mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex); - - *handle = device; - return (DCOPY_SUCCESS); - -registerfail_alloc: - channel = list_head(&device->dc_devchan_list.dl_list); - while (channel != NULL) { - /* remove from the list */ - channel = dcopy_list_pop(&device->dc_devchan_list); - ASSERT(channel != NULL); - - dcopy_list_fini(&channel->ch_poll_list); - dcopy_stats_fini(channel); - info->di_cb->cb_channel_free(&channel->ch_channel_private); - kmem_free(channel, sizeof (*channel)); - } - - dcopy_list_fini(&device->dc_devchan_list); -registerfail_devchan: - kmem_free(device, sizeof (*device)); - - return (DCOPY_FAILURE); -} - - -/* - * dcopy_device_unregister() - */ -/*ARGSUSED*/ -int -dcopy_device_unregister(dcopy_device_handle_t *handle) -{ - struct dcopy_channel_s *channel; - dcopy_device_handle_t device; - boolean_t device_busy; - - - device = *handle; - device_busy = B_FALSE; - - /* - * remove the devices dma channels from the global channel list (where - * dcopy_alloc's come from) - */ - mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex); - mutex_enter(&device->dc_devchan_list.dl_mutex); - channel = list_head(&device->dc_devchan_list.dl_list); - while (channel != NULL) { - /* - * if the channel has outstanding allocs, mark it as having - * to be removed and increment the number of channels which - * need to be removed in the device state too. - */ - if (channel->ch_ref_cnt != 0) { - channel->ch_removing = B_TRUE; - device_busy = B_TRUE; - device->dc_removing_cnt++; - } - dcopy_statep->d_globalchan_list.dl_cnt--; - list_remove(&dcopy_statep->d_globalchan_list.dl_list, channel); - channel = list_next(&device->dc_devchan_list.dl_list, channel); - } - mutex_exit(&device->dc_devchan_list.dl_mutex); - mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex); - - /* - * if there are channels which still need to be removed, we will clean - * up the device state after they are freed up. 
- */ - if (device_busy) { - return (DCOPY_PENDING); - } - - dcopy_device_cleanup(device, B_FALSE); - - *handle = NULL; - return (DCOPY_SUCCESS); -} - - -/* - * dcopy_device_cleanup() - */ -static void -dcopy_device_cleanup(dcopy_device_handle_t device, boolean_t do_callback) -{ - struct dcopy_channel_s *channel; - - /* - * remove all the channels in the device list, free them, and clean up - * the state. - */ - mutex_enter(&dcopy_statep->d_device_list.dl_mutex); - channel = list_head(&device->dc_devchan_list.dl_list); - while (channel != NULL) { - device->dc_devchan_list.dl_cnt--; - list_remove(&device->dc_devchan_list.dl_list, channel); - dcopy_list_fini(&channel->ch_poll_list); - dcopy_stats_fini(channel); - channel->ch_cb->cb_channel_free(&channel->ch_channel_private); - kmem_free(channel, sizeof (*channel)); - channel = list_head(&device->dc_devchan_list.dl_list); - } - - /* remove it from the list of devices */ - list_remove(&dcopy_statep->d_device_list.dl_list, device); - - mutex_exit(&dcopy_statep->d_device_list.dl_mutex); - - /* - * notify the DMA device driver that the device is free to be - * detached. - */ - if (do_callback) { - device->dc_cb->cb_unregister_complete( - device->dc_device_private, DCOPY_SUCCESS); - } - - dcopy_list_fini(&device->dc_devchan_list); - kmem_free(device, sizeof (*device)); -} - - -/* - * dcopy_device_channel_notify() - */ -/*ARGSUSED*/ -void -dcopy_device_channel_notify(dcopy_handle_t handle, int status) -{ - struct dcopy_channel_s *channel; - dcopy_list_t *poll_list; - dcopy_cmd_priv_t priv; - int e; - - - ASSERT(status == DCOPY_COMPLETION); - channel = handle; - - poll_list = &channel->ch_poll_list; - - /* - * when we get a completion notification from the device, go through - * all of the commands blocking on this channel and see if they have - * completed. Remove the command and wake up the block thread if they - * have. 
Once we hit a command which is still pending, we are done - * polling since commands in a channel complete in order. - */ - mutex_enter(&poll_list->dl_mutex); - if (poll_list->dl_cnt != 0) { - priv = list_head(&poll_list->dl_list); - while (priv != NULL) { - atomic_inc_64(&channel-> - ch_stat.cs_notify_poll.value.ui64); - e = channel->ch_cb->cb_cmd_poll( - channel->ch_channel_private, - priv->pr_cmd); - if (e == DCOPY_PENDING) { - atomic_inc_64(&channel-> - ch_stat.cs_notify_pending.value.ui64); - break; - } - - poll_list->dl_cnt--; - list_remove(&poll_list->dl_list, priv); - - mutex_enter(&priv->pr_mutex); - priv->pr_wait = B_FALSE; - cv_signal(&priv->pr_cv); - mutex_exit(&priv->pr_mutex); - - priv = list_head(&poll_list->dl_list); - } - } - - mutex_exit(&poll_list->dl_mutex); -} - - -/* - * dcopy_stats_init() - */ -static int -dcopy_stats_init(dcopy_handle_t channel) -{ -#define CHANSTRSIZE 20 - char chanstr[CHANSTRSIZE]; - dcopy_stats_t *stats; - int instance; - char *name; - - - stats = &channel->ch_stat; - name = (char *)ddi_driver_name(channel->ch_device->dc_info.di_dip); - instance = ddi_get_instance(channel->ch_device->dc_info.di_dip); - - (void) snprintf(chanstr, CHANSTRSIZE, "channel%d", - (uint32_t)channel->ch_info.qc_chan_num); - - channel->ch_kstat = kstat_create(name, instance, chanstr, "misc", - KSTAT_TYPE_NAMED, sizeof (dcopy_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (channel->ch_kstat == NULL) { - return (DCOPY_FAILURE); - } - channel->ch_kstat->ks_data = stats; - - kstat_named_init(&stats->cs_bytes_xfer, "bytes_xfer", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_cmd_alloc, "cmd_alloc", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_cmd_post, "cmd_post", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_cmd_poll, "cmd_poll", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_notify_poll, "notify_poll", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_notify_pending, "notify_pending", - KSTAT_DATA_UINT64); - 
kstat_named_init(&stats->cs_id, "id", - KSTAT_DATA_UINT64); - kstat_named_init(&stats->cs_capabilities, "capabilities", - KSTAT_DATA_UINT64); - - kstat_install(channel->ch_kstat); - - channel->ch_stat.cs_id.value.ui64 = channel->ch_info.qc_id; - channel->ch_stat.cs_capabilities.value.ui64 = - channel->ch_info.qc_capabilities; - - return (DCOPY_SUCCESS); -} - - -/* - * dcopy_stats_fini() - */ -static void -dcopy_stats_fini(dcopy_handle_t channel) -{ - kstat_delete(channel->ch_kstat); -} -/* *** END OF DEVICE INTERFACE *** */
--- a/deleted_files/usr/src/uts/common/sys/dcopy.h Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,235 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_DCOPY_H -#define _SYS_DCOPY_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/types.h> - -/* - * *** This interface is for private use by the IP stack only *** - */ - -/* Function return status */ -#define DCOPY_FAILURE (-1) -#define DCOPY_SUCCESS (0) -#define DCOPY_NORESOURCES (1) /* _alloc & _cmd_alloc, _cmd_post only */ -#define DCOPY_PENDING (0x10) /* dcopy_poll(), dcopy_unregister() */ -#define DCOPY_COMPLETED (0x20) /* dcopy_poll() only */ - - -/* dq_version */ -#define DCOPY_QUERY_V0 0 - -typedef struct dcopy_query_s { - int dq_version; /* DCOPY_QUERY_V0 */ - uint_t dq_num_channels; /* number of dma channels */ -} dcopy_query_t; - -/* - * dcopy_query() - * query for the number of DMA engines usable in the system. 
- */ -void dcopy_query(dcopy_query_t *query); - - -typedef struct dcopy_channel_s *dcopy_handle_t; - -/* dcopy_alloc() and dcopy_cmd_alloc() common flags */ -#define DCOPY_SLEEP (0) -#define DCOPY_NOSLEEP (1 << 0) - -/* - * dcopy_alloc() - * Allocate a DMA channel which is used for posting DMA requests. Note: this - * does not give the caller exclusive access to the DMA engine. Commands - * posted to a channel will complete in order. - * flags - (DCOPY_SLEEP, DCOPY_NOSLEEP) - * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES - */ -int dcopy_alloc(int flags, dcopy_handle_t *handle); - -/* - * dcopy_free() - * Free the DMA channel. The client can no longer use the handle to post or - * poll for status on posts which were previously done on this channel. - */ -void dcopy_free(dcopy_handle_t *handle); - -/* dq_version */ -#define DCOPY_QUERY_CHANNEL_V0 0 - -/* Per DMA channel info */ -typedef struct dcopy_query_channel_s { - int qc_version; /* DCOPY_QUERY_CHANNEL_V0 */ - - /* Does DMA channel support DCA */ - boolean_t qc_dca_supported; - - /* device id and device specific capabilities */ - uint64_t qc_id; - uint64_t qc_capabilities; - - /* - * DMA channel size. This may not be the same as the number of posts - * that the DMA channel can handle since a post may consume 1 or more - * entries. - */ - uint64_t qc_channel_size; - - /* DMA channel number within the device. Not unique across devices */ - uint64_t qc_chan_num; -} dcopy_query_channel_t; - -/* - * dcopy_query_channel() - * query DMA engines capabilities - */ -void dcopy_query_channel(dcopy_handle_t handle, dcopy_query_channel_t *query); - - -/* dp_version */ -#define DCOPY_CMD_V0 0 - -/* dp_cmd */ -#define DCOPY_CMD_COPY 0x1 - -/* dp_flags */ -/* - * DCOPY_CMD_QUEUE - * Hint to queue up the post but don't notify the DMA engine. This can be - * used as an optimization when multiple posts are going to be queued up and - * you only want notify the DMA engine after the last post. 
Note, this does - * not mean the DMA engine won't process the request since it could notice - * it anyway. - * DCOPY_CMD_NOSTAT - * Don't generate a status. If this flag is used, You cannot poll for - * completion status on this command. This can be a useful performance - * optimization if your posting multiple commands and just want to poll on - * the last command. - * DCOPY_CMD_DCA - * If DCA is supported, direct this and all future command data (until the - * next command with DCOPY_POST_DCA set) to the processor specified in - * dp_dca_id. This flag is ignored if DCA is not supported. - * DCOPY_CMD_INTR - * Generate an interrupt when command completes. This flag is required if - * the caller is going to call dcopy_cmd_poll(() with DCOPY_POLL_BLOCK set - * for this command. - */ -#define DCOPY_CMD_NOFLAGS (0) -#define DCOPY_CMD_QUEUE (1 << 0) -#define DCOPY_CMD_NOSTAT (1 << 1) -#define DCOPY_CMD_DCA (1 << 2) -#define DCOPY_CMD_INTR (1 << 3) - -typedef struct dcopy_cmd_copy_s { - uint64_t cc_source; /* Source physical address */ - uint64_t cc_dest; /* Destination physical address */ - size_t cc_size; -} dcopy_cmd_copy_t; - -typedef union dcopy_cmd_u { - dcopy_cmd_copy_t copy; -} dcopy_cmd_u_t; - -typedef struct dcopy_cmd_priv_s *dcopy_cmd_priv_t; - -struct dcopy_cmd_s { - uint_t dp_version; /* DCOPY_CMD_V0 */ - uint_t dp_flags; - uint64_t dp_cmd; - dcopy_cmd_u_t dp; - uint32_t dp_dca_id; - dcopy_cmd_priv_t dp_private; -}; -typedef struct dcopy_cmd_s *dcopy_cmd_t; - - -/* - * dcopy_cmd_alloc() specific flags - * DCOPY_ALLOC_LINK - when set, the caller passes in a previously alloced - * command in cmd. dcopy_cmd_alloc() will allocate a new command and - * link it to the old command. The caller can use this to build a - * chain of commands, keeping only the last cmd alloced. calling - * dcopy_cmd_free() with the last cmd alloced in the chain will free all of - * the commands in the chain. 
dcopy_cmd_post() and dcopy_cmd_poll() have - * no knowledge of a chain of commands. It's only used for alloc/free. - */ -#define DCOPY_ALLOC_LINK (1 << 16) - -/* - * dcopy_cmd_alloc() - * allocate a command. A command can be re-used after it completes. - * flags - (DCOPY_SLEEP || DCOPY_NOSLEEP), DCOPY_ALLOC_LINK - * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES - */ -int dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd); - -/* - * dcopy_cmd_free() - * free the command. This call cannot be called after dcopy_free(). - */ -void dcopy_cmd_free(dcopy_cmd_t *cmd); - -/* - * dcopy_cmd_post() - * post a command (allocated from dcopy_cmd_alloc()) to the DMA channel - * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES - */ -int dcopy_cmd_post(dcopy_cmd_t cmd); - -/* dcopy_cmd_poll() flags */ -#define DCOPY_POLL_NOFLAGS (0) -#define DCOPY_POLL_BLOCK (1 << 0) - -/* - * dcopy_cmd_poll() - * poll on completion status of a previous post. This call cannot be called - * after dcopy_free(). - * - * if flags == DCOPY_POLL_NOFLAGS, return status can be DCOPY_FAILURE, - * DCOPY_PENDING, or DCOPY_COMPLETED. - * - * if flags & DCOPY_POLL_BLOCK, return status can be DCOPY_FAILURE or - * DCOPY_COMPLETED. DCOPY_POLL_BLOCK can only be set in base context. - * - * The command cannot be re-used or freed until the command has completed - * (e.g. DCOPY_FAILURE or DCOPY_COMPLETED). - */ -int dcopy_cmd_poll(dcopy_cmd_t cmd, int flags); - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_DCOPY_H */
--- a/deleted_files/usr/src/uts/common/sys/dcopy_device.h Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,154 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_DCOPY_DEVICE_H -#define _SYS_DCOPY_DEVICE_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/types.h> -#include <sys/dcopy.h> - -/* - * private command state. Space for this structure should be allocated during - * (*cb_cmd_alloc). The DMA driver must set dp_private in dcopy_cmd_t to point - * to the memory it allocated. Other than pr_device_cmd_private, the DMA driver - * should not touch any of the fields in this structure. pr_device_cmd_private - * is a private pointer for the DMA engine to use. - */ -struct dcopy_cmd_priv_s { - /* - * we only init the state used to track a command which blocks when it - * actually blocks. pr_block_init tells us when we need to clean it - * up during a cmd_free. 
- */ - boolean_t pr_block_init; - - /* dcopy_poll blocking state */ - list_node_t pr_poll_list_node; - volatile boolean_t pr_wait; - kmutex_t pr_mutex; - kcondvar_t pr_cv; - - /* back pointer to the command */ - dcopy_cmd_t pr_cmd; - - /* shortcut to the channel we're on */ - struct dcopy_channel_s *pr_channel; - - /* DMA driver private pointer */ - void *pr_device_cmd_private; -}; - -/* cb_version */ -#define DCOPY_DEVICECB_V0 0 - -typedef struct dcopy_device_chaninfo_s { - uint_t di_chan_num; -} dcopy_device_chaninfo_t; - -typedef struct dcopy_device_cb_s { - int cb_version; - int cb_res1; - - /* allocate/free a DMA channel. See dcopy.h for return status */ - int (*cb_channel_alloc)(void *device_private, - dcopy_handle_t handle, int flags, uint_t size, - dcopy_query_channel_t *info, void *channel_private); - void (*cb_channel_free)(void *channel_private); - - /* allocate/free a command. See dcopy.h for return status */ - int (*cb_cmd_alloc)(void *channel_private, int flags, - dcopy_cmd_t *cmd); - void (*cb_cmd_free)(void *channel_private, dcopy_cmd_t *cmd); - - /* - * post a command/poll for command status. See dcopy.h for return - * status - */ - int (*cb_cmd_post)(void *channel_private, dcopy_cmd_t cmd); - int (*cb_cmd_poll)(void *channel_private, dcopy_cmd_t cmd); - - /* - * if dcopy_device_unregister() returns DCOPY_PENDING, dcopy will - * call this routine when all the channels are no longer being - * used and have been free'd up. e.g. it's safe for the DMA driver - * to detach. 
- * status = DCOPY_SUCCESS || DCOPY_FAILURE - */ - void (*cb_unregister_complete)(void *device_private, int status); -} dcopy_device_cb_t; - - -typedef struct dcopy_device_info_s { - dev_info_t *di_dip; - dcopy_device_cb_t *di_cb; /* must be a static array */ - uint_t di_num_dma; - uint_t di_maxxfer; - uint_t di_capabilities; - uint64_t di_id; -} dcopy_device_info_t; - -typedef struct dcopy_device_s *dcopy_device_handle_t; - -/* dcopy_device_notify() status */ -#define DCOPY_COMPLETION 0 - -/* - * dcopy_device_register() - * register the DMA device with dcopy. - * return status => DCOPY_FAILURE, DCOPY_SUCCESS - */ -int dcopy_device_register(void *device_private, dcopy_device_info_t *info, - dcopy_device_handle_t *handle); - -/* - * dcopy_device_unregister() - * try to unregister the DMA device with dcopy. If the DMA engines are - * still being used by upper layer modules, DCOPY_PENDING will be returned. - * return status => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_PENDING - * if DCOPY_PENDING, (*cb_unregister_complete)() will be called when - * completed. - */ -int dcopy_device_unregister(dcopy_device_handle_t *handle); - -/* - * dcopy_device_channel_notify() - * Notify dcopy of an event. - * dcopy_handle_t handle => what was passed into (*cb_alloc)() - * status => DCOPY_COMPLETION - */ -void dcopy_device_channel_notify(dcopy_handle_t handle, int status); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_DCOPY_DEVICE_H */
--- a/deleted_files/usr/src/uts/common/sys/sodirect.h Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * University Copyright- Copyright (c) 1982, 1986, 1988 - * The Regents of the University of California - * All Rights Reserved - * - * University Acknowledgment- Portions of this document are derived from - * software developed by the University of California, Berkeley, and its - * contributors. - */ - -#ifndef _SYS_SODIRECT_H -#define _SYS_SODIRECT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Sodirect ... - * - * Currently the sodirect_t uses the sockfs streamhead STREAMS Q directly, - * in the future when we have STREAMless sockets a sonode Q will have to - * be implemented however the sodirect KPI shouldn't need to change. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct sodirect_s { - uint32_t sod_state; /* State bits */ - uint32_t sod_want; /* Pending read byte count or 0 */ - queue_t *sod_q; /* Socket Q */ - int (*sod_enqueue)(); /* Call to enqueue an mblk_t */ - void (*sod_wakeup)(); /* Call to awkake a read()er, if any */ - mblk_t *sod_uioafh; /* To be freed list head, or NULL */ - mblk_t *sod_uioaft; /* To be freed list tail */ - kmutex_t *sod_lock; /* Lock needed to protect all members */ - uioa_t sod_uioa; /* Pending uio_t for uioa_t use */ -} sodirect_t; - -/* - * sod_state bits: - */ - -#define SOD_DISABLED 0 /* No more sodirect */ - -#define SOD_ENABLED 0x0001 /* sodirect_t enabled */ - -#define SOD_WAKE_NOT 0x0010 /* Wakeup not needed */ -#define SOD_WAKE_NEED 0x0020 /* Wakeup needed */ -#define SOD_WAKE_DONE 0x0040 /* Wakeup done */ -#define SOD_WAKE_CLR ~(SOD_WAKE_NOT|SOD_WAKE_NEED|SOD_WAKE_DONE) - -/* - * Usefull macros: - */ - -#define SOD_QSETBE(p) ((p)->sod_q->q_flag |= QWANTW) -#define SOD_QCLRBE(p) ((p)->sod_q->q_flag &= ~QWANTW) -#define SOD_QEMPTY(p) ((p)->sod_q->q_first == NULL) -#define SOD_QFULL(p) ((p)->sod_q->q_flag & QFULL) -#define SOD_QCNT(p) ((p)->sod_q->q_count) - -#define SOD_DISABLE(p) (p)->sod_state &= ~SOD_ENABLED - -#define SOD_QTOSODP(q) (q)->q_stream->sd_sodirect - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SODIRECT_H */
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.c Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,665 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/errno.h> -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/stat.h> -#include <sys/sunddi.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/modctl.h> -#include <sys/ddi_impldefs.h> -#include <sys/sysmacros.h> - -#include <sys/ioat.h> - -static int ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred); -static int ioat_close(dev_t devp, int flag, int otyp, cred_t *cred); -static int ioat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); -static int ioat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); -static int ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, - void **result); - -static struct cb_ops ioat_cb_ops = { - ioat_open, /* cb_open */ - ioat_close, /* cb_close */ - nodev, /* cb_strategy */ - nodev, /* cb_print */ - nodev, /* cb_dump */ - nodev, /* cb_read */ - nodev, /* cb_write */ - ioat_ioctl, /* cb_ioctl */ - nodev, /* cb_devmap */ - nodev, /* cb_mmap */ - nodev, /* cb_segmap */ - nochpoll, /* cb_chpoll */ - ddi_prop_op, /* cb_prop_op */ - NULL, /* cb_stream */ - D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */ - CB_REV -}; - -static struct dev_ops ioat_dev_ops = { - DEVO_REV, /* devo_rev */ - 0, /* devo_refcnt */ - ioat_getinfo, /* devo_getinfo */ - nulldev, /* devo_identify */ - nulldev, /* devo_probe */ - ioat_attach, /* devo_attach */ - ioat_detach, /* devo_detach */ - nodev, /* devo_reset */ - &ioat_cb_ops, /* devo_cb_ops */ - NULL, /* devo_bus_ops */ - NULL /* power */ -}; - -static struct modldrv ioat_modldrv = { - &mod_driverops, /* Type of module. This one is a driver */ - "ioat driver v%I%", /* Name of the module. 
*/ - &ioat_dev_ops, /* driver ops */ -}; - -static struct modlinkage ioat_modlinkage = { - MODREV_1, - (void *) &ioat_modldrv, - NULL -}; - - -void *ioat_statep; - -static int ioat_chip_init(ioat_state_t *state); -static void ioat_chip_fini(ioat_state_t *state); -static int ioat_drv_init(ioat_state_t *state); -static void ioat_drv_fini(ioat_state_t *state); -static uint_t ioat_isr(caddr_t parm); -static void ioat_intr_enable(ioat_state_t *state); -static void ioat_intr_disable(ioat_state_t *state); -void ioat_detach_finish(ioat_state_t *state); - - -ddi_device_acc_attr_t ioat_acc_attr = { - DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ - DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ - DDI_STORECACHING_OK_ACC, /* devacc_attr_dataorder */ - DDI_DEFAULT_ACC /* devacc_attr_access */ -}; - -/* dcopy callback interface */ -dcopy_device_cb_t ioat_cb = { - DCOPY_DEVICECB_V0, - 0, /* reserved */ - ioat_channel_alloc, - ioat_channel_free, - ioat_cmd_alloc, - ioat_cmd_free, - ioat_cmd_post, - ioat_cmd_poll, - ioat_unregister_complete -}; - -/* - * _init() - */ -int -_init(void) -{ - int e; - - e = ddi_soft_state_init(&ioat_statep, sizeof (ioat_state_t), 1); - if (e != 0) { - return (e); - } - - e = mod_install(&ioat_modlinkage); - if (e != 0) { - ddi_soft_state_fini(&ioat_statep); - return (e); - } - - return (0); -} - -/* - * _info() - */ -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&ioat_modlinkage, modinfop)); -} - -/* - * _fini() - */ -int -_fini(void) -{ - int e; - - e = mod_remove(&ioat_modlinkage); - if (e != 0) { - return (e); - } - - ddi_soft_state_fini(&ioat_statep); - - return (0); -} - -/* - * ioat_attach() - */ -static int -ioat_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - ioat_state_t *state; - int instance; - int e; - - - switch (cmd) { - case DDI_ATTACH: - break; - - case DDI_RESUME: - instance = ddi_get_instance(dip); - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - return (DDI_FAILURE); - } - e = 
ioat_channel_resume(state); - if (e != DDI_SUCCESS) { - return (DDI_FAILURE); - } - ioat_intr_enable(state); - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } - - instance = ddi_get_instance(dip); - e = ddi_soft_state_zalloc(ioat_statep, instance); - if (e != DDI_SUCCESS) { - return (DDI_FAILURE); - } - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - goto attachfail_get_soft_state; - } - - state->is_dip = dip; - state->is_instance = instance; - - /* setup the registers, save away some device info */ - e = ioat_chip_init(state); - if (e != DDI_SUCCESS) { - goto attachfail_chip_init; - } - - /* initialize driver state, must be after chip init */ - e = ioat_drv_init(state); - if (e != DDI_SUCCESS) { - goto attachfail_drv_init; - } - - /* create the minor node (for the ioctl) */ - e = ddi_create_minor_node(dip, "ioat", S_IFCHR, instance, DDI_PSEUDO, - 0); - if (e != DDI_SUCCESS) { - goto attachfail_minor_node; - } - - /* Enable device interrupts */ - ioat_intr_enable(state); - - /* Report that driver was loaded */ - ddi_report_dev(dip); - - /* register with dcopy */ - e = dcopy_device_register(state, &state->is_deviceinfo, - &state->is_device_handle); - if (e != DCOPY_SUCCESS) { - goto attachfail_register; - } - - return (DDI_SUCCESS); - -attachfail_register: - ioat_intr_disable(state); - ddi_remove_minor_node(dip, NULL); -attachfail_minor_node: - ioat_drv_fini(state); -attachfail_drv_init: - ioat_chip_fini(state); -attachfail_chip_init: -attachfail_get_soft_state: - (void) ddi_soft_state_free(ioat_statep, instance); - - return (DDI_FAILURE); -} - -/* - * ioat_detach() - */ -static int -ioat_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) -{ - ioat_state_t *state; - int instance; - int e; - - - instance = ddi_get_instance(dip); - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - return (DDI_FAILURE); - } - - switch (cmd) { - case DDI_DETACH: - break; - - case DDI_SUSPEND: - ioat_channel_suspend(state); 
- return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } - - /* - * try to unregister from dcopy. Since this driver doesn't follow the - * traditional parent/child model, we may still be in use so we can't - * detach yet. - */ - e = dcopy_device_unregister(&state->is_device_handle); - if (e != DCOPY_SUCCESS) { - if (e == DCOPY_PENDING) { - cmn_err(CE_NOTE, "device busy, performing asynchronous" - " detach\n"); - } - return (DDI_FAILURE); - } - - ioat_detach_finish(state); - - return (DDI_SUCCESS); -} - -/* - * ioat_getinfo() - */ -/*ARGSUSED*/ -static int -ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) -{ - ioat_state_t *state; - int instance; - dev_t dev; - int e; - - - dev = (dev_t)arg; - instance = getminor(dev); - - switch (cmd) { - case DDI_INFO_DEVT2DEVINFO: - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - return (DDI_FAILURE); - } - *result = (void *)state->is_dip; - e = DDI_SUCCESS; - break; - - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)(uintptr_t)instance; - e = DDI_SUCCESS; - break; - - default: - e = DDI_FAILURE; - break; - } - - return (e); -} - - -/* - * ioat_open() - */ -/*ARGSUSED*/ -static int -ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred) -{ - ioat_state_t *state; - int instance; - - instance = getminor(*devp); - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - return (ENXIO); - } - - return (0); -} - - -/* - * ioat_close() - */ -/*ARGSUSED*/ -static int -ioat_close(dev_t devp, int flag, int otyp, cred_t *cred) -{ - return (0); -} - - -/* - * ioat_chip_init() - */ -static int -ioat_chip_init(ioat_state_t *state) -{ - ddi_device_acc_attr_t attr; - int e; - - - attr.devacc_attr_version = DDI_DEVICE_ATTR_V0; - attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; - attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC; - - e = ddi_regs_map_setup(state->is_dip, 1, (caddr_t *)&state->is_genregs, - 0, 0, &attr, &state->is_reg_handle); - if (e != 
DDI_SUCCESS) { - goto chipinitfail_regsmap; - } - - /* save away ioat chip info */ - state->is_num_channels = (uint_t)ddi_get8(state->is_reg_handle, - &state->is_genregs[IOAT_CHANCNT]); - state->is_maxxfer = (uint_t)ddi_get8(state->is_reg_handle, - &state->is_genregs[IOAT_XFERCAP]); - state->is_chanoff = (uintptr_t)ddi_get16(state->is_reg_handle, - (uint16_t *)&state->is_genregs[IOAT_PERPORT_OFF]); - state->is_cbver = (uint_t)ddi_get8(state->is_reg_handle, - &state->is_genregs[IOAT_CBVER]); - state->is_intrdelay = (uint_t)ddi_get16(state->is_reg_handle, - (uint16_t *)&state->is_genregs[IOAT_INTRDELAY]); - state->is_status = (uint_t)ddi_get16(state->is_reg_handle, - (uint16_t *)&state->is_genregs[IOAT_CSSTATUS]); - state->is_capabilities = (uint_t)ddi_get32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[IOAT_DMACAPABILITY]); - - if (state->is_cbver & 0x10) { - state->is_ver = IOAT_CBv1; - } else if (state->is_cbver & 0x20) { - state->is_ver = IOAT_CBv2; - } else { - goto chipinitfail_version; - } - - return (DDI_SUCCESS); - -chipinitfail_version: - ddi_regs_map_free(&state->is_reg_handle); -chipinitfail_regsmap: - return (DDI_FAILURE); -} - - -/* - * ioat_chip_fini() - */ -static void -ioat_chip_fini(ioat_state_t *state) -{ - ddi_regs_map_free(&state->is_reg_handle); -} - - -/* - * ioat_drv_init() - */ -static int -ioat_drv_init(ioat_state_t *state) -{ - ddi_acc_handle_t handle; - int e; - - - mutex_init(&state->is_mutex, NULL, MUTEX_DRIVER, NULL); - - state->is_deviceinfo.di_dip = state->is_dip; - state->is_deviceinfo.di_num_dma = state->is_num_channels; - state->is_deviceinfo.di_maxxfer = state->is_maxxfer; - state->is_deviceinfo.di_capabilities = state->is_capabilities; - state->is_deviceinfo.di_cb = &ioat_cb; - - e = pci_config_setup(state->is_dip, &handle); - if (e != DDI_SUCCESS) { - goto drvinitfail_config_setup; - } - - /* read in Vendor ID */ - state->is_deviceinfo.di_id = (uint64_t)pci_config_get16(handle, 0); - state->is_deviceinfo.di_id = 
state->is_deviceinfo.di_id << 16; - - /* read in Device ID */ - state->is_deviceinfo.di_id |= (uint64_t)pci_config_get16(handle, 2); - state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 32; - - /* Add in chipset version */ - state->is_deviceinfo.di_id |= (uint64_t)state->is_cbver; - pci_config_teardown(&handle); - - e = ddi_intr_hilevel(state->is_dip, 0); - if (e != 0) { - cmn_err(CE_WARN, "hilevel interrupt not supported\n"); - goto drvinitfail_hilevel; - } - - /* we don't support MSIs for v2 yet */ - e = ddi_add_intr(state->is_dip, 0, NULL, NULL, ioat_isr, - (caddr_t)state); - if (e != DDI_SUCCESS) { - goto drvinitfail_add_intr; - } - - e = ddi_get_iblock_cookie(state->is_dip, 0, &state->is_iblock_cookie); - if (e != DDI_SUCCESS) { - goto drvinitfail_iblock_cookie; - } - - e = ioat_channel_init(state); - if (e != DDI_SUCCESS) { - goto drvinitfail_channel_init; - } - - return (DDI_SUCCESS); - -drvinitfail_channel_init: -drvinitfail_iblock_cookie: - ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie); -drvinitfail_add_intr: -drvinitfail_hilevel: -drvinitfail_config_setup: - mutex_destroy(&state->is_mutex); - - return (DDI_FAILURE); -} - - -/* - * ioat_drv_fini() - */ -static void -ioat_drv_fini(ioat_state_t *state) -{ - ioat_channel_fini(state); - ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie); - mutex_destroy(&state->is_mutex); -} - - -/* - * ioat_unregister_complete() - */ -void -ioat_unregister_complete(void *device_private, int status) -{ - ioat_state_t *state; - - - state = device_private; - - if (status != DCOPY_SUCCESS) { - cmn_err(CE_WARN, "asynchronous detach aborted\n"); - return; - } - - cmn_err(CE_CONT, "detach completing\n"); - ioat_detach_finish(state); -} - - -/* - * ioat_detach_finish() - */ -void -ioat_detach_finish(ioat_state_t *state) -{ - ioat_intr_disable(state); - ddi_remove_minor_node(state->is_dip, NULL); - ioat_drv_fini(state); - ioat_chip_fini(state); - (void) ddi_soft_state_free(ioat_statep, state->is_instance); 
-} - - -/* - * ioat_intr_enable() - */ -static void -ioat_intr_enable(ioat_state_t *state) -{ - uint32_t intr_status; - - - /* Clear any pending interrupts */ - intr_status = ddi_get32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]); - if (intr_status != 0) { - ddi_put32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS], - intr_status); - } - - /* Enable interrupts on the device */ - ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL], - IOAT_INTRCTL_MASTER_EN); -} - - -/* - * ioat_intr_disable() - */ -static void -ioat_intr_disable(ioat_state_t *state) -{ - /* - * disable interrupts on the device. A read of the interrupt control - * register clears the enable bit. - */ - (void) ddi_get8(state->is_reg_handle, - &state->is_genregs[IOAT_INTRCTL]); -} - - -/* - * ioat_isr() - */ -static uint_t -ioat_isr(caddr_t parm) -{ - uint32_t intr_status; - ioat_state_t *state; - uint8_t intrctrl; - uint32_t chan; - uint_t r; - int i; - - state = (ioat_state_t *)parm; - - intrctrl = ddi_get8(state->is_reg_handle, - &state->is_genregs[IOAT_INTRCTL]); - /* master interrupt enable should always be set */ - ASSERT(intrctrl & IOAT_INTRCTL_MASTER_EN); - - /* If the interrupt status bit isn't set, it's not ours */ - if (!(intrctrl & IOAT_INTRCTL_INTR_STAT)) { - /* re-set master interrupt enable (since it clears on read) */ - ddi_put8(state->is_reg_handle, - &state->is_genregs[IOAT_INTRCTL], intrctrl); - return (DDI_INTR_UNCLAIMED); - } - - /* see which channels generated the interrupt */ - intr_status = ddi_get32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]); - - /* call the intr handler for the channels */ - r = DDI_INTR_UNCLAIMED; - chan = 1; - for (i = 0; i < state->is_num_channels; i++) { - if (intr_status & chan) { - ioat_channel_intr(&state->is_channel[i]); - r = DDI_INTR_CLAIMED; - } - chan = chan << 1; - } - - /* - * if interrupt status bit was set, there should have been an - * attention 
status bit set too. - */ - ASSERT(r == DDI_INTR_CLAIMED); - - /* re-set master interrupt enable (since it clears on read) */ - ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL], - intrctrl); - - return (r); -}
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat.conf Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" - -# -# force attach this driver to support misc/driver -ddi-forceattach=1; -
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_chan.c Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1319 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/errno.h> -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/stat.h> -#include <sys/sunddi.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/modctl.h> -#include <sys/ddi_impldefs.h> -#include <sys/sysmacros.h> -#include <vm/hat.h> -#include <vm/as.h> -#include <sys/mach_mmu.h> -#ifdef __xpv -#include <sys/hypervisor.h> -#endif - -#include <sys/ioat.h> - - -extern ddi_device_acc_attr_t ioat_acc_attr; - -/* dma attr for the descriptor rings */ -ddi_dma_attr_t ioat_desc_dma_attr = { - DMA_ATTR_V0, /* dma_attr_version */ - 0x0, /* dma_attr_addr_lo */ - 0xffffffffffffffff, /* dma_attr_addr_hi */ - 0xffffffff, /* dma_attr_count_max */ - 0x1000, /* dma_attr_align */ - 0x1, /* dma_attr_burstsizes */ - 0x1, /* dma_attr_minxfer */ - 0xffffffff, /* dma_attr_maxxfer */ - 0xffffffff, /* dma_attr_seg */ - 0x1, /* dma_attr_sgllen */ - 0x1, /* dma_attr_granular */ - 0x0, /* dma_attr_flags */ -}; - -/* dma attr for the completion buffers */ -ddi_dma_attr_t ioat_cmpl_dma_attr = { - DMA_ATTR_V0, /* dma_attr_version */ - 0x0, /* dma_attr_addr_lo */ - 0xffffffffffffffff, /* dma_attr_addr_hi */ - 0xffffffff, /* dma_attr_count_max */ - 0x40, /* dma_attr_align */ - 0x1, /* dma_attr_burstsizes */ - 0x1, /* dma_attr_minxfer */ - 0xffffffff, /* dma_attr_maxxfer */ - 0xffffffff, /* dma_attr_seg */ - 0x1, /* dma_attr_sgllen */ - 0x1, /* dma_attr_granular */ - 0x0, /* dma_attr_flags */ -}; - -static int ioat_completion_alloc(ioat_channel_t channel); -static void ioat_completion_free(ioat_channel_t channel); -static void ioat_channel_start(ioat_channel_t channel); -static void ioat_channel_reset(ioat_channel_t channel); - -int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt); -void ioat_ring_free(ioat_channel_t channel); -void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc); -int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t 
*ring, - dcopy_cmd_t cmd); - -static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr, - uint64_t dest_addr, uint32_t size, uint32_t ctrl); -static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id); - - -/* - * ioat_channel_init() - */ -int -ioat_channel_init(ioat_state_t *state) -{ - int i; - - /* - * initialize each dma channel's state which doesn't change across - * channel alloc/free. - */ - state->is_chansize = sizeof (struct ioat_channel_s) * - state->is_num_channels; - state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP); - for (i = 0; i < state->is_num_channels; i++) { - state->is_channel[i].ic_state = state; - state->is_channel[i].ic_regs = (uint8_t *) - ((uintptr_t)state->is_genregs + - (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1))); - } - - /* initial the allocator (from 0 to state->is_num_channels) */ - ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs); - - return (DDI_SUCCESS); -} - - -/* - * ioat_channel_fini() - */ -void -ioat_channel_fini(ioat_state_t *state) -{ - ioat_rs_fini(&state->is_channel_rs); - kmem_free(state->is_channel, state->is_chansize); -} - - -/* - * ioat_channel_alloc() - * NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are - * available) - */ -/*ARGSUSED*/ -int -ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags, - uint_t size, dcopy_query_channel_t *info, void *channel_private) -{ -#define CHANSTRSIZE 20 - struct ioat_channel_s *channel; - char chanstr[CHANSTRSIZE]; - ioat_channel_t *chan; - ioat_state_t *state; - size_t cmd_size; - uint_t chan_num; - uint32_t estat; - int e; - - - state = (ioat_state_t *)device_private; - chan = (ioat_channel_t *)channel_private; - - /* allocate a H/W channel */ - e = ioat_rs_alloc(state->is_channel_rs, &chan_num); - if (e != DDI_SUCCESS) { - return (DCOPY_NORESOURCES); - } - - channel = &state->is_channel[chan_num]; - channel->ic_inuse = B_TRUE; - channel->ic_chan_num = chan_num; - 
channel->ic_ver = state->is_ver; - channel->ic_dca_active = B_FALSE; - channel->ic_channel_state = IOAT_CHANNEL_OK; - channel->ic_dcopy_handle = handle; - -#ifdef DEBUG - { - /* if we're cbv2, verify that the V2 compatibility bit is set */ - uint16_t reg; - if (channel->ic_ver == IOAT_CBv2) { - reg = ddi_get16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]); - ASSERT(reg & 0x2); - } - } -#endif - - /* - * Configure DMA channel - * Channel In Use - * Error Interrupt Enable - * Any Error Abort Enable - * Error Completion Enable - */ - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C); - - /* check channel error register, clear any errors */ - estat = ddi_get32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); - if (estat != 0) { -#ifdef DEBUG - cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) " - "enable\n", estat, channel->ic_chan_num); -#endif - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat); - } - - /* allocate and initialize the descriptor buf */ - e = ioat_ring_alloc(channel, size); - if (e != DDI_SUCCESS) { - goto chinitfail_desc_alloc; - } - - /* allocate and initialize the completion space */ - e = ioat_completion_alloc(channel); - if (e != DDI_SUCCESS) { - goto chinitfail_completion_alloc; - } - - /* setup kmem_cache for commands */ - cmd_size = sizeof (struct dcopy_cmd_s) + - sizeof (struct dcopy_cmd_priv_s) + - sizeof (struct ioat_cmd_private_s); - (void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd", - state->is_instance, channel->ic_chan_num); - channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64, - NULL, NULL, NULL, NULL, NULL, 0); - if (channel->ic_cmd_cache == NULL) { - goto chinitfail_kmem_cache; - } - - /* start-up the channel */ - ioat_channel_start(channel); - - /* fill in the channel info returned to dcopy */ - info->qc_version = DCOPY_QUERY_CHANNEL_V0; - info->qc_id = state->is_deviceinfo.di_id; - 
info->qc_capabilities = (uint64_t)state->is_capabilities; - info->qc_channel_size = (uint64_t)size; - info->qc_chan_num = (uint64_t)channel->ic_chan_num; - if (channel->ic_ver == IOAT_CBv1) { - info->qc_dca_supported = B_FALSE; - } else { - if (info->qc_capabilities & IOAT_DMACAP_DCA) { - info->qc_dca_supported = B_TRUE; - } else { - info->qc_dca_supported = B_FALSE; - } - } - - *chan = channel; - - return (DCOPY_SUCCESS); - -chinitfail_kmem_cache: - ioat_completion_free(channel); -chinitfail_completion_alloc: - ioat_ring_free(channel); -chinitfail_desc_alloc: - return (DCOPY_FAILURE); -} - - -/* - * ioat_channel_suspend() - */ -/*ARGSUSED*/ -void -ioat_channel_suspend(ioat_state_t *state) -{ - /* - * normally you would disable interrupts and reset the H/W here. But - * since the suspend framework doesn't know who is using us, it may - * not suspend their I/O before us. Since we won't actively be doing - * any DMA or interrupts unless someone asks us to, it's safe to not - * do anything here. 
- */ -} - - -/* - * ioat_channel_resume() - */ -int -ioat_channel_resume(ioat_state_t *state) -{ - ioat_channel_ring_t *ring; - ioat_channel_t channel; - uint32_t estat; - int i; - - - for (i = 0; i < state->is_num_channels; i++) { - channel = &state->is_channel[i]; - ring = channel->ic_ring; - - if (!channel->ic_inuse) { - continue; - } - - /* - * Configure DMA channel - * Channel In Use - * Error Interrupt Enable - * Any Error Abort Enable - * Error Completion Enable - */ - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C); - - /* check channel error register, clear any errors */ - estat = ddi_get32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); - if (estat != 0) { -#ifdef DEBUG - cmn_err(CE_CONT, "cleared errors (0x%x) before channel" - " (%d) enable\n", estat, channel->ic_chan_num); -#endif - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], - estat); - } - - /* Re-initialize the ring */ - bzero(ring->cr_desc, channel->ic_desc_alloc_size); - /* write the physical address into the chain address register */ - if (channel->ic_ver == IOAT_CBv1) { - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], - (uint32_t)(ring->cr_phys_desc & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], - (uint32_t)(ring->cr_phys_desc >> 32)); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], - (uint32_t)(ring->cr_phys_desc & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], - (uint32_t)(ring->cr_phys_desc >> 32)); - } - - /* re-initialize the completion buffer */ - bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size); - /* write the phys addr into the completion address register */ - ddi_put32(state->is_reg_handle, - (uint32_t 
*)&channel->ic_regs[IOAT_CHAN_CMPL_LO], - (uint32_t)(channel->ic_phys_cmpl & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], - (uint32_t)(channel->ic_phys_cmpl >> 32)); - - /* start-up the channel */ - ioat_channel_start(channel); - - } - - return (DDI_SUCCESS); -} - - -/* - * ioat_channel_free() - */ -void -ioat_channel_free(void *channel_private) -{ - struct ioat_channel_s *channel; - ioat_channel_t *chan; - ioat_state_t *state; - uint_t chan_num; - - - chan = (ioat_channel_t *)channel_private; - channel = *chan; - - state = channel->ic_state; - chan_num = channel->ic_chan_num; - - /* disable the interrupts */ - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0); - - ioat_channel_reset(channel); - - /* cleanup command cache */ - kmem_cache_destroy(channel->ic_cmd_cache); - - /* clean-up/free-up the completion space and descriptors */ - ioat_completion_free(channel); - ioat_ring_free(channel); - - channel->ic_inuse = B_FALSE; - - /* free the H/W DMA engine */ - ioat_rs_free(state->is_channel_rs, chan_num); - - *chan = NULL; -} - - -/* - * ioat_channel_intr() - */ -void -ioat_channel_intr(ioat_channel_t channel) -{ - ioat_state_t *state; - uint16_t chanctrl; - uint32_t chanerr; - uint32_t status; - - - state = channel->ic_state; - - if (channel->ic_ver == IOAT_CBv1) { - status = ddi_get32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - status = ddi_get32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]); - } - - /* if that status isn't ACTIVE or IDLE, the channel has failed */ - if (status & IOAT_CHAN_STS_FAIL_MASK) { - chanerr = ddi_get32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); - cmn_err(CE_WARN, "channel(%d) fatal failure! 
" - "chanstat_lo=0x%X; chanerr=0x%X\n", - channel->ic_chan_num, status, chanerr); - channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE; - ioat_channel_reset(channel); - - return; - } - - /* - * clear interrupt disable bit if set (it's a RW1C). Read it back to - * ensure the write completes. - */ - chanctrl = ddi_get16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]); - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl); - (void) ddi_get16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]); - - /* tell dcopy we have seen a completion on this channel */ - dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION); -} - - -/* - * ioat_channel_start() - */ -void -ioat_channel_start(ioat_channel_t channel) -{ - ioat_chan_dma_desc_t desc; - - /* set the first descriptor up as a NULL descriptor */ - bzero(&desc, sizeof (desc)); - desc.dd_size = 0; - desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL | - IOAT_DESC_CTRL_CMPL; - desc.dd_next_desc = 0x0; - - /* setup the very first descriptor */ - ioat_ring_seed(channel, &desc); -} - - -/* - * ioat_channel_reset() - */ -void -ioat_channel_reset(ioat_channel_t channel) -{ - ioat_state_t *state; - - state = channel->ic_state; - - /* hit the reset bit */ - if (channel->ic_ver == IOAT_CBv1) { - ddi_put8(state->is_reg_handle, - &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - ddi_put8(state->is_reg_handle, - &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20); - } -} - - -/* - * ioat_completion_alloc() - */ -int -ioat_completion_alloc(ioat_channel_t channel) -{ - ioat_state_t *state; - size_t real_length; - uint_t cookie_cnt; - int e; - - - state = channel->ic_state; - - /* - * allocate memory for the completion status, zero it out, and get - * the paddr. We'll allocate a physically contiguous cache line. 
- */ - e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr, - DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle); - if (e != DDI_SUCCESS) { - goto cmplallocfail_alloc_handle; - } - channel->ic_cmpl_alloc_size = 64; - e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle, - channel->ic_cmpl_alloc_size, &ioat_acc_attr, - DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, - (caddr_t *)&channel->ic_cmpl, &real_length, - &channel->ic_cmpl_handle); - if (e != DDI_SUCCESS) { - goto cmplallocfail_mem_alloc; - } - bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size); - e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL, - (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size, - DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, - &channel->ic_cmpl_cookie, &cookie_cnt); - if (e != DDI_SUCCESS) { - goto cmplallocfail_addr_bind; - } - ASSERT(cookie_cnt == 1); - ASSERT(channel->ic_cmpl_cookie.dmac_size == - channel->ic_cmpl_alloc_size); - channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress; - - /* write the physical address into the completion address register */ - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], - (uint32_t)(channel->ic_phys_cmpl & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], - (uint32_t)(channel->ic_phys_cmpl >> 32)); - - return (DDI_SUCCESS); - -cmplallocfail_addr_bind: - ddi_dma_mem_free(&channel->ic_desc_handle); -cmplallocfail_mem_alloc: - ddi_dma_free_handle(&channel->ic_desc_dma_handle); -cmplallocfail_alloc_handle: - return (DDI_FAILURE); -} - - -/* - * ioat_completion_free() - */ -void -ioat_completion_free(ioat_channel_t channel) -{ - ioat_state_t *state; - - state = channel->ic_state; - - /* reset the completion address register */ - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0); - - /* unbind, then 
free up the memory, dma handle */ - (void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle); - ddi_dma_mem_free(&channel->ic_cmpl_handle); - ddi_dma_free_handle(&channel->ic_cmpl_dma_handle); -} - -/* - * ioat_ring_alloc() - */ -int -ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt) -{ - ioat_channel_ring_t *ring; - ioat_state_t *state; - size_t real_length; - uint_t cookie_cnt; - int e; - - - state = channel->ic_state; - - ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP); - channel->ic_ring = ring; - ring->cr_chan = channel; - ring->cr_post_cnt = 0; - - mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER, - channel->ic_state->is_iblock_cookie); - mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER, - channel->ic_state->is_iblock_cookie); - - /* - * allocate memory for the ring, zero it out, and get the paddr. - * We'll allocate a physically contiguous chunck of memory which - * simplifies the completion logic. - */ - e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr, - DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle); - if (e != DDI_SUCCESS) { - goto ringallocfail_alloc_handle; - } - /* - * allocate one extra descriptor so we can simplify the empty/full - * logic. Then round that number up to a whole multiple of 4. 
- */ - channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3; - ring->cr_desc_last = channel->ic_chan_desc_cnt - 1; - channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt * - sizeof (ioat_chan_desc_t); - e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle, - channel->ic_desc_alloc_size, &ioat_acc_attr, - DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, - (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle); - if (e != DDI_SUCCESS) { - goto ringallocfail_mem_alloc; - } - bzero(ring->cr_desc, channel->ic_desc_alloc_size); - e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL, - (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size, - DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, - &channel->ic_desc_cookies, &cookie_cnt); - if (e != DDI_SUCCESS) { - goto ringallocfail_addr_bind; - } - ASSERT(cookie_cnt == 1); - ASSERT(channel->ic_desc_cookies.dmac_size == - channel->ic_desc_alloc_size); - ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress; - - /* write the physical address into the chain address register */ - if (channel->ic_ver == IOAT_CBv1) { - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], - (uint32_t)(ring->cr_phys_desc & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], - (uint32_t)(ring->cr_phys_desc >> 32)); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], - (uint32_t)(ring->cr_phys_desc & 0xffffffff)); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], - (uint32_t)(ring->cr_phys_desc >> 32)); - } - - return (DCOPY_SUCCESS); - -ringallocfail_addr_bind: - ddi_dma_mem_free(&channel->ic_desc_handle); -ringallocfail_mem_alloc: - ddi_dma_free_handle(&channel->ic_desc_dma_handle); -ringallocfail_alloc_handle: - mutex_destroy(&ring->cr_desc_mutex); - mutex_destroy(&ring->cr_cmpl_mutex); - 
kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t)); - - return (DCOPY_FAILURE); -} - - -/* - * ioat_ring_free() - */ -void -ioat_ring_free(ioat_channel_t channel) -{ - ioat_state_t *state; - - - state = channel->ic_state; - - /* reset the chain address register */ - if (channel->ic_ver == IOAT_CBv1) { - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0); - ddi_put32(state->is_reg_handle, - (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0); - } - - /* unbind, then free up the memory, dma handle */ - (void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle); - ddi_dma_mem_free(&channel->ic_desc_handle); - ddi_dma_free_handle(&channel->ic_desc_dma_handle); - - mutex_destroy(&channel->ic_ring->cr_desc_mutex); - mutex_destroy(&channel->ic_ring->cr_cmpl_mutex); - kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t)); - -} - - -/* - * ioat_ring_seed() - * write the first descriptor in the ring. 
- */ -void -ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc) -{ - ioat_channel_ring_t *ring; - ioat_chan_dma_desc_t *desc; - ioat_chan_dma_desc_t *prev; - ioat_state_t *state; - - - state = channel->ic_state; - ring = channel->ic_ring; - - /* init the completion state */ - ring->cr_cmpl_gen = 0x0; - ring->cr_cmpl_last = 0x0; - - /* write in the descriptor and init the descriptor state */ - ring->cr_post_cnt++; - channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc; - ring->cr_desc_gen = 0; - ring->cr_desc_prev = 0; - ring->cr_desc_next = 1; - - if (channel->ic_ver == IOAT_CBv1) { - /* hit the start bit */ - ddi_put8(state->is_reg_handle, - &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1); - } else { - /* - * if this is CBv2, link the descriptor to an empty - * descriptor - */ - ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2); - desc = (ioat_chan_dma_desc_t *) - &ring->cr_desc[ring->cr_desc_next]; - prev = (ioat_chan_dma_desc_t *) - &ring->cr_desc[ring->cr_desc_prev]; - - desc->dd_ctrl = 0; - desc->dd_next_desc = 0x0; - - prev->dd_next_desc = ring->cr_phys_desc + - (ring->cr_desc_next << 6); - - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT], - (uint16_t)1); - } - -} - - -/* - * ioat_cmd_alloc() - */ -int -ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd) -{ - ioat_cmd_private_t *priv; - ioat_channel_t channel; - dcopy_cmd_t oldcmd; - int kmflag; - - - channel = (ioat_channel_t)private; - - if (flags & DCOPY_NOSLEEP) { - kmflag = KM_NOSLEEP; - } else { - kmflag = KM_SLEEP; - } - - /* save the command passed incase DCOPY_ALLOC_LINK is set */ - oldcmd = *cmd; - - *cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag); - if (*cmd == NULL) { - return (DCOPY_NORESOURCES); - } - - /* setup the dcopy and ioat private state pointers */ - (*cmd)->dp_version = DCOPY_CMD_V0; - (*cmd)->dp_cmd = 0; - (*cmd)->dp_private = (struct dcopy_cmd_priv_s *) - ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s)); - 
(*cmd)->dp_private->pr_device_cmd_private = - (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private + - sizeof (struct dcopy_cmd_priv_s)); - - /* - * if DCOPY_ALLOC_LINK is set, link the old command to the new one - * just allocated. - */ - priv = (*cmd)->dp_private->pr_device_cmd_private; - if (flags & DCOPY_ALLOC_LINK) { - priv->ip_next = oldcmd; - } else { - priv->ip_next = NULL; - } - - return (DCOPY_SUCCESS); -} - - -/* - * ioat_cmd_free() - */ -void -ioat_cmd_free(void *private, dcopy_cmd_t *cmdp) -{ - ioat_cmd_private_t *priv; - ioat_channel_t channel; - dcopy_cmd_t next; - dcopy_cmd_t cmd; - - - channel = (ioat_channel_t)private; - cmd = *(cmdp); - - /* - * free all the commands in the chain (see DCOPY_ALLOC_LINK in - * ioat_cmd_alloc() for more info). - */ - while (cmd != NULL) { - priv = cmd->dp_private->pr_device_cmd_private; - next = priv->ip_next; - kmem_cache_free(channel->ic_cmd_cache, cmd); - cmd = next; - } - *cmdp = NULL; -} - - -/* - * ioat_cmd_post() - */ -int -ioat_cmd_post(void *private, dcopy_cmd_t cmd) -{ - ioat_channel_ring_t *ring; - ioat_cmd_private_t *priv; - ioat_channel_t channel; - ioat_state_t *state; - uint64_t dest_paddr; - uint64_t src_paddr; - uint64_t dest_addr; - uint32_t dest_size; - uint64_t src_addr; - uint32_t src_size; - size_t xfer_size; - uint32_t ctrl; - size_t size; - int e; - - - channel = (ioat_channel_t)private; - priv = cmd->dp_private->pr_device_cmd_private; - - state = channel->ic_state; - ring = channel->ic_ring; - - mutex_enter(&ring->cr_desc_mutex); - - /* if the channel has had a fatal failure, return failure */ - if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) { - mutex_exit(&ring->cr_cmpl_mutex); - return (DCOPY_FAILURE); - } - - /* make sure we have space for the descriptors */ - e = ioat_ring_reserve(channel, ring, cmd); - if (e != DCOPY_SUCCESS) { - mutex_exit(&ring->cr_cmpl_mutex); - return (DCOPY_NORESOURCES); - } - - /* if we support DCA, and the DCA flag is set, post a DCA desc */ - 
if ((channel->ic_ver == IOAT_CBv2) && - (cmd->dp_flags & DCOPY_CMD_DCA)) { - ioat_cmd_post_dca(ring, cmd->dp_dca_id); - } - - /* - * the dma copy may have to be broken up into multiple descriptors - * since we can't cross a page boundary. - */ - ASSERT(cmd->dp_version == DCOPY_CMD_V0); - ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY); - src_addr = cmd->dp.copy.cc_source; - dest_addr = cmd->dp.copy.cc_dest; - size = cmd->dp.copy.cc_size; - while (size > 0) { - src_paddr = pa_to_ma(src_addr); - dest_paddr = pa_to_ma(dest_addr); - - /* adjust for any offset into the page */ - if ((src_addr & PAGEOFFSET) == 0) { - src_size = PAGESIZE; - } else { - src_size = PAGESIZE - (src_addr & PAGEOFFSET); - } - if ((dest_addr & PAGEOFFSET) == 0) { - dest_size = PAGESIZE; - } else { - dest_size = PAGESIZE - (dest_addr & PAGEOFFSET); - } - - /* take the smallest of the three */ - xfer_size = MIN(src_size, dest_size); - xfer_size = MIN(xfer_size, size); - - /* - * if this is the last descriptor, and we are supposed to - * generate a completion, generate a completion. same logic - * for interrupt. - */ - ctrl = 0; - if (xfer_size == size) { - if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) { - ctrl |= IOAT_DESC_CTRL_CMPL; - } - if ((cmd->dp_flags & DCOPY_CMD_INTR)) { - ctrl |= IOAT_DESC_CTRL_INTR; - } - } - - ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size, - ctrl); - - /* go to the next page */ - src_addr += xfer_size; - dest_addr += xfer_size; - size -= xfer_size; - } - - /* - * if we are going to create a completion, save away the state so we - * can poll on it. 
- */ - if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) { - priv->ip_generation = ring->cr_desc_gen_prev; - priv->ip_index = ring->cr_desc_prev; - } - - /* if queue not defined, tell the DMA engine about it */ - if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) { - if (channel->ic_ver == IOAT_CBv1) { - ddi_put8(state->is_reg_handle, - (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD], - 0x2); - } else { - ASSERT(channel->ic_ver == IOAT_CBv2); - ddi_put16(state->is_reg_handle, - (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT], - (uint16_t)(ring->cr_post_cnt & 0xFFFF)); - } - } - - mutex_exit(&ring->cr_desc_mutex); - - return (DCOPY_SUCCESS); -} - - -/* - * ioat_cmd_post_dca() - */ -static void -ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id) -{ - ioat_chan_dca_desc_t *desc; - ioat_chan_dca_desc_t *prev; - ioat_channel_t channel; - - - channel = ring->cr_chan; - desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next]; - prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev]; - - /* keep track of the number of descs posted for cbv2 */ - ring->cr_post_cnt++; - - /* - * post a context change desriptor. If dca has never been used on - * this channel, or if the id doesn't match the last id used on this - * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active, - * and save away the id we're using. 
- */ - desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX; - desc->dd_next_desc = 0x0; - if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) { - channel->ic_dca_active = B_TRUE; - channel->ic_dca_current = dca_id; - desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG; - desc->dd_cntx = dca_id; - } - - /* Put the descriptors physical address in the previous descriptor */ - /*LINTED:E_TRUE_LOGICAL_EXPR*/ - ASSERT(sizeof (ioat_chan_dca_desc_t) == 64); - - /* sync the current desc */ - (void) ddi_dma_sync(channel->ic_desc_dma_handle, - ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV); - - /* update the previous desc and sync it too */ - prev->dd_next_desc = ring->cr_phys_desc + - (ring->cr_desc_next << 6); - (void) ddi_dma_sync(channel->ic_desc_dma_handle, - ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV); - - /* save the current desc_next and desc_last for the completion */ - ring->cr_desc_prev = ring->cr_desc_next; - ring->cr_desc_gen_prev = ring->cr_desc_gen; - - /* increment next/gen so it points to the next free desc */ - ring->cr_desc_next++; - if (ring->cr_desc_next > ring->cr_desc_last) { - ring->cr_desc_next = 0; - ring->cr_desc_gen++; - } - - /* - * if this is CBv2, link the descriptor to an empty descriptor. Since - * we always leave on desc empty to detect full, this works out. 
- */ - if (ring->cr_chan->ic_ver == IOAT_CBv2) { - desc = (ioat_chan_dca_desc_t *) - &ring->cr_desc[ring->cr_desc_next]; - prev = (ioat_chan_dca_desc_t *) - &ring->cr_desc[ring->cr_desc_prev]; - desc->dd_ctrl = 0; - desc->dd_next_desc = 0x0; - - prev->dd_next_desc = ring->cr_phys_desc + - (ring->cr_desc_next << 6); - } -} - - -/* - * ioat_cmd_post_copy() - * - */ -static void -ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr, - uint64_t dest_addr, uint32_t size, uint32_t ctrl) -{ - ioat_chan_dma_desc_t *desc; - ioat_chan_dma_desc_t *prev; - ioat_channel_t channel; - - - channel = ring->cr_chan; - desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next]; - prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev]; - - /* keep track of the number of descs posted for cbv2 */ - ring->cr_post_cnt++; - - /* write in the DMA desc */ - desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl; - desc->dd_size = size; - desc->dd_src_paddr = src_addr; - desc->dd_dest_paddr = dest_addr; - desc->dd_next_desc = 0x0; - - /* Put the descriptors physical address in the previous descriptor */ - /*LINTED:E_TRUE_LOGICAL_EXPR*/ - ASSERT(sizeof (ioat_chan_dma_desc_t) == 64); - - /* sync the current desc */ - (void) ddi_dma_sync(channel->ic_desc_dma_handle, - ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV); - - /* update the previous desc and sync it too */ - prev->dd_next_desc = ring->cr_phys_desc + - (ring->cr_desc_next << 6); - (void) ddi_dma_sync(channel->ic_desc_dma_handle, - ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV); - - /* increment next/gen so it points to the next free desc */ - ring->cr_desc_prev = ring->cr_desc_next; - ring->cr_desc_gen_prev = ring->cr_desc_gen; - - /* increment next/gen so it points to the next free desc */ - ring->cr_desc_next++; - if (ring->cr_desc_next > ring->cr_desc_last) { - ring->cr_desc_next = 0; - ring->cr_desc_gen++; - } - - /* - * if this is CBv2, link the descriptor to an empty descriptor. 
Since - * we always leave on desc empty to detect full, this works out. - */ - if (ring->cr_chan->ic_ver == IOAT_CBv2) { - desc = (ioat_chan_dma_desc_t *) - &ring->cr_desc[ring->cr_desc_next]; - prev = (ioat_chan_dma_desc_t *) - &ring->cr_desc[ring->cr_desc_prev]; - desc->dd_size = 0; - desc->dd_ctrl = 0; - desc->dd_next_desc = 0x0; - - prev->dd_next_desc = ring->cr_phys_desc + - (ring->cr_desc_next << 6); - } -} - - -/* - * ioat_cmd_poll() - */ -int -ioat_cmd_poll(void *private, dcopy_cmd_t cmd) -{ - ioat_channel_ring_t *ring; - ioat_cmd_private_t *priv; - ioat_channel_t channel; - uint64_t generation; - uint64_t last_cmpl; - - - channel = (ioat_channel_t)private; - priv = cmd->dp_private->pr_device_cmd_private; - - ring = channel->ic_ring; - ASSERT(ring != NULL); - - mutex_enter(&ring->cr_cmpl_mutex); - - /* if the channel had a fatal failure, fail all polls */ - if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) || - IOAT_CMPL_FAILED(channel)) { - mutex_exit(&ring->cr_cmpl_mutex); - return (DCOPY_FAILURE); - } - - /* - * if the current completion is the same as the last time we read one, - * post is still pending, nothing further to do. We track completions - * as indexes into the ring since post uses VAs and the H/W returns - * PAs. We grab a snapshot of generation and last_cmpl in the mutex. - */ - (void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0, - DDI_DMA_SYNC_FORCPU); - last_cmpl = IOAT_CMPL_INDEX(channel); - if (last_cmpl != ring->cr_cmpl_last) { - /* - * if we wrapped the ring, increment the generation. Store - * the last cmpl. This logic assumes a physically contiguous - * ring. - */ - if (last_cmpl < ring->cr_cmpl_last) { - ring->cr_cmpl_gen++; - } - ring->cr_cmpl_last = last_cmpl; - generation = ring->cr_cmpl_gen; - - } else { - generation = ring->cr_cmpl_gen; - } - - mutex_exit(&ring->cr_cmpl_mutex); - - /* - * if cmd isn't passed in, well return. Useful for updating the - * consumer pointer (ring->cr_cmpl_last). 
- */ - if (cmd == NULL) { - return (DCOPY_PENDING); - } - - /* - * if the post's generation is old, this post has completed. No reason - * to go check the last completion. if the generation is the same - * and if the post is before or = to the last completion processed, - * the post has completed. - */ - if (priv->ip_generation < generation) { - return (DCOPY_COMPLETED); - } else if ((priv->ip_generation == generation) && - (priv->ip_index <= last_cmpl)) { - return (DCOPY_COMPLETED); - } - - return (DCOPY_PENDING); -} - - -/* - * ioat_ring_reserve() - */ -int -ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring, - dcopy_cmd_t cmd) -{ - uint64_t dest_addr; - uint32_t dest_size; - uint64_t src_addr; - uint32_t src_size; - size_t xfer_size; - uint64_t desc; - int num_desc; - size_t size; - int i; - - - /* - * figure out how many descriptors we need. This can include a dca - * desc and multiple desc for a dma copy. - */ - num_desc = 0; - if ((channel->ic_ver == IOAT_CBv2) && - (cmd->dp_flags & DCOPY_CMD_DCA)) { - num_desc++; - } - src_addr = cmd->dp.copy.cc_source; - dest_addr = cmd->dp.copy.cc_dest; - size = cmd->dp.copy.cc_size; - while (size > 0) { - num_desc++; - - /* adjust for any offset into the page */ - if ((src_addr & PAGEOFFSET) == 0) { - src_size = PAGESIZE; - } else { - src_size = PAGESIZE - (src_addr & PAGEOFFSET); - } - if ((dest_addr & PAGEOFFSET) == 0) { - dest_size = PAGESIZE; - } else { - dest_size = PAGESIZE - (dest_addr & PAGEOFFSET); - } - - /* take the smallest of the three */ - xfer_size = MIN(src_size, dest_size); - xfer_size = MIN(xfer_size, size); - - /* go to the next page */ - src_addr += xfer_size; - dest_addr += xfer_size; - size -= xfer_size; - } - - /* Make sure we have space for these descriptors */ - desc = ring->cr_desc_next; - for (i = 0; i < num_desc; i++) { - - /* - * if this is the last descriptor in the ring, see if the - * last completed descriptor is #0. 
- */ - if (desc == ring->cr_desc_last) { - if (ring->cr_cmpl_last == 0) { - /* - * if we think the ring is full, update where - * the H/W really is and check for full again. - */ - (void) ioat_cmd_poll(channel, NULL); - if (ring->cr_cmpl_last == 0) { - return (DCOPY_NORESOURCES); - } - } - - /* - * go to the next descriptor which is zero in this - * case. - */ - desc = 0; - - /* - * if this is not the last descriptor in the ring, see if - * the last completion we saw was the next descriptor. - */ - } else { - if ((desc + 1) == ring->cr_cmpl_last) { - /* - * if we think the ring is full, update where - * the H/W really is and check for full again. - */ - (void) ioat_cmd_poll(channel, NULL); - if ((desc + 1) == ring->cr_cmpl_last) { - return (DCOPY_NORESOURCES); - } - } - - /* go to the next descriptor */ - desc++; - } - } - - return (DCOPY_SUCCESS); -}
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,343 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/errno.h> -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/kmem.h> -#include <sys/ddi.h> -#include <sys/stat.h> -#include <sys/sunddi.h> -#include <sys/file.h> -#include <sys/open.h> -#include <sys/modctl.h> -#include <sys/ddi_impldefs.h> -#include <sys/sysmacros.h> - -#include <vm/hat.h> -#include <vm/as.h> - -#include <sys/ioat.h> - - -extern void *ioat_statep; -#define ptob64(x) (((uint64_t)(x)) << PAGESHIFT) - -static int ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode); -#ifdef DEBUG -static int ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode); -static int ioat_ioctl_test(ioat_state_t *state, void *arg, int mode); -#endif - -/* - * ioat_ioctl() - */ -/*ARGSUSED*/ -int -ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval) -{ - ioat_state_t *state; - int instance; - int e; - - - e = drv_priv(cred); - if (e != 0) { - return (EPERM); - } - instance = getminor(dev); - if (instance == -1) { - return (EBADF); - } - state = ddi_get_soft_state(ioat_statep, instance); - if (state == NULL) { - return (EBADF); - } - - switch (cmd) { - case IOAT_IOCTL_READ_REG: - e = ioat_ioctl_rdreg(state, (void *)arg, mode); - break; -#ifdef DEBUG - case IOAT_IOCTL_WRITE_REG: - e = ioat_ioctl_wrreg(state, (void *)arg, mode); - break; - case IOAT_IOCTL_TEST: - e = ioat_ioctl_test(state, (void *)arg, mode); - break; -#endif - - default: - e = ENXIO; - } - - return (e); -} - - -/* - * ioat_ioctl_rdreg() - */ -static int -ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode) -{ - ioat_ioctl_rdreg_t rdreg; - int e; - - - e = ddi_copyin(arg, &rdreg, sizeof (ioat_ioctl_rdreg_t), mode); - if (e != 0) { - return (EFAULT); - } - - /* - * read a device register, where size is read size in bits, addr is - * the offset into MMIO registers. 
- */ - switch (rdreg.size) { - case 8: - rdreg.data = (uint64_t)ddi_get8(state->is_reg_handle, - (uint8_t *)&state->is_genregs[rdreg.addr]); - break; - case 16: - rdreg.data = (uint64_t)ddi_get16(state->is_reg_handle, - (uint16_t *)&state->is_genregs[rdreg.addr]); - break; - case 32: - rdreg.data = (uint64_t)ddi_get32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[rdreg.addr]); - break; - case 64: - rdreg.data = (uint64_t)ddi_get64(state->is_reg_handle, - (uint64_t *)&state->is_genregs[rdreg.addr]); - break; - default: - return (EFAULT); - } - - e = ddi_copyout(&rdreg, arg, sizeof (ioat_ioctl_rdreg_t), mode); - if (e != 0) { - return (EFAULT); - } - - return (0); -} - - -#ifdef DEBUG -/* - * ioat_ioctl_wrreg() - */ -static int -ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode) -{ - ioat_ioctl_wrreg_t wrreg; - int e; - - - e = ddi_copyin(arg, &wrreg, sizeof (ioat_ioctl_wrreg_t), mode); - if (e != 0) { - return (EFAULT); - } - - /* - * write a device register, where size is write size in bits, addr is - * the offset into MMIO registers. 
- */ - switch (wrreg.size) { - case 8: - ddi_put8(state->is_reg_handle, - (uint8_t *)&state->is_genregs[wrreg.addr], - (uint8_t)wrreg.data); - break; - case 16: - ddi_put16(state->is_reg_handle, - (uint16_t *)&state->is_genregs[wrreg.addr], - (uint16_t)wrreg.data); - break; - case 32: - ddi_put32(state->is_reg_handle, - (uint32_t *)&state->is_genregs[wrreg.addr], - (uint32_t)wrreg.data); - break; - case 64: - ddi_put64(state->is_reg_handle, - (uint64_t *)&state->is_genregs[wrreg.addr], - (uint64_t)wrreg.data); - break; - default: - return (EFAULT); - } - - return (0); -} - - -/* - * ioat_ioctl_test() - */ -/*ARGSUSED*/ -static int -ioat_ioctl_test(ioat_state_t *state, void *arg, int mode) -{ - dcopy_handle_t channel; - dcopy_cmd_t cmd; - uint8_t *source; - uint_t buf_size; - uint_t poll_cnt; - uint8_t *dest; - uint8_t *buf; - int flags; - int i; - int e; - - - /* allocate 2 paged aligned 4k pages */ - buf_size = 0x1000; - buf = kmem_zalloc((buf_size * 2) + 0x1000, KM_SLEEP); - source = (uint8_t *)(((uintptr_t)buf + PAGEOFFSET) & PAGEMASK); - dest = source + buf_size; - - /* Init source buffer */ - for (i = 0; i < buf_size; i++) { - source[i] = (uint8_t)(i & 0xFF); - } - - /* allocate a DMA channel */ - e = dcopy_alloc(DCOPY_SLEEP, &channel); - if (e != DCOPY_SUCCESS) { - cmn_err(CE_CONT, "dcopy_alloc() failed\n"); - goto testfail_alloc; - } - - /* - * post 32 DMA copy's from dest to dest. These will complete in order - * so they won't stomp on each other. We don't care about the data - * right now which is why we go dest to dest. - */ - flags = DCOPY_SLEEP; - for (i = 0; i < 32; i++) { - /* - * if this is the second command, link the commands from here - * on out. We only want to keep track of the last command. We - * will poll on the last command completing (which infers that - * the other commands completed). If any of the previous - * commands fail, so will the last one. Linking the commands - * also allows us to only call free for the last command. 
free - * will free up the entire chain of commands. - */ - if (i == 1) { - flags |= DCOPY_ALLOC_LINK; - } - e = dcopy_cmd_alloc(channel, flags, &cmd); - if (e != DCOPY_SUCCESS) { - cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n"); - goto testfail_alloc; - } - - ASSERT(cmd->dp_version == DCOPY_CMD_V0); - cmd->dp_cmd = DCOPY_CMD_COPY; - cmd->dp_flags = DCOPY_CMD_NOFLAGS; - - /* do a bunch of dest to dest DMA's */ - cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat, - (caddr_t)source)) + ((uintptr_t)dest & PAGEOFFSET); - cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat, - (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET); - cmd->dp.copy.cc_size = PAGESIZE; - - e = dcopy_cmd_post(cmd); - if (e != DCOPY_SUCCESS) { - cmn_err(CE_CONT, "dcopy_post() failed\n"); - goto testfail_post; - } - } - - e = dcopy_cmd_alloc(channel, flags, &cmd); - if (e != DCOPY_SUCCESS) { - cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n"); - goto testfail_alloc; - } - - /* now queue up the DMA we are going to check status and data for */ - cmd->dp_cmd = DCOPY_CMD_COPY; - cmd->dp_flags = DCOPY_CMD_INTR; - cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat, - (caddr_t)source)) + ((uintptr_t)source & PAGEOFFSET); - cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat, - (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET); - cmd->dp.copy.cc_size = PAGESIZE; - e = dcopy_cmd_post(cmd); - if (e != DCOPY_SUCCESS) { - cmn_err(CE_CONT, "dcopy_post() failed\n"); - goto testfail_post; - } - - /* check the status of the last command */ - poll_cnt = 0; - flags = DCOPY_POLL_NOFLAGS; - while ((e = dcopy_cmd_poll(cmd, flags)) == DCOPY_PENDING) { - poll_cnt++; - if (poll_cnt >= 16) { - flags |= DCOPY_POLL_BLOCK; - } - } - if (e != DCOPY_COMPLETED) { - cmn_err(CE_CONT, "dcopy_poll() failed\n"); - goto testfail_poll; - } - - /* since the cmd's are linked we only need to pass in the last cmd */ - dcopy_cmd_free(&cmd); - dcopy_free(&channel); - - /* verify the data */ - for (i = 0; i < PAGESIZE; i++) { - if 
(dest[i] != (uint8_t)(i & 0xFF)) { - cmn_err(CE_CONT, - "dcopy_data_compare() failed, %p[%d]: %x, %x\n", - (void *)dest, i, dest[i], i & 0xFF); - return (-1); - } - } - - kmem_free(buf, (buf_size * 2) + 0x1000); - - return (0); - -testfail_data_compare: -testfail_poll: -testfail_post: - dcopy_cmd_free(&cmd); - dcopy_free(&channel); -testfail_alloc: - kmem_free(buf, (buf_size * 2) + 0x1000); - - return (-1); -} -#endif
--- a/deleted_files/usr/src/uts/i86pc/io/ioat/ioat_rs.c Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,246 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/kmem.h> -#include <sys/types.h> -#include <sys/conf.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> - -#include <sys/ioat.h> - - -/* structure used to keep track of resources */ -typedef struct ioat_rs_s { - /* - * Bounds of resource allocation. We will start allocating at rs_min - * and rollover at rs_max+1 (rs_max is included). e.g. for rs_min=0 - * and rs_max=7, we will have 8 total resources which can be alloced. - */ - uint_t rs_min; - uint_t rs_max; - - /* - * rs_free points to an array of 64-bit values used to track resource - * allocation. rs_free_size is the free buffer size in bytes. - */ - uint64_t *rs_free; - uint_t rs_free_size; - - /* - * last tracks the last alloc'd resource. This allows us to do a round - * robin allocation. 
- */ - uint_t rs_last; - - kmutex_t rs_mutex; -} ioat_rs_t; - - -/* - * ioat_rs_init() - * Initialize the resource structure. This structure will be protected - * by a mutex at the iblock_cookie passed in. init() returns a handle to be - * used for the rest of the resource functions. This code is written assuming - * that min_val will be close to 0. Therefore, we will allocate the free - * buffer only taking max_val into account. - */ -void -ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val, - ioat_rs_hdl_t *handle) -{ - ioat_rs_t *rstruct; - uint_t array_size; - uint_t index; - - - ASSERT(handle != NULL); - ASSERT(min_val < max_val); - - /* alloc space for resource structure */ - rstruct = kmem_alloc(sizeof (ioat_rs_t), KM_SLEEP); - - /* - * Test to see if the max value is 64-bit aligned. If so, we don't need - * to allocate an extra 64-bit word. alloc space for free buffer - * (8 bytes per uint64_t). - */ - if ((max_val & 0x3F) == 0) { - rstruct->rs_free_size = (max_val >> 6) * 8; - } else { - rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; - } - rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); - - /* Initialize resource structure */ - rstruct->rs_min = min_val; - rstruct->rs_last = min_val; - rstruct->rs_max = max_val; - mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, - state->is_iblock_cookie); - - /* Mark all resources as free */ - array_size = rstruct->rs_free_size >> 3; - for (index = 0; index < array_size; index++) { - rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; - } - - /* setup handle which is returned from this function */ - *handle = rstruct; -} - - -/* - * ioat_rs_fini() - * Frees up the space allocated in init(). Notice that a pointer to the - * handle is used for the parameter. fini() will set the handle to NULL - * before returning. 
- */ -void -ioat_rs_fini(ioat_rs_hdl_t *handle) -{ - ioat_rs_t *rstruct; - - - ASSERT(handle != NULL); - - rstruct = (ioat_rs_t *)*handle; - - mutex_destroy(&rstruct->rs_mutex); - kmem_free(rstruct->rs_free, rstruct->rs_free_size); - kmem_free(rstruct, sizeof (ioat_rs_t)); - - /* set handle to null. This helps catch bugs. */ - *handle = NULL; -} - - -/* - * ioat_rs_alloc() - * alloc a resource. If alloc fails, we are out of resources. - */ -int -ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *resource) -{ - ioat_rs_t *rstruct; - uint_t array_idx; - uint64_t free; - uint_t index; - uint_t last; - uint_t min; - uint_t max; - - - ASSERT(handle != NULL); - ASSERT(resource != NULL); - - rstruct = (ioat_rs_t *)handle; - - mutex_enter(&rstruct->rs_mutex); - min = rstruct->rs_min; - max = rstruct->rs_max; - - /* - * Find a free resource. This will return out of the loop once it finds - * a free resource. There are a total of 'max'-'min'+1 resources. - * Performs a round robin allocation. - */ - for (index = min; index <= max; index++) { - - array_idx = rstruct->rs_last >> 6; - free = rstruct->rs_free[array_idx]; - last = rstruct->rs_last & 0x3F; - - /* if the next resource to check is free */ - if ((free & ((uint64_t)1 << last)) != 0) { - /* we are using this resource */ - *resource = rstruct->rs_last; - - /* take it out of the free list */ - rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); - - /* - * increment the last count so we start checking the - * next resource on the next alloc(). Note the rollover - * at 'max'+1. - */ - rstruct->rs_last++; - if (rstruct->rs_last > max) { - rstruct->rs_last = rstruct->rs_min; - } - - /* unlock the resource structure */ - mutex_exit(&rstruct->rs_mutex); - - return (DDI_SUCCESS); - } - - /* - * This resource is not free, lets go to the next one. Note the - * rollover at 'max'. 
- */ - rstruct->rs_last++; - if (rstruct->rs_last > max) { - rstruct->rs_last = rstruct->rs_min; - } - } - - mutex_exit(&rstruct->rs_mutex); - - return (DDI_FAILURE); -} - - -/* - * ioat_rs_free() - * Free the previously alloc'd resource. Once a resource has been free'd, - * it can be used again when alloc is called. - */ -void -ioat_rs_free(ioat_rs_hdl_t handle, uint_t resource) -{ - ioat_rs_t *rstruct; - uint_t array_idx; - uint_t offset; - - - ASSERT(handle != NULL); - - rstruct = (ioat_rs_t *)handle; - ASSERT(resource >= rstruct->rs_min); - ASSERT(resource <= rstruct->rs_max); - - mutex_enter(&rstruct->rs_mutex); - - /* Put the resource back in the free list */ - array_idx = resource >> 6; - offset = resource & 0x3F; - rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); - - mutex_exit(&rstruct->rs_mutex); -}
--- a/deleted_files/usr/src/uts/i86pc/ioat/Makefile Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/i86pc/ioat/Makefile -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the ioat driver kernel -# module. -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = ioat -OBJECTS = $(IOAT_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) -CONF_SRCDIR = $(UTSBASE)/i86pc/io/ioat - -# -# Include common rules. -# -include $(UTSBASE)/i86pc/Makefile.i86pc - -# -# Define targets -# -ALL_TARGET = $(BINARY) $(SRC_CONFILE) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) - -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN - -# -# Dependency -# -LDFLAGS += -dy -Nmisc/dcopy - -# -# Override defaults to build a unique, local modstubs.o. 
-# -MODSTUBS_DIR = $(OBJS_DIR) -CLEANFILES += $(MODSTUBS_O) - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/i86pc/Makefile.targ -
--- a/deleted_files/usr/src/uts/i86pc/sys/ioat.h Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,359 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_IOAT_H -#define _SYS_IOAT_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/types.h> -#include <sys/dcopy.h> -#include <sys/dcopy_device.h> - - -/* ioat ioctls */ -#define IOATIOC ('T'<< 8) -typedef enum { - IOAT_IOCTL_WRITE_REG = (IOATIOC | 0x0), - IOAT_IOCTL_READ_REG = (IOATIOC | 0x1), - IOAT_IOCTL_TEST = (IOATIOC | 0x2) -} ioat_ioctl_enum_t; - -typedef struct ioat_ioctl_reg_s { - uint_t size; - uint_t addr; - uint64_t data; -} ioat_ioctl_reg_t; -typedef ioat_ioctl_reg_t ioat_ioctl_wrreg_t; -typedef ioat_ioctl_reg_t ioat_ioctl_rdreg_t; - -#ifdef _KERNEL -/* *** Driver Private Below *** */ - -/* IOAT_DMACAPABILITY flags */ -#define IOAT_DMACAP_PAGEBREAK 0x1 -#define IOAT_DMACAP_CRC 0x2 -#define IOAT_DMACAP_MARKERSKIP 0x4 -#define IOAT_DMACAP_XOR 0x8 -#define IOAT_DMACAP_DCA 0x10 - -/* IOAT_INTRCTL bits */ -#define IOAT_INTRCTL_MASTER_EN 0x1 -#define IOAT_INTRCTL_INTR_STAT 0x2 - -/* MMIO Registers */ -#define IOAT_CHANCNT 0x0 /* 8-bit */ -#define IOAT_XFERCAP 0x1 /* 8-bit */ -#define IOAT_GENCTRL 0x2 /* 8-bit */ -#define IOAT_INTRCTL 0x3 /* 8-bit */ -#define IOAT_ATTNSTATUS 0x4 /* 32-bit */ -#define IOAT_CBVER 0x8 /* 8-bit */ -#define IOAT_PERPORT_OFF 0xA /* 16-bit */ -#define IOAT_INTRDELAY 0xC /* 16-bit */ -#define IOAT_CSSTATUS 0xE /* 16-bit */ -#define IOAT_DMACAPABILITY 0x10 /* 32-bit */ - -#define IOAT_CHANNELREG_OFFSET 0x80 - -/* Channel Registers */ -#define IOAT_CHAN_CTL 0x0 /* 16-bit */ -#define IOAT_CHAN_COMP 0x2 /* 16-bit */ -#define IOAT_CHAN_CMPL_LO 0x18 /* 32-bit */ -#define IOAT_CHAN_CMPL_HI 0x1C /* 32-bit */ -#define IOAT_CHAN_ERR 0x28 /* 32-bit */ -#define IOAT_CHAN_ERRMASK 0x2C /* 32-bit */ -#define IOAT_CHAN_DCACTRL 0x30 /* 32-bit */ - -#define IOAT_V1_CHAN_STS_LO 0x4 /* 32-bit */ -#define IOAT_V1_CHAN_STS_HI 0x8 /* 32-bit */ -#define IOAT_V1_CHAN_ADDR_LO 0x0C /* 32-bit */ -#define IOAT_V1_CHAN_ADDR_HI 0x10 /* 32-bit */ -#define IOAT_V1_CHAN_CMD 0x14 /* 8-bit */ - -#define 
IOAT_V2_CHAN_CMD 0x4 /* 8-bit */ -#define IOAT_V2_CHAN_CNT 0x6 /* 16-bit */ -#define IOAT_V2_CHAN_STS_LO 0x8 /* 32-bit */ -#define IOAT_V2_CHAN_STS_HI 0xC /* 32-bit */ -#define IOAT_V2_CHAN_ADDR_LO 0x10 /* 32-bit */ -#define IOAT_V2_CHAN_ADDR_HI 0x14 /* 32-bit */ - -#define IOAT_CHAN_STS_ADDR_MASK 0xFFFFFFFFFFFFFFC0 -#define IOAT_CHAN_STS_XFER_MASK 0x3F -#define IOAT_CHAN_STS_FAIL_MASK 0x6 -#define IOAT_CMPL_INDEX(channel) \ - (((*channel->ic_cmpl & IOAT_CHAN_STS_ADDR_MASK) - \ - ring->cr_phys_desc) >> 6) -#define IOAT_CMPL_FAILED(channel) \ - (*channel->ic_cmpl & IOAT_CHAN_STS_FAIL_MASK) - - -typedef struct ioat_chan_desc_s { - uint32_t dd_res0; - uint32_t dd_ctrl; - uint64_t dd_res1; - uint64_t dd_res2; - uint64_t dd_next_desc; - uint64_t dd_res4; - uint64_t dd_res5; - uint64_t dd_res6; - uint64_t dd_res7; -} ioat_chan_desc_t; - -/* dca dd_ctrl bits */ -#define IOAT_DESC_CTRL_OP_CNTX ((uint32_t)0xFF << 24) -#define IOAT_DESC_CTRL_CNTX_CHNG 0x1 -typedef struct ioat_chan_dca_desc_s { - uint32_t dd_cntx; - uint32_t dd_ctrl; - uint64_t dd_res1; - uint64_t dd_res2; - uint64_t dd_next_desc; - uint64_t dd_res4; - uint64_t dd_res5; - uint64_t dd_res6; - uint64_t dd_res7; -} ioat_chan_dca_desc_t; - -/* dma dd_ctrl bits */ -#define IOAT_DESC_CTRL_OP_DMA (0x0 << 24) -#define IOAT_DESC_DMACTRL_NULL 0x20 -#define IOAT_DESC_CTRL_FENCE 0x10 -#define IOAT_DESC_CTRL_CMPL 0x8 -#define IOAT_DESC_CTRL_INTR 0x1 -typedef struct ioat_chan_dma_desc_s { - uint32_t dd_size; - uint32_t dd_ctrl; - uint64_t dd_src_paddr; - uint64_t dd_dest_paddr; - uint64_t dd_next_desc; - uint64_t dd_next_src_paddr; /* v2 only */ - uint64_t dd_next_dest_paddr; /* v2 only */ - uint64_t dd_res6; - uint64_t dd_res7; -} ioat_chan_dma_desc_t; - - -typedef enum { - IOAT_CBv1, - IOAT_CBv2 -} ioat_version_t; - -/* ioat private data per command */ -typedef struct ioat_cmd_private_s { - uint64_t ip_generation; - uint64_t ip_index; - dcopy_cmd_t ip_next; -} ioat_cmd_private_t; - -/* descriptor ring state */ -typedef 
struct ioat_channel_ring_s { - /* protects cr_cmpl_gen & cr_cmpl_last */ - kmutex_t cr_cmpl_mutex; - - /* desc ring generation for the last completion we saw */ - uint64_t cr_cmpl_gen; - - /* last descriptor index we saw complete */ - uint64_t cr_cmpl_last; - - /* protects cr_desc_* */ - kmutex_t cr_desc_mutex; - - /* - * last descriptor posted. used to update its next pointer when we - * add a new desc. Also used to tack the completion (See comment for - * cr_desc_gen_prev). - */ - uint64_t cr_desc_prev; - - /* where to put the next descriptor */ - uint64_t cr_desc_next; - - /* what the current desc ring generation is */ - uint64_t cr_desc_gen; - - /* - * used during cmd_post to track the last desc posted. cr_desc_next - * and cr_desc_gen will be pointing to the next free desc after - * writing the descriptor to the ring. But we want to track the - * completion for the last descriptor posted. - */ - uint64_t cr_desc_gen_prev; - - /* the last desc in the ring (for wrap) */ - uint64_t cr_desc_last; - - /* pointer to the head of the ring */ - ioat_chan_desc_t *cr_desc; - - /* physical address of the head of the ring */ - uint64_t cr_phys_desc; - - /* back pointer to the channel state */ - struct ioat_channel_s *cr_chan; - - /* for CB v2, number of desc posted (written to IOAT_V2_CHAN_CNT) */ - uint_t cr_post_cnt; -} ioat_channel_ring_t; - -/* track channel state so we can handle a failure */ -typedef enum { - IOAT_CHANNEL_OK = 0, - IOAT_CHANNEL_IN_FAILURE = 1 -} ic_channel_state_t; - -typedef struct ioat_channel_s *ioat_channel_t; -struct ioat_channel_s { - /* channel's ring state */ - ioat_channel_ring_t *ic_ring; - - /* IOAT_CBv1 || IOAT_CBv2 */ - ioat_version_t ic_ver; - - /* - * state to determine if it's OK to post the the channel and if all - * future polls should return failure. 
- */ - ic_channel_state_t ic_channel_state; - - /* channel command cache (*_cmd_alloc, *_cmd_free, etc) */ - kmem_cache_t *ic_cmd_cache; - - /* dcopy state for dcopy_device_channel_notify() call */ - dcopy_handle_t ic_dcopy_handle; - - /* location in memory where completions are DMA'ed into */ - volatile uint64_t *ic_cmpl; - - /* channel specific registers */ - uint8_t *ic_regs; - - /* if this channel is using DCA */ - boolean_t ic_dca_active; - - /* DCA ID the channel is currently pointing to */ - uint32_t ic_dca_current; - - /* devices channel number */ - uint_t ic_chan_num; - - /* number of descriptors in ring */ - uint_t ic_chan_desc_cnt; - - /* descriptor ring alloc state */ - ddi_dma_handle_t ic_desc_dma_handle; - size_t ic_desc_alloc_size; - ddi_acc_handle_t ic_desc_handle; - ddi_dma_cookie_t ic_desc_cookies; - - /* completion buffer alloc state */ - ddi_dma_handle_t ic_cmpl_dma_handle; - size_t ic_cmpl_alloc_size; - ddi_acc_handle_t ic_cmpl_handle; - ddi_dma_cookie_t ic_cmpl_cookie; - uint64_t ic_phys_cmpl; - - /* if inuse, we need to re-init the channel during resume */ - boolean_t ic_inuse; - - /* backpointer to driver state */ - struct ioat_state_s *ic_state; -}; - -typedef struct ioat_rs_s *ioat_rs_hdl_t; - -/* driver state */ -typedef struct ioat_state_s { - dev_info_t *is_dip; - int is_instance; - - kmutex_t is_mutex; - - /* register handle and pointer to registers */ - ddi_acc_handle_t is_reg_handle; - uint8_t *is_genregs; - - /* IOAT_CBv1 || IOAT_CBv2 */ - ioat_version_t is_ver; - - /* channel state */ - ioat_channel_t is_channel; - size_t is_chansize; - ioat_rs_hdl_t is_channel_rs; - - ddi_iblock_cookie_t is_iblock_cookie; - - /* device info */ - uint_t is_chanoff; - uint_t is_num_channels; - uint_t is_maxxfer; - uint_t is_cbver; - uint_t is_intrdelay; - uint_t is_status; - uint_t is_capabilities; - - /* dcopy_device_register()/dcopy_device_unregister() state */ - dcopy_device_handle_t is_device_handle; - dcopy_device_info_t is_deviceinfo; -} 
ioat_state_t; - - -int ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, - int *rval); - -void ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val, - ioat_rs_hdl_t *handle); -void ioat_rs_fini(ioat_rs_hdl_t *handle); -int ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *rs); -void ioat_rs_free(ioat_rs_hdl_t handle, uint_t rs); - -int ioat_channel_init(ioat_state_t *state); -void ioat_channel_fini(ioat_state_t *state); -void ioat_channel_suspend(ioat_state_t *state); -int ioat_channel_resume(ioat_state_t *state); - -int ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags, - uint_t size, dcopy_query_channel_t *info, void *channel_private); -void ioat_channel_free(void *channel_private); -void ioat_channel_intr(ioat_channel_t channel); -int ioat_cmd_alloc(void *channel, int flags, dcopy_cmd_t *cmd); -void ioat_cmd_free(void *channel, dcopy_cmd_t *cmd); -int ioat_cmd_post(void *channel, dcopy_cmd_t cmd); -int ioat_cmd_poll(void *channel, dcopy_cmd_t cmd); -void ioat_unregister_complete(void *device_private, int status); - - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_IOAT_H */
--- a/deleted_files/usr/src/uts/i86xpv/ioat/Makefile Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/i86xpv/ioat/Makefile -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#pragma ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the ioat driver kernel -# module. -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = ioat -OBJECTS = $(IOAT_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) -CONF_SRCDIR = $(UTSBASE)/i86pc/io/ioat - -# -# Include common rules. -# -include $(UTSBASE)/i86xpv/Makefile.i86xpv - -# -# Define targets -# -ALL_TARGET = $(BINARY) $(SRC_CONFILE) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) - -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN - -# -# Dependency -# -LDFLAGS += -dy -Nmisc/dcopy - -# -# Override defaults to build a unique, local modstubs.o. 
-# -MODSTUBS_DIR = $(OBJS_DIR) -CLEANFILES += $(MODSTUBS_O) - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/i86xpv/Makefile.targ -
--- a/deleted_files/usr/src/uts/intel/dcopy/Makefile Fri May 23 18:47:44 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/dcopy/Makefile -# -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the dcopy -# kernel module. -# -# intel architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = dcopy -OBJECTS = $(DCOPY_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(DCOPY_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# Default build targets. 
-# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/pkgdefs/Makefile Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/pkgdefs/Makefile Fri May 23 20:14:10 2008 -0700 @@ -125,6 +125,7 @@ SUNWgrub \ SUNWgrubS \ SUNWhxge \ + SUNWdcopy \ SUNWipw \ SUNWiwi \ SUNWiwk \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/Makefile Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +TMPLFILES += postinstall preremove +DATAFILES += depend + +.KEEP_STATE: + +all: $(FILES) +install: all pkg + +include ../Makefile.targ +include ../Makefile.prtarg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/pkginfo.tmpl Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,50 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. +# +PKG="SUNWdcopy" +NAME="Sun dcopy DMA drivers" +ARCH="i386" +CATEGORY="system" +BASEDIR=/ +SUNW_PKGVERS="1.0" +SUNW_PKGTYPE="root" +CLASSES="none" +DESC="Sun dcopy DMA drivers" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +VERSION="ONVERS,REV=0.0.0" +VENDOR="Sun Microsystems, Inc." +HOTLINE="Please contact your local service provider" +EMAIL="" +MAXINST="1000" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="true" +SUNW_PKG_THISZONE="false"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/postinstall.tmpl Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,33 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +include drv_utils + +CB1='"pciex8086,1a38" "pciex8086,360b"' +CB2='"pciex8086,402f"' + +pkg_drvadd -i "'$CB1 $CB2'" ioat || exit 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/preremove.tmpl Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,31 @@ +#!/sbin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +include drv_utils + +pkg_drvrem ioat || exit 1 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/prototype_com Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. + +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment + +# +# packaging files +i copyright +i depend +i pkginfo +i postinstall +i preremove + +# +# source locations relative to the prototype file +# +# +# SUNWdcopy +# +d none kernel 0755 root sys +d none kernel/misc 0755 root sys +f none kernel/misc/dcopy 0755 root sys
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/pkgdefs/SUNWdcopy/prototype_i386 Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,62 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. 
+ +#!search <pathname pathname ...> # where to find pkg objects +#!include <filename> # include another 'prototype' file +#!default <mode> <owner> <group> # default used if not specified on entry +#!<param>=<value> # puts parameter in pkg environment +# +# +# Include ISA independent files (prototype_com) +# +!include prototype_com +# +# +# List files which are i386 specific here +# +# SUNWioat +# +d none kernel/misc/amd64 0755 root sys +f none kernel/misc/amd64/dcopy 0755 root sys +d none platform 0755 root sys +d none platform/i86pc 0755 root sys +d none platform/i86pc/kernel 0755 root sys +d none platform/i86pc/kernel/drv 0755 root sys +f none platform/i86pc/kernel/drv/ioat 755 root sys +f none platform/i86pc/kernel/drv/ioat.conf 644 root sys +d none platform/i86pc/kernel/drv/amd64 0755 root sys +f none platform/i86pc/kernel/drv/amd64/ioat 755 root sys +d none platform/i86xpv 0755 root sys +d none platform/i86xpv/kernel 0755 root sys +d none platform/i86xpv/kernel/drv 0755 root sys +f none platform/i86xpv/kernel/drv/ioat 755 root sys +f none platform/i86xpv/kernel/drv/ioat.conf 644 root sys +d none platform/i86xpv/kernel/drv/amd64 0755 root sys +f none platform/i86xpv/kernel/drv/amd64/ioat 755 root sys
--- a/usr/src/pkgdefs/SUNWhea/prototype_com Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/pkgdefs/SUNWhea/prototype_com Fri May 23 20:14:10 2008 -0700 @@ -1218,6 +1218,7 @@ f none usr/include/sys/socket_impl.h 644 root bin f none usr/include/sys/socketvar.h 644 root bin f none usr/include/sys/sockio.h 644 root bin +f none usr/include/sys/sodirect.h 644 root bin f none usr/include/sys/sservice.h 644 root bin f none usr/include/sys/squeue.h 644 root bin f none usr/include/sys/squeue_impl.h 644 root bin
--- a/usr/src/uts/common/fs/sockfs/socksctp.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/socksctp.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -171,6 +171,8 @@ so->so_nl7c_uri = NULL; so->so_nl7c_rcv_mp = NULL; + so->so_direct = NULL; + vp = vn_alloc(kmflags); if (vp == NULL) { return (-1); @@ -204,6 +206,8 @@ struct sonode *so = &ss->ss_so; struct vnode *vp = SOTOV(so); + ASSERT(so->so_direct == NULL); + ASSERT(so->so_nl7c_flags == 0); ASSERT(so->so_nl7c_uri == NULL); ASSERT(so->so_nl7c_rcv_mp == NULL);
--- a/usr/src/uts/common/fs/sockfs/socksdp.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/socksdp.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -133,6 +133,8 @@ so->so_nl7c_uri = NULL; so->so_nl7c_rcv_mp = NULL; + so->so_direct = NULL; + vp = vn_alloc(kmflags); if (vp == NULL) { return (-1); @@ -159,6 +161,8 @@ struct sonode *so = &ss->ss_so; struct vnode *vp = SOTOV(so); + ASSERT(so->so_direct == NULL); + ASSERT(so->so_nl7c_flags == 0); ASSERT(so->so_nl7c_uri == NULL); ASSERT(so->so_nl7c_rcv_mp == NULL);
--- a/usr/src/uts/common/fs/sockfs/sockstr.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockstr.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -69,6 +69,8 @@ #include <c2/audit.h> +#include <sys/dcopy.h> + int so_default_version = SOV_SOCKSTREAM; #ifdef DEBUG @@ -119,6 +121,26 @@ static int tlitosyserr(int terr); /* + * Sodirect kmem_cache and put/wakeup functions. + */ +struct kmem_cache *socktpi_sod_cache; +static int sodput(sodirect_t *, mblk_t *); +static void sodwakeup(sodirect_t *); + +/* + * Called by sockinit() when sockfs is loaded. + */ +int +sostr_init() +{ + /* Allocate sodirect_t kmem_cache */ + socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache", + sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + + return (0); +} + +/* * Convert a socket to a stream. Invoked when the illusory sockmod * is popped from the stream. * Change the stream head back to default operation without losing @@ -468,6 +490,34 @@ stp->sd_qn_minpsz = 0; mutex_exit(&stp->sd_lock); + /* + * If sodirect capable allocate and initialize sodirect_t. + * Note, SS_SODIRECT is set in socktpi_open(). + */ + if (so->so_state & SS_SODIRECT) { + sodirect_t *sodp; + + ASSERT(so->so_direct == NULL); + + sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP); + sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT; + sodp->sod_want = 0; + sodp->sod_q = RD(stp->sd_wrq); + sodp->sod_enqueue = sodput; + sodp->sod_wakeup = sodwakeup; + sodp->sod_uioafh = NULL; + sodp->sod_uioaft = NULL; + sodp->sod_lock = &stp->sd_lock; + /* + * Remainder of the sod_uioa members are left uninitialized + * but will be initialized later by uioainit() before uioa + * is enabled. 
+ */ + sodp->sod_uioa.uioa_state = UIOA_ALLOC; + so->so_direct = sodp; + stp->sd_sodirect = sodp; + } + return (0); } @@ -2872,3 +2922,121 @@ else return (tli_errs[terr]); } + +/* + * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable + * transport driver/module with an mblk_t chain. + * + * Note, we in-line putq() for the fast-path cases of q is empty, q_last and + * bp are of type M_DATA. All other cases we call putq(). + * + * On success a zero will be return, else an errno will be returned. + */ +int +sodput(sodirect_t *sodp, mblk_t *bp) +{ + queue_t *q = sodp->sod_q; + struct stdata *stp = (struct stdata *)q->q_ptr; + mblk_t *nbp; + int ret; + mblk_t *last = q->q_last; + int bytecnt = 0; + int mblkcnt = 0; + + + ASSERT(MUTEX_HELD(sodp->sod_lock)); + + if (stp->sd_flag == STREOF) { + ret = 0; + goto error; + } + + if (q->q_first == NULL) { + /* Q empty, really fast fast-path */ + bp->b_prev = NULL; + bp->b_next = NULL; + q->q_first = bp; + q->q_last = bp; + + } else if (last->b_datap->db_type == M_DATA && + bp->b_datap->db_type == M_DATA) { + /* + * Last mblk_t chain and bp are both type M_DATA so + * in-line putq() here, if the DBLK_UIOA state match + * add bp to the end of the current last chain, else + * start a new last chain with bp. + */ + if ((last->b_datap->db_flags & DBLK_UIOA) == + (bp->b_datap->db_flags & DBLK_UIOA)) { + /* Added to end */ + while ((nbp = last->b_cont) != NULL) + last = nbp; + last->b_cont = bp; + } else { + /* New last */ + last->b_next = bp; + bp->b_next = NULL; + bp->b_prev = last; + q->q_last = bp; + } + } else { + /* + * Can't use q_last so just call putq(). 
+ */ + (void) putq(q, bp); + return (0); + } + + /* Count bytes and mblk_t's */ + do { + bytecnt += MBLKL(bp); + mblkcnt++; + } while ((bp = bp->b_cont) != NULL); + q->q_count += bytecnt; + q->q_mblkcnt += mblkcnt; + + /* Check for QFULL */ + if (q->q_count >= q->q_hiwat + sodp->sod_want || + q->q_mblkcnt >= q->q_hiwat) { + q->q_flag |= QFULL; + } + + return (0); + +error: + do { + if ((nbp = bp->b_next) != NULL) + bp->b_next = NULL; + freemsg(bp); + } while ((bp = nbp) != NULL); + + return (ret); +} + +/* + * Sockfs sodirect read wakeup. Called from a sodirect enabled transport + * driver/module to indicate that read-side data is available. + * + * On return the sodirect_t.lock mutex will be exited so this must be the + * last sodirect_t call to guarantee atomic access of *sodp. + */ +void +sodwakeup(sodirect_t *sodp) +{ + queue_t *q = sodp->sod_q; + struct stdata *stp = (struct stdata *)q->q_ptr; + + ASSERT(MUTEX_HELD(sodp->sod_lock)); + + if (stp->sd_flag & RSLEEP) { + stp->sd_flag &= ~RSLEEP; + cv_broadcast(&q->q_wait); + } + + if (stp->sd_rput_opt & SR_POLLIN) { + stp->sd_rput_opt &= ~SR_POLLIN; + mutex_exit(sodp->sod_lock); + pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); + } else + mutex_exit(sodp->sod_lock); +}
--- a/usr/src/uts/common/fs/sockfs/socksubr.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/socksubr.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -44,6 +44,7 @@ #include <sys/file.h> #include <sys/open.h> #include <sys/user.h> +#include <sys/uio.h> #include <sys/termios.h> #include <sys/stream.h> #include <sys/strsubr.h> @@ -90,6 +91,7 @@ #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; +struct kmem_cache *socktpi_sod_cache; dev_t sockdev; /* For fsid in getattr */ @@ -105,6 +107,8 @@ extern void nl7c_init(void); +extern int sostr_init(); + #define ADRSTRLEN (2 * sizeof (void *) + 1) /* * kernel structure for passing the sockinfo data back up to the user. @@ -523,6 +527,15 @@ so->so_nl7c_flags = 0; } + if (so->so_direct != NULL) { + sodirect_t *sodp = so->so_direct; + + ASSERT(sodp->sod_uioafh == NULL); + + so->so_direct = NULL; + kmem_cache_free(socktpi_sod_cache, sodp); + } + ASSERT(so->so_ux_bound_vp == NULL); if ((mp = so->so_unbind_mp) != NULL) { freemsg(mp); @@ -567,6 +580,8 @@ struct sonode *so = buf; struct vnode *vp; + so->so_direct = NULL; + so->so_nl7c_flags = 0; so->so_nl7c_uri = NULL; so->so_nl7c_rcv_mp = NULL; @@ -606,6 +621,8 @@ struct sonode *so = buf; struct vnode *vp = SOTOV(so); + ASSERT(so->so_direct == NULL); + ASSERT(so->so_nl7c_flags == 0); ASSERT(so->so_nl7c_uri == NULL); ASSERT(so->so_nl7c_rcv_mp == NULL); @@ -713,6 +730,12 @@ goto failure; } + error = sostr_init(); + if (error != 0) { + err_str = NULL; + goto failure; + } + /* * Create sonode caches. We create a special one for AF_UNIX so * that we can track them for netstat(1m).
--- a/usr/src/uts/common/fs/sockfs/socktpi.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/socktpi.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +58,7 @@ #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/sockio.h> +#include <sys/sodirect.h> #include <netinet/in.h> #include <sys/un.h> #include <sys/strsun.h> @@ -186,6 +187,9 @@ static int sotpi_unbind(struct sonode *, int); +extern int sodput(sodirect_t *, mblk_t *); +extern void sodwakeup(sodirect_t *); + /* TPI sockfs sonode operations */ static int sotpi_accept(struct sonode *, int, struct sonode **); static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, @@ -2910,11 +2914,13 @@ t_uscalar_t namelen; int so_state = so->so_state; /* Snapshot */ ssize_t saved_resid; - int error; rval_t rval; int flags; clock_t timout; int first; + int error = 0; + struct uio *suiop = NULL; + sodirect_t *sodp = so->so_direct; flags = msg->msg_flags; msg->msg_flags = 0; @@ -3062,6 +3068,53 @@ opflag = pflag; first = 1; + if (uiop->uio_resid >= uioasync.mincnt && + sodp != NULL && (sodp->sod_state & SOD_ENABLED) && + uioasync.enabled && !(flags & MSG_PEEK) && + !(so_state & SS_CANTRCVMORE)) { + /* + * Big enough I/O for uioa min setup and an sodirect socket + * and sodirect enabled and uioa enabled and I/O will be done + * and not EOF so initialize the sodirect_t uioa_t with "uiop". + */ + mutex_enter(sodp->sod_lock); + if (!uioainit(uiop, &sodp->sod_uioa)) { + /* + * Successful uioainit() so the uio_t part of the + * uioa_t will be used for all uio_t work to follow, + * we save the original "uiop" in "suiop". + */ + suiop = uiop; + uiop = (uio_t *)&sodp->sod_uioa; + /* + * Before returning to the caller the passed in uio_t + * "uiop" will be updated via a call to uioafini() + * below. 
+ * + * Note, the uioa.uioa_state isn't set to UIOA_ENABLED + * here as first we have to uioamove() any currently + * queued M_DATA mblk_t(s) so it will be done in + * kstrgetmsg(). + */ + } + /* + * In either uioainit() success or not case note the number + * of uio bytes the caller wants for sod framework and/or + * transport (e.g. TCP) strategy. + */ + sodp->sod_want = uiop->uio_resid; + mutex_exit(sodp->sod_lock); + } else if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { + /* + * No uioa but still using sodirect so note the number of + * uio bytes the caller wants for sodirect framework and/or + * transport (e.g. TCP) strategy. + * + * Note, sod_lock not held, only writer is in this function + * and only one thread at a time so not needed just to init. + */ + sodp->sod_want = uiop->uio_resid; + } retry: saved_resid = uiop->uio_resid; pri = 0; @@ -3091,10 +3144,7 @@ eprintsoline(so, error); break; } - mutex_enter(&so->so_lock); - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (error); + goto out; } /* * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 
@@ -3137,9 +3187,7 @@ pflag = opflag | MSG_NOMARK; goto retry; } - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (0); + goto out_locked; } /* strsock_proto has already verified length and alignment */ @@ -3179,9 +3227,7 @@ pflag = opflag | MSG_NOMARK; goto retry; } - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (0); + goto out_locked; } case T_UNITDATA_IND: { void *addr; @@ -3207,7 +3253,7 @@ freemsg(mp); error = EPROTO; eprintsoline(so, error); - goto err; + goto out; } if (so->so_family == AF_UNIX) { /* @@ -3236,7 +3282,7 @@ freemsg(mp); error = EPROTO; eprintsoline(so, error); - goto err; + goto out; } if (so->so_family == AF_UNIX) so_getopt_srcaddr(opt, optlen, &addr, &addrlen); @@ -3283,17 +3329,14 @@ msg->msg_namelen); kmem_free(control, controllen); eprintsoline(so, error); - goto err; + goto out; } msg->msg_control = control; msg->msg_controllen = controllen; } freemsg(mp); - mutex_enter(&so->so_lock); - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (0); + goto out; } case T_OPTDATA_IND: { struct T_optdata_req *tdr; @@ -3322,7 +3365,7 @@ freemsg(mp); error = EPROTO; eprintsoline(so, error); - goto err; + goto out; } ncontrollen = so_cmsglen(mp, opt, optlen, @@ -3350,7 +3393,7 @@ freemsg(mp); kmem_free(control, controllen); eprintsoline(so, error); - goto err; + goto out; } msg->msg_control = control; msg->msg_controllen = controllen; @@ -3382,9 +3425,7 @@ pflag = opflag | MSG_NOMARK; goto retry; } - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (0); + goto out_locked; } case T_EXDATA_IND: { dprintso(so, 1, @@ -3441,10 +3482,7 @@ eprintsoline(so, error); } #endif /* SOCK_DEBUG */ - mutex_enter(&so->so_lock); - so_unlock_read(so); /* Clear SOREADLOCKED */ - mutex_exit(&so->so_lock); - return (error); + goto out; } ASSERT(mp); tpr = (union T_primitives *)mp->b_rptr; @@ -3490,11 +3528,40 @@ freemsg(mp); error = EPROTO; 
eprintsoline(so, error); - goto err; + goto out; } /* NOTREACHED */ -err: +out: mutex_enter(&so->so_lock); +out_locked: + if (sodp != NULL) { + /* Finish any sodirect and uioa processing */ + mutex_enter(sodp->sod_lock); + if (suiop != NULL) { + /* Finish any uioa_t processing */ + int ret; + + ASSERT(uiop == (uio_t *)&sodp->sod_uioa); + ret = uioafini(suiop, (uioa_t *)uiop); + if (error == 0 && ret != 0) { + /* If no error yet, set it */ + error = ret; + } + if ((mp = sodp->sod_uioafh) != NULL) { + sodp->sod_uioafh = NULL; + sodp->sod_uioaft = NULL; + freemsg(mp); + } + } + if (!(sodp->sod_state & SOD_WAKE_NOT)) { + /* Awoke */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NOT; + } + /* Last, clear sod_want value */ + sodp->sod_want = 0; + mutex_exit(sodp->sod_lock); + } so_unlock_read(so); /* Clear SOREADLOCKED */ mutex_exit(&so->so_lock); return (error);
--- a/usr/src/uts/common/fs/sockfs/sockvnops.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockvnops.c Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -239,6 +239,10 @@ * udp case, when some other module is autopushed * above it, or for some reasons the expected module * isn't purely D_MP (which is the main requirement). + * + * Else, SS_DIRECT is valid. If the read-side Q has + * _QSODIRECT set then and uioasync is enabled then + * set SS_SODIRECT to enable sodirect. */ if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || !(_OTHERQ(tq)->q_flag & _QDIRECT)) { @@ -255,6 +259,10 @@ return (error); } } + } else if ((_OTHERQ(tq)->q_flag & _QSODIRECT) && + uioasync.enabled) { + /* Enable sodirect */ + so->so_state |= SS_SODIRECT; } } } else {
--- a/usr/src/uts/common/inet/tcp.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/inet/tcp.h Fri May 23 20:14:10 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -37,6 +37,7 @@ #include <netinet/ip6.h> #include <netinet/tcp.h> #include <sys/socket.h> +#include <sys/sodirect.h> #include <sys/multidata.h> #include <sys/md5.h> #include <inet/common.h> @@ -598,6 +599,13 @@ */ boolean_t tcp_flow_stopped; + /* + * tcp_sodirect is used by tcp on the receive side to push mblk_t(s) + * directly to sockfs. Also, to schedule asynchronous copyout directly + * to a pending user-land uio buffer. + */ + sodirect_t *tcp_sodirect; + #ifdef DEBUG pc_t tcmp_stk[15]; #endif
--- a/usr/src/uts/common/inet/tcp/tcp.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/inet/tcp/tcp.c Fri May 23 20:14:10 2008 -0700 @@ -66,6 +66,8 @@ #include <sys/isa_defs.h> #include <sys/md5.h> #include <sys/random.h> +#include <sys/sodirect.h> +#include <sys/uio.h> #include <netinet/in.h> #include <netinet/tcp.h> #include <netinet/ip6.h> @@ -216,6 +218,23 @@ * behaviour. Once tcp_issocket is unset, its never set for the * life of that connection. * + * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT) + * two consoldiation private KAPIs are used to enqueue M_DATA mblk_t's + * directly to the socket (sodirect) and start an asynchronous copyout + * to a user-land receive-side buffer (uioa) when a blocking socket read + * (e.g. read, recv, ...) is pending. + * + * This is accomplished when tcp_issocket is set and tcp_sodirect is not + * NULL so points to an sodirect_t and if marked enabled then we enqueue + * all mblk_t's directly to the socket. + * + * Further, if the sodirect_t sod_uioa and if marked enabled (due to a + * blocking socket read, e.g. user-land read, recv, ...) then an asynchronous + * copyout will be started directly to the user-land uio buffer. Also, as we + * have a pending read, TCP's push logic can take into account the number of + * bytes to be received and only awake the blocked read()er when the uioa_t + * byte count has been satisfied. + * * IPsec notes : * * Since a packet is always executed on the correct TCP perimeter @@ -246,6 +265,37 @@ squeue_func_t tcp_squeue_wput_proc; /* + * Macros for sodirect: + * + * SOD_PTR_ENTER(tcp, sodp) - for the tcp_t pointer "tcp" set the + * sodirect_t pointer "sodp" to the socket/tcp shared sodirect_t + * if it exists and is enabled, else to NULL. Note, in the current + * sodirect implementation the sod_lock must not be held across any + * STREAMS call (e.g. putnext) else a "recursive mutex_enter" PANIC + * will result as sod_lock is the streamhead stdata.sd_lock. 
+ * + * SOD_NOT_ENABLED(tcp) - return true if not a sodirect tcp_t or the + * sodirect_t isn't enabled, usefull for ASSERT()ing that a recieve + * side tcp code path dealing with a tcp_rcv_list or putnext() isn't + * being used when sodirect code paths should be. + */ + +#define SOD_PTR_ENTER(tcp, sodp) \ + (sodp) = (tcp)->tcp_sodirect; \ + \ + if ((sodp) != NULL) { \ + mutex_enter((sodp)->sod_lock); \ + if (!((sodp)->sod_state & SOD_ENABLED)) { \ + mutex_exit((sodp)->sod_lock); \ + (sodp) = NULL; \ + } \ + } + +#define SOD_NOT_ENABLED(tcp) \ + ((tcp)->tcp_sodirect == NULL || \ + !((tcp)->tcp_sodirect->sod_state & SOD_ENABLED)) + +/* * This controls how tiny a write must be before we try to copy it * into the the mblk on the tail of the transmit queue. Not much * speedup is observed for values larger than sixteen. Zero will @@ -3808,6 +3858,7 @@ mblk_t *mp; queue_t *q; tcp_stack_t *tcps = tcp->tcp_tcps; + sodirect_t *sodp; TCP_CLD_STAT(tag); @@ -3872,6 +3923,13 @@ return (-1); } + /* If sodirect, not anymore */ + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + tcp->tcp_sodirect = NULL; + mutex_exit(sodp->sod_lock); + } + q = tcp->tcp_rq; /* Trash all inbound data */ @@ -4236,6 +4294,11 @@ */ /* FALLTHRU */ default: + if (tcp->tcp_sodirect != NULL) { + /* Ok, no more sodirect */ + tcp->tcp_sodirect = NULL; + } + if (tcp->tcp_fused) tcp_unfuse(tcp); @@ -6381,6 +6444,15 @@ *(uint16_t *)tcp->tcp_tcph->th_lport = tcp->tcp_lport; } + if (tcp->tcp_issocket) { + /* + * TCP is _D_SODIRECT and sockfs is directly above so save + * the shared sonode sodirect_t pointer (if any) to enable + * TCP sodirect. 
+ */ + tcp->tcp_sodirect = SOD_QTOSODP(tcp->tcp_rq); + } + switch (tcp->tcp_state) { case TCPS_IDLE: /* @@ -8190,6 +8262,9 @@ ASSERT(!tcp->tcp_kssl_pending); PRESERVE(tcp->tcp_kssl_ent); + /* Sodirect */ + tcp->tcp_sodirect = NULL; + tcp->tcp_closemp_used = B_FALSE; #ifdef DEBUG @@ -8282,6 +8357,9 @@ tcp->tcp_fuse_rcv_unread_hiwater = 0; tcp->tcp_fuse_rcv_unread_cnt = 0; + /* Sodirect */ + tcp->tcp_sodirect = NULL; + /* Initialize the header template */ if (tcp->tcp_ipversion == IPV4_VERSION) { err = tcp_header_init_ipv4(tcp); @@ -11680,6 +11758,9 @@ if (tcp->tcp_listener != NULL) return (ret); + /* Can't be sodirect enabled */ + ASSERT(SOD_NOT_ENABLED(tcp)); + /* * Handle two cases here: we are currently fused or we were * previously fused and have some urgent data to be delivered @@ -11779,6 +11860,216 @@ } /* + * The tcp_rcv_sod_XXX() functions enqueue data directly to the socket + * above, in addition when uioa is enabled schedule an asynchronous uio + * prior to enqueuing. They implement the combinhed semantics of the + * tcp_rcv_XXX() functions, tcp_rcv_list push logic, and STREAMS putnext() + * canputnext(), i.e. flow-control with backenable. + * + * tcp_sod_wakeup() is called where tcp_rcv_drain() would be called in the + * non sodirect connection but as there are no tcp_tcv_list mblk_t's we deal + * with the rcv_wnd and push timer and call the sodirect wakeup function. + * + * Must be called with sodp->sod_lock held and will return with the lock + * released. 
+ */ +static uint_t +tcp_rcv_sod_wakeup(tcp_t *tcp, sodirect_t *sodp) +{ + queue_t *q = tcp->tcp_rq; + uint_t thwin; + tcp_stack_t *tcps = tcp->tcp_tcps; + uint_t ret = 0; + + /* Can't be an eager connection */ + ASSERT(tcp->tcp_listener == NULL); + + /* Caller must have lock held */ + ASSERT(MUTEX_HELD(sodp->sod_lock)); + + /* Sodirect mode so must not be a tcp_rcv_list */ + ASSERT(tcp->tcp_rcv_list == NULL); + + if (SOD_QFULL(sodp)) { + /* Q is full, mark Q for need backenable */ + SOD_QSETBE(sodp); + } + /* Last advertised rwnd, i.e. rwnd last sent in a packet */ + thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win)) + << tcp->tcp_rcv_ws; + /* This is peer's calculated send window (our available rwnd). */ + thwin -= tcp->tcp_rnxt - tcp->tcp_rack; + /* + * Increase the receive window to max. But we need to do receiver + * SWS avoidance. This means that we need to check the increase of + * of receive window is at least 1 MSS. + */ + if (!SOD_QFULL(sodp) && (q->q_hiwat - thwin >= tcp->tcp_mss)) { + /* + * If the window that the other side knows is less than max + * deferred acks segments, send an update immediately. + */ + if (thwin < tcp->tcp_rack_cur_max * tcp->tcp_mss) { + BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate); + ret = TH_ACK_NEEDED; + } + tcp->tcp_rwnd = q->q_hiwat; + } + + if (!SOD_QEMPTY(sodp)) { + /* Wakeup to socket */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_DONE; + (sodp->sod_wakeup)(sodp); + /* wakeup() does the mutex_ext() */ + } else { + /* Q is empty, no need to wake */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NOT; + mutex_exit(sodp->sod_lock); + } + + /* No need for the push timer now. */ + if (tcp->tcp_push_tid != 0) { + (void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid); + tcp->tcp_push_tid = 0; + } + + return (ret); +} + +/* + * Called where tcp_rcv_enqueue()/putnext(RD(q)) would be. 
For M_DATA + * mblk_t's if uioa enabled then start a uioa asynchronous copy directly + * to the user-land buffer and flag the mblk_t as such. + * + * Also, handle tcp_rwnd. + */ +uint_t +tcp_rcv_sod_enqueue(tcp_t *tcp, sodirect_t *sodp, mblk_t *mp, uint_t seg_len) +{ + uioa_t *uioap = &sodp->sod_uioa; + boolean_t qfull; + uint_t thwin; + + /* Can't be an eager connection */ + ASSERT(tcp->tcp_listener == NULL); + + /* Caller must have lock held */ + ASSERT(MUTEX_HELD(sodp->sod_lock)); + + /* Sodirect mode so must not be a tcp_rcv_list */ + ASSERT(tcp->tcp_rcv_list == NULL); + + /* Passed in segment length must be equal to mblk_t chain data size */ + ASSERT(seg_len == msgdsize(mp)); + + if (DB_TYPE(mp) != M_DATA) { + /* Only process M_DATA mblk_t's */ + goto enq; + } + if (uioap->uioa_state & UIOA_ENABLED) { + /* Uioa is enabled */ + mblk_t *mp1 = mp; + + if (seg_len > uioap->uio_resid) { + /* + * There isn't enough uio space for the mblk_t chain + * so disable uioa such that this and any additional + * mblk_t data is handled by the socket and schedule + * the socket for wakeup to finish this uioa. + */ + uioap->uioa_state &= UIOA_CLR; + uioap->uioa_state |= UIOA_FINI; + if (sodp->sod_state & SOD_WAKE_NOT) { + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NEED; + } + goto enq; + } + do { + uint32_t len = MBLKL(mp1); + + if (!uioamove(mp1->b_rptr, len, UIO_READ, uioap)) { + /* Scheduled, mark dblk_t as such */ + DB_FLAGS(mp1) |= DBLK_UIOA; + } else { + /* Error, turn off async processing */ + uioap->uioa_state &= UIOA_CLR; + uioap->uioa_state |= UIOA_FINI; + break; + } + } while ((mp1 = mp1->b_cont) != NULL); + + if (mp1 != NULL || uioap->uio_resid == 0) { + /* + * Not all mblk_t(s) uioamoved (error) or all uio + * space has been consumed so schedule the socket + * for wakeup to finish this uio. 
+ */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NEED; + } + } else if (uioap->uioa_state & UIOA_FINI) { + /* + * Post UIO_ENABLED waiting for socket to finish processing + * so just enqueue and update tcp_rwnd. + */ + if (SOD_QFULL(sodp)) + tcp->tcp_rwnd -= seg_len; + } else if (sodp->sod_want > 0) { + /* + * Uioa isn't enabled but sodirect has a pending read(). + */ + if (SOD_QCNT(sodp) + seg_len >= sodp->sod_want) { + if (sodp->sod_state & SOD_WAKE_NOT) { + /* Schedule socket for wakeup */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NEED; + } + tcp->tcp_rwnd -= seg_len; + } + } else if (SOD_QCNT(sodp) + seg_len >= tcp->tcp_rq->q_hiwat >> 3) { + /* + * No pending sodirect read() so used the default + * TCP push logic to guess that a push is needed. + */ + if (sodp->sod_state & SOD_WAKE_NOT) { + /* Schedule socket for wakeup */ + sodp->sod_state &= SOD_WAKE_CLR; + sodp->sod_state |= SOD_WAKE_NEED; + } + tcp->tcp_rwnd -= seg_len; + } else { + /* Just update tcp_rwnd */ + tcp->tcp_rwnd -= seg_len; + } +enq: + qfull = SOD_QFULL(sodp); + + (sodp->sod_enqueue)(sodp, mp); + + if (! qfull && SOD_QFULL(sodp)) { + /* Wasn't QFULL, now QFULL, need back-enable */ + SOD_QSETBE(sodp); + } + + /* + * Check to see if remote avail swnd < mss due to delayed ACK, + * first get advertised rwnd. + */ + thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win)); + /* Minus delayed ACK count */ + thwin -= tcp->tcp_rnxt - tcp->tcp_rack; + if (thwin < tcp->tcp_mss) { + /* Remote avail swnd < mss, need ACK now */ + return (TH_ACK_NEEDED); + } + + return (0); +} + +/* * DEFAULT TCP ENTRY POINT via squeue on READ side. * * This is the default entry function into TCP on the read side. 
TCP is @@ -14976,13 +15267,39 @@ tcp_rcv_enqueue(tcp, mp, seg_len); } } else { + sodirect_t *sodp = tcp->tcp_sodirect; + + /* + * If an sodirect connection and an enabled sodirect_t then + * sodp will be set to point to the tcp_t/sonode_t shared + * sodirect_t and the sodirect_t's lock will be held. + */ + if (sodp != NULL) { + mutex_enter(sodp->sod_lock); + if (!(sodp->sod_state & SOD_ENABLED)) { + mutex_exit(sodp->sod_lock); + sodp = NULL; + } else if (tcp->tcp_kssl_ctx != NULL && + DB_TYPE(mp) == M_DATA) { + mutex_exit(sodp->sod_lock); + sodp = NULL; + } + } if (mp->b_datap->db_type != M_DATA || (flags & TH_MARKNEXT_NEEDED)) { - if (tcp->tcp_rcv_list != NULL) { + if (sodp != NULL) { + if (!SOD_QEMPTY(sodp) && + (sodp->sod_state & SOD_WAKE_NOT)) { + flags |= tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() did the mutex_exit() */ + mutex_enter(sodp->sod_lock); + } + } else if (tcp->tcp_rcv_list != NULL) { flags |= tcp_rcv_drain(tcp->tcp_rq, tcp); } ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); + if (flags & TH_MARKNEXT_NEEDED) { #ifdef DEBUG (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, @@ -15001,10 +15318,42 @@ mblk_t *, mp); tcp_kssl_input(tcp, mp); } else { + if (sodp) { + /* + * Done with sodirect, use putnext + * to push this non M_DATA headed + * mblk_t chain. + */ + mutex_exit(sodp->sod_lock); + } putnext(tcp->tcp_rq, mp); if (!canputnext(tcp->tcp_rq)) tcp->tcp_rwnd -= seg_len; } + } else if ((tcp->tcp_kssl_ctx != NULL) && + (DB_TYPE(mp) == M_DATA)) { + /* Do SSL processing first */ + DTRACE_PROBE1(kssl_mblk__ksslinput_data2, + mblk_t *, mp); + tcp_kssl_input(tcp, mp); + } else if (sodp != NULL) { + /* + * Sodirect so all mblk_t's are queued on the + * socket directly, check for wakeup of blocked + * reader (if any), and last if flow-controled. 
+ */ + flags |= tcp_rcv_sod_enqueue(tcp, sodp, mp, seg_len); + if ((sodp->sod_state & SOD_WAKE_NEED) || + (flags & (TH_PUSH|TH_FIN))) { + flags |= tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() did the mutex_exit() */ + } else { + if (SOD_QFULL(sodp)) { + /* Q is full, need backenable */ + SOD_QSETBE(sodp); + } + mutex_exit(sodp->sod_lock); + } } else if ((flags & (TH_PUSH|TH_FIN)) || tcp->tcp_rcv_cnt + seg_len >= tcp->tcp_rq->q_hiwat >> 3) { if (tcp->tcp_rcv_list != NULL) { @@ -15024,41 +15373,33 @@ tcp_rcv_enqueue(tcp, mp, seg_len); flags |= tcp_rcv_drain(tcp->tcp_rq, tcp); } else { - /* Does this need SSL processing first? */ - if ((tcp->tcp_kssl_ctx != NULL) && - (DB_TYPE(mp) == M_DATA)) { - DTRACE_PROBE1( - kssl_mblk__ksslinput_data2, - mblk_t *, mp); - tcp_kssl_input(tcp, mp); - } else { - putnext(tcp->tcp_rq, mp); - if (!canputnext(tcp->tcp_rq)) - tcp->tcp_rwnd -= seg_len; - } + putnext(tcp->tcp_rq, mp); + if (!canputnext(tcp->tcp_rq)) + tcp->tcp_rwnd -= seg_len; } } else { /* * Enqueue all packets when processing an mblk * from the co queue and also enqueue normal packets. - * For packets which belong to SSL stream do SSL - * processing first. - */ - if ((tcp->tcp_kssl_ctx != NULL) && - (DB_TYPE(mp) == M_DATA)) { - DTRACE_PROBE1(kssl_mblk__tcpksslin3, - mblk_t *, mp); - tcp_kssl_input(tcp, mp); - } else { - tcp_rcv_enqueue(tcp, mp, seg_len); - } + */ + tcp_rcv_enqueue(tcp, mp, seg_len); } /* * Make sure the timer is running if we have data waiting * for a push bit. This provides resiliency against * implementations that do not correctly generate push bits. - */ - if (tcp->tcp_rcv_list != NULL && tcp->tcp_push_tid == 0) { + * + * Note, for sodirect if Q isn't empty and there's not a + * pending wakeup then we need a timer. Also note that sodp + * is assumed to be still valid after exit()ing the sod_lock + * above and while the SOD state can change it can only change + * such that the Q is empty now even though data was added + * above. 
+ */ + if (((sodp != NULL && !SOD_QEMPTY(sodp) && + (sodp->sod_state & SOD_WAKE_NOT)) || + (sodp == NULL && tcp->tcp_rcv_list != NULL)) && + tcp->tcp_push_tid == 0) { /* * The connection may be closed at this point, so don't * do anything for a detached tcp. @@ -15070,6 +15411,7 @@ tcps->tcps_push_timer_interval)); } } + xmit_check: /* Is there anything left to do? */ ASSERT(!(flags & TH_MARKNEXT_NEEDED)); @@ -15145,13 +15487,26 @@ /* * Send up any queued data and then send the mark message */ - if (tcp->tcp_rcv_list != NULL) { - flags |= tcp_rcv_drain(tcp->tcp_rq, tcp); - } - ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); + sodirect_t *sodp; + + SOD_PTR_ENTER(tcp, sodp); mp1 = tcp->tcp_urp_mark_mp; tcp->tcp_urp_mark_mp = NULL; + if (sodp != NULL) { + + ASSERT(tcp->tcp_rcv_list == NULL); + + flags |= tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() does the mutex_exit() */ + } else if (tcp->tcp_rcv_list != NULL) { + flags |= tcp_rcv_drain(tcp->tcp_rq, tcp); + + ASSERT(tcp->tcp_rcv_list == NULL || + tcp->tcp_fused_sigurg); + + } + putnext(tcp->tcp_rq, mp1); #ifdef DEBUG (void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE, "tcp_rput: sending zero-length %s %s", @@ -15159,7 +15514,6 @@ "MSGNOTMARKNEXT"), tcp_display(tcp, NULL, DISP_PORT_ONLY)); #endif /* DEBUG */ - putnext(tcp->tcp_rq, mp1); flags &= ~TH_SEND_URP_MARK; } if (flags & TH_ACK_NEEDED) { @@ -15197,14 +15551,32 @@ * In the eager case tcp_rsrv will do this when run * after tcp_accept is done. */ + sodirect_t *sodp; + ASSERT(tcp->tcp_listener == NULL); - if (tcp->tcp_rcv_list != NULL) { + + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + /* No more sodirect */ + tcp->tcp_sodirect = NULL; + if (!SOD_QEMPTY(sodp)) { + /* Mblk(s) to process, notify */ + flags |= tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() does the mutex_exit() */ + } else { + /* Nothing to process */ + mutex_exit(sodp->sod_lock); + } + } else if (tcp->tcp_rcv_list != NULL) { /* * Push any mblk(s) enqueued from co processing. 
*/ flags |= tcp_rcv_drain(tcp->tcp_rq, tcp); - } - ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); + + ASSERT(tcp->tcp_rcv_list == NULL || + tcp->tcp_fused_sigurg); + } + if ((mp1 = mi_tpi_ordrel_ind()) != NULL) { tcp->tcp_ordrel_done = B_TRUE; putnext(tcp->tcp_rq, mp1); @@ -15974,6 +16346,8 @@ queue_t *q = tcp->tcp_rq; uint_t thwin; tcp_stack_t *tcps = tcp->tcp_tcps; + sodirect_t *sodp; + boolean_t fc; freeb(mp); @@ -16024,7 +16398,27 @@ return; } - if (canputnext(q)) { + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + /* An sodirect connection */ + if (SOD_QFULL(sodp)) { + /* Flow-controlled, need another back-enable */ + fc = B_TRUE; + SOD_QSETBE(sodp); + } else { + /* Not flow-controlled */ + fc = B_FALSE; + } + mutex_exit(sodp->sod_lock); + } else if (canputnext(q)) { + /* STREAMS, not flow-controlled */ + fc = B_FALSE; + } else { + /* STREAMS, flow-controlled */ + fc = B_TRUE; + } + if (!fc) { + /* Not flow-controlled, open rwnd */ tcp->tcp_rwnd = q->q_hiwat; thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win)) << tcp->tcp_rcv_ws; @@ -16043,13 +16437,32 @@ BUMP_MIB(&tcps->tcps_mib, tcpOutWinUpdate); } } + /* Handle a failure to allocate a T_ORDREL_IND here */ if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) { ASSERT(tcp->tcp_listener == NULL); - if (tcp->tcp_rcv_list != NULL) { - (void) tcp_rcv_drain(q, tcp); - } - ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg); + + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + /* No more sodirect */ + tcp->tcp_sodirect = NULL; + if (!SOD_QEMPTY(sodp)) { + /* Notify mblk(s) to process */ + (void) tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() does the mutex_exit() */ + } else { + /* Nothing to process */ + mutex_exit(sodp->sod_lock); + } + } else if (tcp->tcp_rcv_list != NULL) { + /* + * Push any mblk(s) enqueued from co processing. 
+ */ + (void) tcp_rcv_drain(tcp->tcp_rq, tcp); + ASSERT(tcp->tcp_rcv_list == NULL || + tcp->tcp_fused_sigurg); + } + mp = mi_tpi_ordrel_ind(); if (mp) { tcp->tcp_ordrel_done = B_TRUE; @@ -18097,6 +18510,8 @@ */ if (tcp->tcp_rcv_list != NULL) { /* We drain directly in case of fused tcp loopback */ + sodirect_t *sodp; + if (!tcp->tcp_fused && canputnext(q)) { tcp->tcp_rwnd = q->q_hiwat; thwin = ((uint_t)BE16_TO_U16(tcp->tcp_tcph->th_win)) @@ -18112,7 +18527,26 @@ } } - (void) tcp_rcv_drain(q, tcp); + + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + /* Sodirect, move from rcv_list */ + ASSERT(!tcp->tcp_fused); + while ((mp = tcp->tcp_rcv_list) != NULL) { + tcp->tcp_rcv_list = mp->b_next; + mp->b_next = NULL; + (void) tcp_rcv_sod_enqueue(tcp, sodp, mp, + msgdsize(mp)); + } + tcp->tcp_rcv_last_head = NULL; + tcp->tcp_rcv_last_tail = NULL; + tcp->tcp_rcv_cnt = 0; + (void) tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() did the mutex_exit() */ + } else { + /* Not sodirect, drain */ + (void) tcp_rcv_drain(q, tcp); + } /* * For fused tcp loopback, back-enable peer endpoint @@ -18304,6 +18738,21 @@ listener = eager->tcp_listener; eager->tcp_issocket = B_TRUE; + /* + * TCP is _D_SODIRECT and sockfs is directly above so + * save shared sodirect_t pointer (if any). + * + * If tcp_fused and sodirect enabled disable it. 
+ */ + eager->tcp_sodirect = SOD_QTOSODP(eager->tcp_rq); + if (eager->tcp_fused && eager->tcp_sodirect != NULL) { + /* Fused, disable sodirect */ + mutex_enter(eager->tcp_sodirect->sod_lock); + SOD_DISABLE(eager->tcp_sodirect); + mutex_exit(eager->tcp_sodirect->sod_lock); + eager->tcp_sodirect = NULL; + } + econnp->conn_zoneid = listener->tcp_connp->conn_zoneid; econnp->conn_allzones = listener->tcp_connp->conn_allzones; ASSERT(econnp->conn_netstack == @@ -22140,6 +22589,7 @@ tcp_fuse_disable_pair(tcp, B_FALSE); } tcp->tcp_issocket = B_FALSE; + tcp->tcp_sodirect = NULL; TCP_STAT(tcps, tcp_sock_fallback); DB_TYPE(mp) = M_IOCACK; @@ -23420,6 +23870,8 @@ conn_t *connp = (conn_t *)arg; tcp_t *tcp = connp->conn_tcp; tcp_stack_t *tcps = tcp->tcp_tcps; + uint_t flags; + sodirect_t *sodp; TCP_DBGSTAT(tcps, tcp_push_timer_cnt); @@ -23431,9 +23883,17 @@ */ TCP_FUSE_SYNCSTR_PLUG_DRAIN(tcp); tcp->tcp_push_tid = 0; - if ((tcp->tcp_rcv_list != NULL) && - (tcp_rcv_drain(tcp->tcp_rq, tcp) == TH_ACK_NEEDED)) + + SOD_PTR_ENTER(tcp, sodp); + if (sodp != NULL) { + flags = tcp_rcv_sod_wakeup(tcp, sodp); + /* sod_wakeup() does the mutex_exit() */ + } else if (tcp->tcp_rcv_list != NULL) { + flags = tcp_rcv_drain(tcp->tcp_rq, tcp); + } + if (flags == TH_ACK_NEEDED) tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); + TCP_FUSE_SYNCSTR_UNPLUG_DRAIN(tcp); }
--- a/usr/src/uts/common/inet/tcp/tcp6ddi.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/inet/tcp/tcp6ddi.c Fri May 23 20:14:10 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -40,7 +40,7 @@ * for TCP Fusion (loopback); this is why we don't define * D_SYNCSTR here. */ -#define INET_DEVMTFLAGS (D_MP|_D_DIRECT) +#define INET_DEVMTFLAGS (D_MP|_D_DIRECT|_D_SODIRECT) #include "../inetddi.c"
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c Fri May 23 20:14:10 2008 -0700 @@ -287,6 +287,20 @@ if ((mp = allocb(sizeof (*stropt), BPRI_HI)) == NULL) goto failed; + /* If either tcp or peer_tcp sodirect enabled then disable */ + if (tcp->tcp_sodirect != NULL) { + mutex_enter(tcp->tcp_sodirect->sod_lock); + SOD_DISABLE(tcp->tcp_sodirect); + mutex_exit(tcp->tcp_sodirect->sod_lock); + tcp->tcp_sodirect = NULL; + } + if (peer_tcp->tcp_sodirect != NULL) { + mutex_enter(peer_tcp->tcp_sodirect->sod_lock); + SOD_DISABLE(peer_tcp->tcp_sodirect); + mutex_exit(peer_tcp->tcp_sodirect->sod_lock); + peer_tcp->tcp_sodirect = NULL; + } + /* Fuse both endpoints */ peer_tcp->tcp_loopback_peer = tcp; tcp->tcp_loopback_peer = peer_tcp;
--- a/usr/src/uts/common/inet/tcp/tcpddi.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/inet/tcp/tcpddi.c Fri May 23 20:14:10 2008 -0700 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -44,7 +44,7 @@ * for TCP Fusion (loopback); this is why we don't define * D_SYNCSTR here. */ -#define INET_DEVMTFLAGS (D_MP|_D_DIRECT) +#define INET_DEVMTFLAGS (D_MP|_D_DIRECT|_D_SODIRECT) #include "../inetddi.c"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/io/dcopy.c Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,938 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * dcopy.c + * dcopy misc module + */ + +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/modctl.h> +#include <sys/sysmacros.h> +#include <sys/atomic.h> + + +#include <sys/dcopy.h> +#include <sys/dcopy_device.h> + + +/* Number of entries per channel to allocate */ +uint_t dcopy_channel_size = 1024; + + +typedef struct dcopy_list_s { + list_t dl_list; + kmutex_t dl_mutex; + uint_t dl_cnt; /* num entries on list */ +} dcopy_list_t; + +/* device state for register/unregister */ +struct dcopy_device_s { + /* DMA device drivers private pointer */ + void *dc_device_private; + + /* to track list of channels from this DMA device */ + dcopy_list_t dc_devchan_list; + list_node_t dc_device_list_node; + + /* + * dc_removing_cnt track how many channels still have to be freed up + * before it's safe to allow the DMA device driver to detach. + */ + uint_t dc_removing_cnt; + dcopy_device_cb_t *dc_cb; + + dcopy_device_info_t dc_info; + +}; + +typedef struct dcopy_stats_s { + kstat_named_t cs_bytes_xfer; + kstat_named_t cs_cmd_alloc; + kstat_named_t cs_cmd_post; + kstat_named_t cs_cmd_poll; + kstat_named_t cs_notify_poll; + kstat_named_t cs_notify_pending; + kstat_named_t cs_id; + kstat_named_t cs_capabilities; +} dcopy_stats_t; + +/* DMA channel state */ +struct dcopy_channel_s { + /* DMA driver channel private pointer */ + void *ch_channel_private; + + /* shortcut to device callbacks */ + dcopy_device_cb_t *ch_cb; + + /* + * number of outstanding allocs for this channel. used to track when + * it's safe to free up this channel so the DMA device driver can + * detach. + */ + uint64_t ch_ref_cnt; + + /* state for if channel needs to be removed when ch_ref_cnt gets to 0 */ + boolean_t ch_removing; + + list_node_t ch_devchan_list_node; + list_node_t ch_globalchan_list_node; + + /* + * per channel list of commands actively blocking waiting for + * completion. 
+ */ + dcopy_list_t ch_poll_list; + + /* pointer back to our device */ + struct dcopy_device_s *ch_device; + + dcopy_query_channel_t ch_info; + + kstat_t *ch_kstat; + dcopy_stats_t ch_stat; +}; + +/* + * If grabbing both device_list mutex & globalchan_list mutex, + * Always grab globalchan_list mutex before device_list mutex + */ +typedef struct dcopy_state_s { + dcopy_list_t d_device_list; + dcopy_list_t d_globalchan_list; +} dcopy_state_t; +dcopy_state_t *dcopy_statep; + + +/* Module Driver Info */ +static struct modlmisc dcopy_modlmisc = { + &mod_miscops, + "dcopy kernel module" +}; + +/* Module Linkage */ +static struct modlinkage dcopy_modlinkage = { + MODREV_1, + &dcopy_modlmisc, + NULL +}; + +static int dcopy_init(); +static void dcopy_fini(); + +static int dcopy_list_init(dcopy_list_t *list, size_t node_size, + offset_t link_offset); +static void dcopy_list_fini(dcopy_list_t *list); +static void dcopy_list_push(dcopy_list_t *list, void *list_node); +static void *dcopy_list_pop(dcopy_list_t *list); + +static void dcopy_device_cleanup(dcopy_device_handle_t device, + boolean_t do_callback); + +static int dcopy_stats_init(dcopy_handle_t channel); +static void dcopy_stats_fini(dcopy_handle_t channel); + + +/* + * _init() + */ +int +_init() +{ + int e; + + e = dcopy_init(); + if (e != 0) { + return (e); + } + + return (mod_install(&dcopy_modlinkage)); +} + + +/* + * _info() + */ +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&dcopy_modlinkage, modinfop)); +} + + +/* + * _fini() + */ +int +_fini() +{ + int e; + + e = mod_remove(&dcopy_modlinkage); + if (e != 0) { + return (e); + } + + dcopy_fini(); + + return (e); +} + +/* + * dcopy_init() + */ +static int +dcopy_init() +{ + int e; + + + dcopy_statep = kmem_zalloc(sizeof (*dcopy_statep), KM_SLEEP); + + /* Initialize the list we use to track device register/unregister */ + e = dcopy_list_init(&dcopy_statep->d_device_list, + sizeof (struct dcopy_device_s), + offsetof(struct dcopy_device_s, 
dc_device_list_node)); + if (e != DCOPY_SUCCESS) { + goto dcopyinitfail_device; + } + + /* Initialize the list we use to track all DMA channels */ + e = dcopy_list_init(&dcopy_statep->d_globalchan_list, + sizeof (struct dcopy_channel_s), + offsetof(struct dcopy_channel_s, ch_globalchan_list_node)); + if (e != DCOPY_SUCCESS) { + goto dcopyinitfail_global; + } + + return (0); + +dcopyinitfail_cback: + dcopy_list_fini(&dcopy_statep->d_globalchan_list); +dcopyinitfail_global: + dcopy_list_fini(&dcopy_statep->d_device_list); +dcopyinitfail_device: + kmem_free(dcopy_statep, sizeof (*dcopy_statep)); + + return (-1); +} + + +/* + * dcopy_fini() + */ +static void +dcopy_fini() +{ + /* + * if mod_remove was successfull, we shouldn't have any + * devices/channels to worry about. + */ + ASSERT(list_head(&dcopy_statep->d_globalchan_list.dl_list) == NULL); + ASSERT(list_head(&dcopy_statep->d_device_list.dl_list) == NULL); + + dcopy_list_fini(&dcopy_statep->d_globalchan_list); + dcopy_list_fini(&dcopy_statep->d_device_list); + kmem_free(dcopy_statep, sizeof (*dcopy_statep)); +} + + +/* *** EXTERNAL INTERFACE *** */ +/* + * dcopy_query() + */ +void +dcopy_query(dcopy_query_t *query) +{ + query->dq_version = DCOPY_QUERY_V0; + query->dq_num_channels = dcopy_statep->d_globalchan_list.dl_cnt; +} + + +/* + * dcopy_alloc() + */ +/*ARGSUSED*/ +int +dcopy_alloc(int flags, dcopy_handle_t *handle) +{ + dcopy_handle_t channel; + dcopy_list_t *list; + + + /* + * we don't use the dcopy_list_* code here because we need to due + * some non-standard stuff. + */ + + list = &dcopy_statep->d_globalchan_list; + + /* + * if nothing is on the channel list, return DCOPY_NORESOURCES. This + * can happen if there aren't any DMA device registered. 
+ */ + mutex_enter(&list->dl_mutex); + channel = list_head(&list->dl_list); + if (channel == NULL) { + mutex_exit(&list->dl_mutex); + return (DCOPY_NORESOURCES); + } + + /* + * increment the reference count, and pop the channel off the head and + * push it on the tail. This ensures we rotate through the channels. + * DMA channels are shared. + */ + channel->ch_ref_cnt++; + list_remove(&list->dl_list, channel); + list_insert_tail(&list->dl_list, channel); + mutex_exit(&list->dl_mutex); + + *handle = (dcopy_handle_t)channel; + return (DCOPY_SUCCESS); +} + + +/* + * dcopy_free() + */ +void +dcopy_free(dcopy_handle_t *channel) +{ + dcopy_device_handle_t device; + dcopy_list_t *list; + boolean_t cleanup; + + + ASSERT(*channel != NULL); + + /* + * we don't need to add the channel back to the list since we never + * removed it. decrement the reference count. + */ + list = &dcopy_statep->d_globalchan_list; + mutex_enter(&list->dl_mutex); + (*channel)->ch_ref_cnt--; + + /* + * if we need to remove this channel, and the reference count is down + * to 0, decrement the number of channels which still need to be + * removed on the device. + */ + if ((*channel)->ch_removing && ((*channel)->ch_ref_cnt == 0)) { + cleanup = B_FALSE; + device = (*channel)->ch_device; + mutex_enter(&device->dc_devchan_list.dl_mutex); + device->dc_removing_cnt--; + if (device->dc_removing_cnt == 0) { + cleanup = B_TRUE; + } + mutex_exit(&device->dc_devchan_list.dl_mutex); + } + mutex_exit(&list->dl_mutex); + + /* + * if there are no channels which still need to be removed, cleanup the + * device state and call back into the DMA device driver to tell them + * the device is free. 
+ */ + if (cleanup) { + dcopy_device_cleanup(device, B_TRUE); + } + + *channel = NULL; +} + + +/* + * dcopy_query_channel() + */ +void +dcopy_query_channel(dcopy_handle_t channel, dcopy_query_channel_t *query) +{ + *query = channel->ch_info; +} + + +/* + * dcopy_cmd_alloc() + */ +int +dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd) +{ + dcopy_handle_t channel; + dcopy_cmd_priv_t priv; + int e; + + + channel = handle; + + atomic_inc_64(&channel->ch_stat.cs_cmd_alloc.value.ui64); + e = channel->ch_cb->cb_cmd_alloc(channel->ch_channel_private, flags, + cmd); + if (e == DCOPY_SUCCESS) { + priv = (*cmd)->dp_private; + priv->pr_channel = channel; + /* + * we won't initialize the blocking state until we actually + * need to block. + */ + priv->pr_block_init = B_FALSE; + } + + return (e); +} + + +/* + * dcopy_cmd_free() + */ +void +dcopy_cmd_free(dcopy_cmd_t *cmd) +{ + dcopy_handle_t channel; + dcopy_cmd_priv_t priv; + + + ASSERT(*cmd != NULL); + + priv = (*cmd)->dp_private; + channel = priv->pr_channel; + + /* if we initialized the blocking state, clean it up too */ + if (priv->pr_block_init) { + cv_destroy(&priv->pr_cv); + mutex_destroy(&priv->pr_mutex); + } + + channel->ch_cb->cb_cmd_free(channel->ch_channel_private, cmd); +} + + +/* + * dcopy_cmd_post() + */ +int +dcopy_cmd_post(dcopy_cmd_t cmd) +{ + dcopy_handle_t channel; + int e; + + + channel = cmd->dp_private->pr_channel; + + atomic_inc_64(&channel->ch_stat.cs_cmd_post.value.ui64); + if (cmd->dp_cmd == DCOPY_CMD_COPY) { + atomic_add_64(&channel->ch_stat.cs_bytes_xfer.value.ui64, + cmd->dp.copy.cc_size); + } + e = channel->ch_cb->cb_cmd_post(channel->ch_channel_private, cmd); + if (e != DCOPY_SUCCESS) { + return (e); + } + + return (DCOPY_SUCCESS); +} + + +/* + * dcopy_cmd_poll() + */ +int +dcopy_cmd_poll(dcopy_cmd_t cmd, int flags) +{ + dcopy_handle_t channel; + dcopy_cmd_priv_t priv; + int e; + + + priv = cmd->dp_private; + channel = priv->pr_channel; + + /* + * if the caller is trying to 
block, they needed to post the + * command with DCOPY_CMD_INTR set. + */ + if ((flags & DCOPY_POLL_BLOCK) && !(cmd->dp_flags & DCOPY_CMD_INTR)) { + return (DCOPY_FAILURE); + } + + atomic_inc_64(&channel->ch_stat.cs_cmd_poll.value.ui64); + +repoll: + e = channel->ch_cb->cb_cmd_poll(channel->ch_channel_private, cmd); + if (e == DCOPY_PENDING) { + /* + * if the command is still active, and the blocking flag + * is set. + */ + if (flags & DCOPY_POLL_BLOCK) { + + /* + * if we haven't initialized the state, do it now. A + * command can be re-used, so it's possible it's + * already been initialized. + */ + if (!priv->pr_block_init) { + priv->pr_block_init = B_TRUE; + mutex_init(&priv->pr_mutex, NULL, MUTEX_DRIVER, + NULL); + cv_init(&priv->pr_cv, NULL, CV_DRIVER, NULL); + priv->pr_cmd = cmd; + } + + /* push it on the list for blocking commands */ + priv->pr_wait = B_TRUE; + dcopy_list_push(&channel->ch_poll_list, priv); + + mutex_enter(&priv->pr_mutex); + /* + * it's possible we already cleared pr_wait before we + * grabbed the mutex. + */ + if (priv->pr_wait) { + cv_wait(&priv->pr_cv, &priv->pr_mutex); + } + mutex_exit(&priv->pr_mutex); + + /* + * the command has completed, go back and poll so we + * get the status. 
+ */ + goto repoll; + } + } + + return (e); +} + +/* *** END OF EXTERNAL INTERFACE *** */ + +/* + * dcopy_list_init() + */ +static int +dcopy_list_init(dcopy_list_t *list, size_t node_size, offset_t link_offset) +{ + mutex_init(&list->dl_mutex, NULL, MUTEX_DRIVER, NULL); + list_create(&list->dl_list, node_size, link_offset); + list->dl_cnt = 0; + + return (DCOPY_SUCCESS); +} + + +/* + * dcopy_list_fini() + */ +static void +dcopy_list_fini(dcopy_list_t *list) +{ + list_destroy(&list->dl_list); + mutex_destroy(&list->dl_mutex); +} + + +/* + * dcopy_list_push() + */ +static void +dcopy_list_push(dcopy_list_t *list, void *list_node) +{ + mutex_enter(&list->dl_mutex); + list_insert_tail(&list->dl_list, list_node); + list->dl_cnt++; + mutex_exit(&list->dl_mutex); +} + + +/* + * dcopy_list_pop() + */ +static void * +dcopy_list_pop(dcopy_list_t *list) +{ + list_node_t *list_node; + + mutex_enter(&list->dl_mutex); + list_node = list_head(&list->dl_list); + if (list_node == NULL) { + mutex_exit(&list->dl_mutex); + return (list_node); + } + list->dl_cnt--; + list_remove(&list->dl_list, list_node); + mutex_exit(&list->dl_mutex); + + return (list_node); +} + + +/* *** DEVICE INTERFACE *** */ +/* + * dcopy_device_register() + */ +int +dcopy_device_register(void *device_private, dcopy_device_info_t *info, + dcopy_device_handle_t *handle) +{ + struct dcopy_channel_s *channel; + struct dcopy_device_s *device; + int e; + int i; + + + /* initialize the per device state */ + device = kmem_zalloc(sizeof (*device), KM_SLEEP); + device->dc_device_private = device_private; + device->dc_info = *info; + device->dc_removing_cnt = 0; + device->dc_cb = info->di_cb; + + /* + * we have a per device channel list so we can remove a device in the + * future. 
+ */ + e = dcopy_list_init(&device->dc_devchan_list, + sizeof (struct dcopy_channel_s), + offsetof(struct dcopy_channel_s, ch_devchan_list_node)); + if (e != DCOPY_SUCCESS) { + goto registerfail_devchan; + } + + /* + * allocate state for each channel, allocate the channel, and then add + * the devices dma channels to the devices channel list. + */ + for (i = 0; i < info->di_num_dma; i++) { + channel = kmem_zalloc(sizeof (*channel), KM_SLEEP); + channel->ch_device = device; + channel->ch_removing = B_FALSE; + channel->ch_ref_cnt = 0; + channel->ch_cb = info->di_cb; + + e = info->di_cb->cb_channel_alloc(device_private, channel, + DCOPY_SLEEP, dcopy_channel_size, &channel->ch_info, + &channel->ch_channel_private); + if (e != DCOPY_SUCCESS) { + kmem_free(channel, sizeof (*channel)); + goto registerfail_alloc; + } + + e = dcopy_stats_init(channel); + if (e != DCOPY_SUCCESS) { + info->di_cb->cb_channel_free( + &channel->ch_channel_private); + kmem_free(channel, sizeof (*channel)); + goto registerfail_alloc; + } + + e = dcopy_list_init(&channel->ch_poll_list, + sizeof (struct dcopy_cmd_priv_s), + offsetof(struct dcopy_cmd_priv_s, pr_poll_list_node)); + if (e != DCOPY_SUCCESS) { + dcopy_stats_fini(channel); + info->di_cb->cb_channel_free( + &channel->ch_channel_private); + kmem_free(channel, sizeof (*channel)); + goto registerfail_alloc; + } + + dcopy_list_push(&device->dc_devchan_list, channel); + } + + /* add the device to device list */ + dcopy_list_push(&dcopy_statep->d_device_list, device); + + /* + * add the device's dma channels to the global channel list (where + * dcopy_alloc's come from) + */ + mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex); + mutex_enter(&dcopy_statep->d_device_list.dl_mutex); + channel = list_head(&device->dc_devchan_list.dl_list); + while (channel != NULL) { + list_insert_tail(&dcopy_statep->d_globalchan_list.dl_list, + channel); + dcopy_statep->d_globalchan_list.dl_cnt++; + channel = list_next(&device->dc_devchan_list.dl_list, 
channel); + } + mutex_exit(&dcopy_statep->d_device_list.dl_mutex); + mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex); + + *handle = device; + + /* last call-back into kernel for dcopy KAPI enabled */ + uioa_dcopy_enable(); + + return (DCOPY_SUCCESS); + +registerfail_alloc: + channel = list_head(&device->dc_devchan_list.dl_list); + while (channel != NULL) { + /* remove from the list */ + channel = dcopy_list_pop(&device->dc_devchan_list); + ASSERT(channel != NULL); + + dcopy_list_fini(&channel->ch_poll_list); + dcopy_stats_fini(channel); + info->di_cb->cb_channel_free(&channel->ch_channel_private); + kmem_free(channel, sizeof (*channel)); + } + + dcopy_list_fini(&device->dc_devchan_list); +registerfail_devchan: + kmem_free(device, sizeof (*device)); + + return (DCOPY_FAILURE); +} + + +/* + * dcopy_device_unregister() + */ +/*ARGSUSED*/ +int +dcopy_device_unregister(dcopy_device_handle_t *handle) +{ + struct dcopy_channel_s *channel; + dcopy_device_handle_t device; + boolean_t device_busy; + + /* first call-back into kernel for dcopy KAPI disable */ + uioa_dcopy_disable(); + + device = *handle; + device_busy = B_FALSE; + + /* + * remove the devices dma channels from the global channel list (where + * dcopy_alloc's come from) + */ + mutex_enter(&dcopy_statep->d_globalchan_list.dl_mutex); + mutex_enter(&device->dc_devchan_list.dl_mutex); + channel = list_head(&device->dc_devchan_list.dl_list); + while (channel != NULL) { + /* + * if the channel has outstanding allocs, mark it as having + * to be removed and increment the number of channels which + * need to be removed in the device state too. 
+ */ + if (channel->ch_ref_cnt != 0) { + channel->ch_removing = B_TRUE; + device_busy = B_TRUE; + device->dc_removing_cnt++; + } + dcopy_statep->d_globalchan_list.dl_cnt--; + list_remove(&dcopy_statep->d_globalchan_list.dl_list, channel); + channel = list_next(&device->dc_devchan_list.dl_list, channel); + } + mutex_exit(&device->dc_devchan_list.dl_mutex); + mutex_exit(&dcopy_statep->d_globalchan_list.dl_mutex); + + /* + * if there are channels which still need to be removed, we will clean + * up the device state after they are freed up. + */ + if (device_busy) { + return (DCOPY_PENDING); + } + + dcopy_device_cleanup(device, B_FALSE); + + *handle = NULL; + return (DCOPY_SUCCESS); +} + + +/* + * dcopy_device_cleanup() + */ +static void +dcopy_device_cleanup(dcopy_device_handle_t device, boolean_t do_callback) +{ + struct dcopy_channel_s *channel; + + /* + * remove all the channels in the device list, free them, and clean up + * the state. + */ + mutex_enter(&dcopy_statep->d_device_list.dl_mutex); + channel = list_head(&device->dc_devchan_list.dl_list); + while (channel != NULL) { + device->dc_devchan_list.dl_cnt--; + list_remove(&device->dc_devchan_list.dl_list, channel); + dcopy_list_fini(&channel->ch_poll_list); + dcopy_stats_fini(channel); + channel->ch_cb->cb_channel_free(&channel->ch_channel_private); + kmem_free(channel, sizeof (*channel)); + channel = list_head(&device->dc_devchan_list.dl_list); + } + + /* remove it from the list of devices */ + list_remove(&dcopy_statep->d_device_list.dl_list, device); + + mutex_exit(&dcopy_statep->d_device_list.dl_mutex); + + /* + * notify the DMA device driver that the device is free to be + * detached. 
+ */ + if (do_callback) { + device->dc_cb->cb_unregister_complete( + device->dc_device_private, DCOPY_SUCCESS); + } + + dcopy_list_fini(&device->dc_devchan_list); + kmem_free(device, sizeof (*device)); +} + + +/* + * dcopy_device_channel_notify() + */ +/*ARGSUSED*/ +void +dcopy_device_channel_notify(dcopy_handle_t handle, int status) +{ + struct dcopy_channel_s *channel; + dcopy_list_t *poll_list; + dcopy_cmd_priv_t priv; + int e; + + + ASSERT(status == DCOPY_COMPLETION); + channel = handle; + + poll_list = &channel->ch_poll_list; + + /* + * when we get a completion notification from the device, go through + * all of the commands blocking on this channel and see if they have + * completed. Remove the command and wake up the block thread if they + * have. Once we hit a command which is still pending, we are done + * polling since commands in a channel complete in order. + */ + mutex_enter(&poll_list->dl_mutex); + if (poll_list->dl_cnt != 0) { + priv = list_head(&poll_list->dl_list); + while (priv != NULL) { + atomic_inc_64(&channel-> + ch_stat.cs_notify_poll.value.ui64); + e = channel->ch_cb->cb_cmd_poll( + channel->ch_channel_private, + priv->pr_cmd); + if (e == DCOPY_PENDING) { + atomic_inc_64(&channel-> + ch_stat.cs_notify_pending.value.ui64); + break; + } + + poll_list->dl_cnt--; + list_remove(&poll_list->dl_list, priv); + + mutex_enter(&priv->pr_mutex); + priv->pr_wait = B_FALSE; + cv_signal(&priv->pr_cv); + mutex_exit(&priv->pr_mutex); + + priv = list_head(&poll_list->dl_list); + } + } + + mutex_exit(&poll_list->dl_mutex); +} + + +/* + * dcopy_stats_init() + */ +static int +dcopy_stats_init(dcopy_handle_t channel) +{ +#define CHANSTRSIZE 20 + char chanstr[CHANSTRSIZE]; + dcopy_stats_t *stats; + int instance; + char *name; + + + stats = &channel->ch_stat; + name = (char *)ddi_driver_name(channel->ch_device->dc_info.di_dip); + instance = ddi_get_instance(channel->ch_device->dc_info.di_dip); + + (void) snprintf(chanstr, CHANSTRSIZE, "channel%d", + 
(uint32_t)channel->ch_info.qc_chan_num); + + channel->ch_kstat = kstat_create(name, instance, chanstr, "misc", + KSTAT_TYPE_NAMED, sizeof (dcopy_stats_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + if (channel->ch_kstat == NULL) { + return (DCOPY_FAILURE); + } + channel->ch_kstat->ks_data = stats; + + kstat_named_init(&stats->cs_bytes_xfer, "bytes_xfer", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_cmd_alloc, "cmd_alloc", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_cmd_post, "cmd_post", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_cmd_poll, "cmd_poll", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_notify_poll, "notify_poll", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_notify_pending, "notify_pending", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_id, "id", + KSTAT_DATA_UINT64); + kstat_named_init(&stats->cs_capabilities, "capabilities", + KSTAT_DATA_UINT64); + + kstat_install(channel->ch_kstat); + + channel->ch_stat.cs_id.value.ui64 = channel->ch_info.qc_id; + channel->ch_stat.cs_capabilities.value.ui64 = + channel->ch_info.qc_capabilities; + + return (DCOPY_SUCCESS); +} + + +/* + * dcopy_stats_fini() + */ +static void +dcopy_stats_fini(dcopy_handle_t channel) +{ + kstat_delete(channel->ch_kstat); +} +/* *** END OF DEVICE INTERFACE *** */
--- a/usr/src/uts/common/io/stream.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/io/stream.c Fri May 23 20:14:10 2008 -0700 @@ -23,7 +23,7 @@ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -318,8 +318,8 @@ int offset; mblk_cache = kmem_cache_create("streams_mblk", - sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL, - mblk_kmem_flags); + sizeof (mblk_t), 32, NULL, NULL, NULL, NULL, NULL, + mblk_kmem_flags); for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) { @@ -330,7 +330,7 @@ */ tot_size = size + sizeof (dblk_t); ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t)) - < PAGESIZE); + < PAGESIZE); ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0); } else { @@ -346,9 +346,9 @@ (void) sprintf(name, "streams_dblk_%ld", size); cp = kmem_cache_create(name, tot_size, - DBLK_CACHE_ALIGN, dblk_constructor, - dblk_destructor, NULL, - (void *)(size), NULL, dblk_kmem_flags); + DBLK_CACHE_ALIGN, dblk_constructor, + dblk_destructor, NULL, + (void *)(size), NULL, dblk_kmem_flags); while (lastsize <= size) { dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp; @@ -357,13 +357,13 @@ } dblk_esb_cache = kmem_cache_create("streams_dblk_esb", - sizeof (dblk_t), DBLK_CACHE_ALIGN, - dblk_esb_constructor, dblk_destructor, NULL, - (void *) sizeof (dblk_t), NULL, dblk_kmem_flags); + sizeof (dblk_t), DBLK_CACHE_ALIGN, + dblk_esb_constructor, dblk_destructor, NULL, + (void *) sizeof (dblk_t), NULL, dblk_kmem_flags); fthdr_cache = kmem_cache_create("streams_fthdr", - sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0); + sizeof (fthdr_t), 32, NULL, NULL, NULL, NULL, NULL, 0); ftblk_cache = kmem_cache_create("streams_ftblk", - sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0); + sizeof (ftblk_t), 32, NULL, NULL, NULL, NULL, NULL, 0); /* Initialize Multidata caches */ mmd_init(); @@ -545,8 +545,8 @@ dbp->db_struioflag = 0; 
dbp->db_struioun.cksum.flags = 0; - /* and the COOKED flag */ - dbp->db_flags &= ~DBLK_COOKED; + /* and the COOKED and/or UIOA flag(s) */ + dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA); kmem_cache_free(dbp->db_cache, dbp); } @@ -739,7 +739,7 @@ */ if (!str_ftnever) { mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), - frp, dblk_lastfree_desb, KM_NOSLEEP); + frp, dblk_lastfree_desb, KM_NOSLEEP); if (mp != NULL) STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size); @@ -857,7 +857,7 @@ (void) sprintf(buffer, "%s_dblk_cache", name); bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, - NULL, (void *)bcp, NULL, 0); + NULL, (void *)bcp, NULL, 0); return (bcp); } @@ -1584,7 +1584,7 @@ */ if ((save_bp != mp) && - (save_bp->b_wptr == save_bp->b_rptr)) { + (save_bp->b_wptr == save_bp->b_rptr)) { bcont = save_bp->b_cont; freeb(save_bp); prev_bp->b_cont = bcont; @@ -2129,8 +2129,8 @@ nmp = mp->b_next; mp->b_next = mp->b_prev = NULL; if ((mp->b_band == 0) && - ((flag == FLUSHALL) || - datamsg(mp->b_datap->db_type))) + ((flag == FLUSHALL) || + datamsg(mp->b_datap->db_type))) freemsg(mp); else (void) putq(q, mp); @@ -2242,7 +2242,7 @@ q->q_flag |= QWANTW; mutex_exit(QLOCK(q)); TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, - "bcanput:%p %X %d", q, pri, 0); + "bcanput:%p %X %d", q, pri, 0); return (0); } } else { /* pri != 0 */ @@ -2252,7 +2252,7 @@ */ mutex_exit(QLOCK(q)); TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, - "bcanput:%p %X %d", q, pri, 1); + "bcanput:%p %X %d", q, pri, 1); return (1); } qbp = q->q_bandp; @@ -2262,13 +2262,13 @@ qbp->qb_flag |= QB_WANTW; mutex_exit(QLOCK(q)); TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, - "bcanput:%p %X %d", q, pri, 0); + "bcanput:%p %X %d", q, pri, 0); return (0); } } mutex_exit(QLOCK(q)); TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, - "bcanput:%p %X %d", q, pri, 1); + "bcanput:%p %X %d", q, pri, 1); return (1); } @@ -2847,7 +2847,7 @@ mblk_t *bp; if ((datamsg(type) && 
(type != M_DELAY)) || - ((bp = allocb_tryhard(1)) == NULL)) + ((bp = allocb_tryhard(1)) == NULL)) return (0); bp->b_datap->db_type = (unsigned char)type; @@ -2864,7 +2864,7 @@ mblk_t *bp; if ((datamsg(type) && (type != M_DELAY)) || - ((bp = allocb_tryhard(0)) == NULL)) + ((bp = allocb_tryhard(0)) == NULL)) return (0); bp->b_datap->db_type = (unsigned char)type;
--- a/usr/src/uts/common/os/move.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/os/move.c Fri May 23 20:14:10 2008 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,6 +44,16 @@ #include <sys/systm.h> #include <sys/uio.h> #include <sys/errno.h> +#include <sys/vmsystm.h> +#include <sys/cmn_err.h> +#include <vm/as.h> +#include <vm/page.h> + +#include <sys/dcopy.h> + +int64_t uioa_maxpoll = -1; /* <0 = noblock, 0 = block, >0 = block after */ +#define UIO_DCOPY_CHANNEL 0 +#define UIO_DCOPY_CMD 1 /* * Move "n" bytes at byte address "p"; "rw" indicates the direction @@ -277,3 +286,386 @@ duio->uio_iov = diov; return (0); } + +/* + * Shadow state for checking if a platform has hardware asynchronous + * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine, + * + * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls + * into dcopy to register and uioa_dcopy_disable() when the device calls + * into dcopy to unregister. 
+ */ +uioasync_t uioasync = {B_FALSE, 1024}; + +void +uioa_dcopy_enable() +{ + uioasync.enabled = B_TRUE; +} + +void +uioa_dcopy_disable() +{ + uioasync.enabled = B_FALSE; +} + +/* + * Schedule an asynchronous move of "n" bytes at byte address "p", + * "rw" indicates the direction of the move, I/O parameters and + * async state are provided in "uioa" which is update to reflect + * the data which is to be moved. + * + * Returns 0 on success or a non-zero errno on failure. + * + * Note, while the uioasync APIs are general purpose in design + * the current implementation is Intel I/OAT specific. + */ +int +uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa) +{ + int soff, doff; + uint64_t pa; + int cnt; + iovec_t *iov; + dcopy_handle_t channel; + dcopy_cmd_t cmd; + int ret = 0; + int dcopy_flags; + + if (!(uioa->uioa_state & UIOA_ENABLED)) { + /* The uioa_t isn't enabled */ + return (ENXIO); + } + + if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) { + /* Only support to user-land from kernel */ + return (ENOTSUP); + } + + + channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL]; + cmd = uioa->uioa_hwst[UIO_DCOPY_CMD]; + dcopy_flags = DCOPY_NOSLEEP; + + /* + * While source bytes and destination bytes. + */ + while (n > 0 && uioa->uio_resid > 0) { + iov = uioa->uio_iov; + if (iov->iov_len == 0l) { + uioa->uio_iov++; + uioa->uio_iovcnt--; + uioa->uioa_lcur++; + uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp; + continue; + } + /* + * While source bytes schedule an async + * dma for destination page by page. 
+ */ + while (n > 0) { + /* Addr offset in page src/dst */ + soff = (uintptr_t)p & PAGEOFFSET; + doff = (uintptr_t)iov->iov_base & PAGEOFFSET; + /* Min copy count src and dst and page sized */ + cnt = MIN(n, iov->iov_len); + cnt = MIN(cnt, PAGESIZE - soff); + cnt = MIN(cnt, PAGESIZE - doff); + /* XXX if next page(s) contiguous could use multipage */ + + /* + * if we have an old command, we want to link all + * other commands to the next command we alloced so + * we only need to track the last command but can + * still free them all. + */ + if (cmd != NULL) { + dcopy_flags |= DCOPY_ALLOC_LINK; + } + ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd); + if (ret != DCOPY_SUCCESS) { + /* Error of some sort */ + return (EIO); + } + uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd; + + ASSERT(cmd->dp_version == DCOPY_CMD_V0); + if (uioa_maxpoll >= 0) { + /* Blocking (>0 may be) used in uioafini() */ + cmd->dp_flags = DCOPY_CMD_INTR; + } else { + /* Non blocking uioafini() so no intr */ + cmd->dp_flags = DCOPY_CMD_NOFLAGS; + } + cmd->dp_cmd = DCOPY_CMD_COPY; + pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p)); + cmd->dp.copy.cc_source = pa + soff; + if (uioa->uioa_lcur->uioa_pfncnt == 0) { + /* Have a (page_t **) */ + pa = ptob((uint64_t)( + *(page_t **)uioa->uioa_lppp)->p_pagenum); + } else { + /* Have a (pfn_t *) */ + pa = ptob((uint64_t)( + *(pfn_t *)uioa->uioa_lppp)); + } + cmd->dp.copy.cc_dest = pa + doff; + cmd->dp.copy.cc_size = cnt; + ret = dcopy_cmd_post(cmd); + if (ret != DCOPY_SUCCESS) { + /* Error of some sort */ + return (EIO); + } + ret = 0; + + /* If UIOA_POLL not set, set it */ + if (!(uioa->uioa_state & UIOA_POLL)) + uioa->uioa_state |= UIOA_POLL; + + /* Update iov, uio, and local pointers/counters */ + iov->iov_base += cnt; + iov->iov_len -= cnt; + uioa->uio_resid -= cnt; + uioa->uio_loffset += cnt; + p = (caddr_t)p + cnt; + n -= cnt; + + /* End of iovec? */ + if (iov->iov_len == 0) { + /* Yup, next iovec */ + break; + } + + /* Next dst addr page? 
*/ + if (doff + cnt == PAGESIZE) { + /* Yup, next page_t */ + uioa->uioa_lppp++; + } + } + } + + return (ret); +} + +/* + * Initialize a uioa_t for a given uio_t for the current user context, + * copy the common uio_t to the uioa_t, walk the shared iovec_t and + * lock down the user-land page(s) containing iovec_t data, then mapin + * user-land pages using segkpm. + */ +int +uioainit(uio_t *uiop, uioa_t *uioap) +{ + caddr_t addr; + page_t **pages; + int off; + int len; + proc_t *procp = ttoproc(curthread); + struct as *as = procp->p_as; + iovec_t *iov = uiop->uio_iov; + int32_t iovcnt = uiop->uio_iovcnt; + uioa_page_t *locked = uioap->uioa_locked; + dcopy_handle_t channel; + int error; + + if (! (uioap->uioa_state & UIOA_ALLOC)) { + /* Can only init() a freshly allocated uioa_t */ + return (EINVAL); + } + + error = dcopy_alloc(DCOPY_NOSLEEP, &channel); + if (error == DCOPY_NORESOURCES) { + /* Turn off uioa */ + uioasync.enabled = B_FALSE; + return (ENODEV); + } + if (error != DCOPY_SUCCESS) { + /* Alloc failed */ + return (EIO); + } + + uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel; + uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; + + /* Indicate uioa_t (will be) initialized */ + uioap->uioa_state = UIOA_INIT; + + /* uio_t/uioa_t uio_t common struct copy */ + *((uio_t *)uioap) = *uiop; + + /* initialize *uiop->uio_iov */ + if (iovcnt > UIOA_IOV_MAX) { + /* Too big? */ + return (E2BIG); + } + uioap->uio_iov = iov; + uioap->uio_iovcnt = iovcnt; + + /* Mark the uioap as such */ + uioap->uio_extflg |= UIO_ASYNC; + + /* + * For each iovec_t, lock-down the page(s) backing the iovec_t + * and save the page_t list for phys addr use in uioamove(). 
+ */ + iov = uiop->uio_iov; + iovcnt = uiop->uio_iovcnt; + while (iovcnt > 0) { + addr = iov->iov_base; + off = (uintptr_t)addr & PAGEOFFSET; + addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); + len = iov->iov_len + off; + + /* Lock down page(s) for the iov span */ + if ((error = as_pagelock(as, &pages, + iov->iov_base, iov->iov_len, S_WRITE)) != 0) { + /* Error */ + goto cleanup; + } + + if (pages == NULL) { + /* + * Need page_t list, really only need + * a pfn list so build one. + */ + pfn_t *pfnp; + int pcnt = len >> PAGESHIFT; + + if (off) + pcnt++; + if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp), + KM_NOSLEEP)) == NULL) { + error = ENOMEM; + goto cleanup; + } + locked->uioa_ppp = (void **)pfnp; + locked->uioa_pfncnt = pcnt; + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + while (pcnt-- > 0) { + *pfnp++ = hat_getpfnum(as->a_hat, addr); + addr += PAGESIZE; + } + AS_LOCK_EXIT(as, &as->a_lock); + } else { + /* Have a page_t list, save it */ + locked->uioa_ppp = (void **)pages; + locked->uioa_pfncnt = 0; + } + /* Save for as_pageunlock() in uioafini() */ + locked->uioa_base = iov->iov_base; + locked->uioa_len = iov->iov_len; + locked++; + + /* Next iovec_t */ + iov++; + iovcnt--; + } + /* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */ + uioap->uioa_lcur = uioap->uioa_locked; + uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp; + return (0); + +cleanup: + /* Unlock any previously locked page_t(s) */ + while (locked > uioap->uioa_locked) { + locked--; + as_pageunlock(as, (page_t **)locked->uioa_ppp, + locked->uioa_base, locked->uioa_len, S_WRITE); + } + + /* Last indicate uioa_t still in alloc state */ + uioap->uioa_state = UIOA_ALLOC; + + return (error); +} + +/* + * Finish processing of a uioa_t by cleanup any pending "uioap" actions. 
+ */ +int +uioafini(uio_t *uiop, uioa_t *uioap) +{ + int32_t iovcnt = uiop->uio_iovcnt; + uioa_page_t *locked = uioap->uioa_locked; + struct as *as = ttoproc(curthread)->p_as; + dcopy_handle_t channel; + dcopy_cmd_t cmd; + int ret = 0; + + ASSERT(uioap->uio_extflg & UIO_ASYNC); + + if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) { + /* Must be an active uioa_t */ + return (EINVAL); + } + + channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL]; + cmd = uioap->uioa_hwst[UIO_DCOPY_CMD]; + + /* XXX - why do we get cmd == NULL sometimes? */ + if (cmd != NULL) { + if (uioap->uioa_state & UIOA_POLL) { + /* Wait for last dcopy() to finish */ + int64_t poll = 1; + int poll_flag = DCOPY_POLL_NOFLAGS; + + do { + if (uioa_maxpoll == 0 || + (uioa_maxpoll > 0 && + poll >= uioa_maxpoll)) { + /* Always block or after maxpoll */ + poll_flag = DCOPY_POLL_BLOCK; + } else { + /* No block, poll */ + poll++; + } + ret = dcopy_cmd_poll(cmd, poll_flag); + } while (ret == DCOPY_PENDING); + + if (ret == DCOPY_COMPLETED) { + /* Poll/block succeeded */ + ret = 0; + } else { + /* Poll/block failed */ + ret = EIO; + } + } + dcopy_cmd_free(&cmd); + } + + dcopy_free(&channel); + + /* Unlock all page(s) iovec_t by iovec_t */ + while (iovcnt-- > 0) { + page_t **pages; + + if (locked->uioa_pfncnt == 0) { + /* A as_pagelock() returned (page_t **) */ + pages = (page_t **)locked->uioa_ppp; + } else { + /* Our pfn_t array */ + pages = NULL; + kmem_free(locked->uioa_ppp, locked->uioa_pfncnt * + sizeof (pfn_t *)); + } + as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len, + S_WRITE); + + locked++; + } + /* uioa_t->uio_t common struct copy */ + *uiop = *((uio_t *)uioap); + + /* + * Last, reset uioa state to alloc. + * + * Note, we only initialize the state here, all other members + * will be initialized in a subsequent uioainit(). + */ + uioap->uioa_state = UIOA_ALLOC; + + uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL; + uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL; + + return (ret); +}
--- a/usr/src/uts/common/os/streamio.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/os/streamio.c Fri May 23 20:14:10 2008 -0700 @@ -144,6 +144,7 @@ static void strcleanall(struct vnode *); static int strwsrv(queue_t *); static int strdocmd(struct stdata *, struct strcmd *, cred_t *); +static void struioainit(queue_t *, sodirect_t *, uio_t *); /* * qinit and module_info structures for stream head read and write queues @@ -189,6 +190,11 @@ * mirror this. * 4. ioctl monitor: sd_lock is gotten to ensure that only one * thread is doing an ioctl at a time. + * + * Note, for sodirect case 3. is extended to (*sodirect_t.sod_enqueue)() + * call-back from below, further the sodirect support is for code paths + * called via kstgetmsg(), all other code paths ASSERT() that sodirect + * uioa generated mblk_t's (i.e. DBLK_UIOA) aren't processed. */ static int @@ -397,6 +403,7 @@ stp->sd_qn_minpsz = 0; stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */ stp->sd_maxblk = INFPSZ; + stp->sd_sodirect = NULL; qp->q_ptr = _WR(qp)->q_ptr = stp; STREAM(qp) = STREAM(_WR(qp)) = stp; vp->v_stream = stp; @@ -970,11 +977,14 @@ * It is the callers responsibility to call qbackenable after * it is finished with the message. The caller should not call * qbackenable until after any putback calls to avoid spurious backenabling. + * + * Also, handle uioa initialization and process any DBLK_UIOA flaged messages. */ mblk_t * strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first, int *errorp) { + sodirect_t *sodp = stp->sd_sodirect; mblk_t *bp; int error; @@ -1063,7 +1073,67 @@ } *errorp = 0; ASSERT(MUTEX_HELD(&stp->sd_lock)); - return (getq_noenab(q)); + if (sodp != NULL && (sodp->sod_state & SOD_ENABLED) && + (sodp->sod_uioa.uioa_state & UIOA_INIT)) { + /* + * First kstrgetmsg() call for an uioa_t so if any + * queued mblk_t's need to consume them before uioa + * from below can occur. 
+ */ + sodp->sod_uioa.uioa_state &= UIOA_CLR; + sodp->sod_uioa.uioa_state |= UIOA_ENABLED; + if (q->q_first != NULL) { + struioainit(q, sodp, uiop); + } + } + + bp = getq_noenab(q); + + if (bp != NULL && (bp->b_datap->db_flags & DBLK_UIOA)) { + /* + * A uioa flaged mblk_t chain, already uio processed, + * add it to the sodirect uioa pending free list. + * + * Note, a b_cont chain headed by a DBLK_UIOA enable + * mblk_t must have all mblk_t(s) DBLK_UIOA enabled. + */ + mblk_t *bpt = sodp->sod_uioaft; + + ASSERT(sodp != NULL); + + /* + * Add first mblk_t of "bp" chain to current sodirect uioa + * free list tail mblk_t, if any, else empty list so new head. + */ + if (bpt == NULL) + sodp->sod_uioafh = bp; + else + bpt->b_cont = bp; + + /* + * Walk mblk_t "bp" chain to find tail and adjust rptr of + * each to reflect that uioamove() has consumed all data. + */ + bpt = bp; + for (;;) { + bpt->b_rptr = bpt->b_wptr; + if (bpt->b_cont == NULL) + break; + bpt = bpt->b_cont; + + ASSERT(bpt->b_datap->db_flags & DBLK_UIOA); + } + /* New sodirect uioa free list tail */ + sodp->sod_uioaft = bpt; + + /* Only 1 strget() with data returned per uioa_t */ + if (sodp->sod_uioa.uioa_state & UIOA_ENABLED) { + sodp->sod_uioa.uioa_state &= UIOA_CLR; + sodp->sod_uioa.uioa_state |= UIOA_FINI; + } + } + + return (bp); } /* @@ -1083,6 +1153,8 @@ ASSERT(bp->b_wptr >= bp->b_rptr); do { + ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); + if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) { ASSERT(n > 0); @@ -1229,8 +1301,10 @@ } first = 0; } + ASSERT(MUTEX_HELD(&stp->sd_lock)); ASSERT(bp); + ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); pri = bp->b_band; /* * Extract any mark information. 
If the message is not @@ -6650,6 +6724,7 @@ bp = strget(stp, q, uiop, first, &error); ASSERT(MUTEX_HELD(&stp->sd_lock)); if (bp != NULL) { + ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); if (bp->b_datap->db_type == M_SIG) { strsignal_nolock(stp, *bp->b_rptr, (int32_t)bp->b_band); @@ -7288,7 +7363,7 @@ "kstrgetmsg calls strwaitq:%p, %p", vp, uiop); if (((error = strwaitq(stp, waitflag, (ssize_t)0, - fmode, timout, &done)) != 0) || done) { + fmode, timout, &done))) != 0 || done) { TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE, "kstrgetmsg error or done:%p, %p", vp, uiop); @@ -7360,6 +7435,8 @@ * there is indeed a shortage of memory. dupmsg() may fail * if db_ref in any of the messages reaches its limit. */ + + ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) { /* * Restore the state of the stream head since we @@ -7418,6 +7495,7 @@ } } + ASSERT(!(bp->b_datap->db_flags & DBLK_UIOA)); bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp, NULL, NULL, NULL, NULL); @@ -7468,6 +7546,8 @@ */ if (uiop == NULL) { /* Append data to tail of mctlp */ + + ASSERT(bp == NULL || !(bp->b_datap->db_flags & DBLK_UIOA)); if (mctlp != NULL) { mblk_t **mpp = mctlp; @@ -7476,6 +7556,14 @@ *mpp = bp; bp = NULL; } + } else if (bp && (bp->b_datap->db_flags & DBLK_UIOA)) { + /* + * A uioa mblk_t chain, as uio processing has already + * been done we simple skip over processing. + */ + bp = NULL; + pr = 0; + } else if (uiop->uio_resid >= 0 && bp) { size_t oldresid = uiop->uio_resid; @@ -7564,6 +7652,8 @@ * again since the flush logic in strrput_nondata() * may have cleared it while we had sd_lock dropped. 
*/ + + ASSERT(!(savemp->b_datap->db_flags & DBLK_UIOA)); if (type >= QPCTL) { ASSERT(type == M_PCPROTO); if (queclass(savemp) < QPCTL) @@ -8635,3 +8725,85 @@ } return (B_FALSE); } + +/* + * Called on the first strget() of a sodirect/uioa enabled streamhead, + * if any mblk_t(s) enqueued they must first be uioamove()d before uioa + * can be enabled for the underlying transport's use. + */ +void +struioainit(queue_t *q, sodirect_t *sodp, uio_t *uiop) +{ + uioa_t *uioap = (uioa_t *)uiop; + mblk_t *bp = q->q_first; + mblk_t *lbp = NULL; + mblk_t *nbp, *wbp; + int len; + int error; + + ASSERT(MUTEX_HELD(sodp->sod_lock)); + ASSERT(&sodp->sod_uioa == uioap); + + /* + * Walk the b_next/b_prev doubly linked list of b_cont chain(s) + * and schedule any M_DATA mblk_t's for uio asynchronous move. + */ + do { + /* Next mblk_t chain */ + nbp = bp->b_next; + /* Walk the chain */ + wbp = bp; + do { + if (wbp->b_datap->db_type != M_DATA) { + /* Not M_DATA, no more uioa */ + goto nouioa; + } + if ((len = wbp->b_wptr - wbp->b_rptr) > 0) { + /* Have a M_DATA mblk_t with data */ + if (len > uioap->uio_resid) { + /* Not enough uio sapce */ + goto nouioa; + } + error = uioamove(wbp->b_rptr, len, + UIO_READ, uioap); + if (!error) { + /* Scheduled, mark dblk_t as such */ + wbp->b_datap->db_flags |= DBLK_UIOA; + } else { + /* Error of some sort, no more uioa */ + uioap->uioa_state &= UIOA_CLR; + uioap->uioa_state |= UIOA_FINI; + return; + } + } + /* Save last wbp processed */ + lbp = wbp; + } while ((wbp = wbp->b_cont) != NULL); + } while ((bp = nbp) != NULL); + + return; + +nouioa: + /* No more uioa */ + uioap->uioa_state &= UIOA_CLR; + uioap->uioa_state |= UIOA_FINI; + + /* + * If we processed 1 or more mblk_t(s) then we need to split the + * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s) + * are in the current chain and the rest are in the following new + * chain. 
+ */ + if (lbp != NULL) { + /* New end of current chain */ + lbp->b_cont = NULL; + + /* Insert new chain wbp after bp */ + if ((wbp->b_next = nbp) != NULL) + nbp->b_prev = wbp; + else + q->q_last = wbp; + wbp->b_prev = bp; + bp->b_next = wbp; + } +}
--- a/usr/src/uts/common/os/strsubr.c Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/os/strsubr.c Fri May 23 20:14:10 2008 -0700 @@ -2467,13 +2467,18 @@ /* * Private flag used by a transport module to indicate * to sockfs that it supports direct-access mode without - * having to go through STREAMS. - */ - if (devflag & _D_DIRECT) { + * having to go through STREAMS or the transport can use + * sodirect_t sharing to bypass STREAMS for receive-side + * M_DATA processing. + */ + if (devflag & (_D_DIRECT|_D_SODIRECT)) { /* Reject unless the module is fully-MT (no perimeter) */ if ((qflag & QMT_TYPEMASK) != QMTSAFE) goto bad; - qflag |= _QDIRECT; + if (devflag & _D_DIRECT) + qflag |= _QDIRECT; + if (devflag & _D_SODIRECT) + qflag |= _QSODIRECT; } *qflagp = qflag;
--- a/usr/src/uts/common/sys/Makefile Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/Makefile Fri May 23 20:14:10 2008 -0700 @@ -487,6 +487,7 @@ socket_impl.h \ socketvar.h \ sockio.h \ + sodirect.h \ squeue.h \ squeue_impl.h \ srn.h \
--- a/usr/src/uts/common/sys/conf.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/conf.h Fri May 23 20:14:10 2008 -0700 @@ -22,7 +22,7 @@ /* All Rights Reserved */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -221,6 +221,9 @@ #define D_OPEN_RETURNS_EINTR 0x100000 /* EINTR expected from open(9E) */ +#define _D_SODIRECT 0x200000 /* Private flag for transport modules used */ + /* to enable _QSODIRECT for a STREAMS Q */ + #endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ #ifdef __cplusplus
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/sys/dcopy.h Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,239 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_DCOPY_H +#define _SYS_DCOPY_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +/* + * *** This interface is for private use by the IP stack only *** + */ + +/* Private dcopy/uioa interface for dcopy to enable/disable dcopy KAPI */ +extern void uioa_dcopy_enable(); +extern void uioa_dcopy_disable(); + +/* Function return status */ +#define DCOPY_FAILURE (-1) +#define DCOPY_SUCCESS (0) +#define DCOPY_NORESOURCES (1) /* _alloc & _cmd_alloc, _cmd_post only */ +#define DCOPY_PENDING (0x10) /* dcopy_poll(), dcopy_unregister() */ +#define DCOPY_COMPLETED (0x20) /* dcopy_poll() only */ + + +/* dq_version */ +#define DCOPY_QUERY_V0 0 + +typedef struct dcopy_query_s { + int dq_version; /* DCOPY_QUERY_V0 */ + uint_t dq_num_channels; /* number of dma channels */ +} dcopy_query_t; + +/* + * dcopy_query() + * query for the number of DMA engines usable in the system. + */ +void dcopy_query(dcopy_query_t *query); + + +typedef struct dcopy_channel_s *dcopy_handle_t; + +/* dcopy_alloc() and dcopy_cmd_alloc() common flags */ +#define DCOPY_SLEEP (0) +#define DCOPY_NOSLEEP (1 << 0) + +/* + * dcopy_alloc() + * Allocate a DMA channel which is used for posting DMA requests. Note: this + * does not give the caller exclusive access to the DMA engine. Commands + * posted to a channel will complete in order. + * flags - (DCOPY_SLEEP, DCOPY_NOSLEEP) + * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES + */ +int dcopy_alloc(int flags, dcopy_handle_t *handle); + +/* + * dcopy_free() + * Free the DMA channel. The client can no longer use the handle to post or + * poll for status on posts which were previously done on this channel. 
+ */ +void dcopy_free(dcopy_handle_t *handle); + +/* dq_version */ +#define DCOPY_QUERY_CHANNEL_V0 0 + +/* Per DMA channel info */ +typedef struct dcopy_query_channel_s { + int qc_version; /* DCOPY_QUERY_CHANNEL_V0 */ + + /* Does DMA channel support DCA */ + boolean_t qc_dca_supported; + + /* device id and device specific capabilities */ + uint64_t qc_id; + uint64_t qc_capabilities; + + /* + * DMA channel size. This may not be the same as the number of posts + * that the DMA channel can handle since a post may consume 1 or more + * entries. + */ + uint64_t qc_channel_size; + + /* DMA channel number within the device. Not unique across devices */ + uint64_t qc_chan_num; +} dcopy_query_channel_t; + +/* + * dcopy_query_channel() + * query DMA engine capabilities + */ +void dcopy_query_channel(dcopy_handle_t handle, dcopy_query_channel_t *query); + + +/* dp_version */ +#define DCOPY_CMD_V0 0 + +/* dp_cmd */ +#define DCOPY_CMD_COPY 0x1 + +/* dp_flags */ +/* + * DCOPY_CMD_QUEUE + * Hint to queue up the post but don't notify the DMA engine. This can be + * used as an optimization when multiple posts are going to be queued up and + * you only want to notify the DMA engine after the last post. Note, this does + * not mean the DMA engine won't process the request since it could notice + * it anyway. + * DCOPY_CMD_NOSTAT + * Don't generate a status. If this flag is used, you cannot poll for + * completion status on this command. This can be a useful performance + * optimization if you're posting multiple commands and just want to poll on + * the last command. + * DCOPY_CMD_DCA + * If DCA is supported, direct this and all future command data (until the + * next command with DCOPY_CMD_DCA set) to the processor specified in + * dp_dca_id. This flag is ignored if DCA is not supported. + * DCOPY_CMD_INTR + * Generate an interrupt when command completes. This flag is required if + * the caller is going to call dcopy_cmd_poll() with DCOPY_POLL_BLOCK set + * for this command. 
+ */ +#define DCOPY_CMD_NOFLAGS (0) +#define DCOPY_CMD_QUEUE (1 << 0) +#define DCOPY_CMD_NOSTAT (1 << 1) +#define DCOPY_CMD_DCA (1 << 2) +#define DCOPY_CMD_INTR (1 << 3) + +typedef struct dcopy_cmd_copy_s { + uint64_t cc_source; /* Source physical address */ + uint64_t cc_dest; /* Destination physical address */ + size_t cc_size; +} dcopy_cmd_copy_t; + +typedef union dcopy_cmd_u { + dcopy_cmd_copy_t copy; +} dcopy_cmd_u_t; + +typedef struct dcopy_cmd_priv_s *dcopy_cmd_priv_t; + +struct dcopy_cmd_s { + uint_t dp_version; /* DCOPY_CMD_V0 */ + uint_t dp_flags; + uint64_t dp_cmd; + dcopy_cmd_u_t dp; + uint32_t dp_dca_id; + dcopy_cmd_priv_t dp_private; +}; +typedef struct dcopy_cmd_s *dcopy_cmd_t; + + +/* + * dcopy_cmd_alloc() specific flags + * DCOPY_ALLOC_LINK - when set, the caller passes in a previously alloced + * command in cmd. dcopy_cmd_alloc() will allocate a new command and + * link it to the old command. The caller can use this to build a + * chain of commands, keeping only the last cmd alloced. calling + * dcopy_cmd_free() with the last cmd alloced in the chain will free all of + * the commands in the chain. dcopy_cmd_post() and dcopy_cmd_poll() have + * no knowledge of a chain of commands. It's only used for alloc/free. + */ +#define DCOPY_ALLOC_LINK (1 << 16) + +/* + * dcopy_cmd_alloc() + * allocate a command. A command can be re-used after it completes. + * flags - (DCOPY_SLEEP || DCOPY_NOSLEEP), DCOPY_ALLOC_LINK + * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES + */ +int dcopy_cmd_alloc(dcopy_handle_t handle, int flags, dcopy_cmd_t *cmd); + +/* + * dcopy_cmd_free() + * free the command. This call cannot be called after dcopy_free(). 
+ */ +void dcopy_cmd_free(dcopy_cmd_t *cmd); + +/* + * dcopy_cmd_post() + * post a command (allocated from dcopy_cmd_alloc()) to the DMA channel + * returns => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_NORESOURCES + */ +int dcopy_cmd_post(dcopy_cmd_t cmd); + +/* dcopy_cmd_poll() flags */ +#define DCOPY_POLL_NOFLAGS (0) +#define DCOPY_POLL_BLOCK (1 << 0) + +/* + * dcopy_cmd_poll() + * poll on completion status of a previous post. This call cannot be called + * after dcopy_free(). + * + * if flags == DCOPY_POLL_NOFLAGS, return status can be DCOPY_FAILURE, + * DCOPY_PENDING, or DCOPY_COMPLETED. + * + * if flags & DCOPY_POLL_BLOCK, return status can be DCOPY_FAILURE or + * DCOPY_COMPLETED. DCOPY_POLL_BLOCK can only be set in base context. + * + * The command cannot be re-used or freed until the command has completed + * (e.g. DCOPY_FAILURE or DCOPY_COMPLETED). + */ +int dcopy_cmd_poll(dcopy_cmd_t cmd, int flags); + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DCOPY_H */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/sys/dcopy_device.h Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DCOPY_DEVICE_H +#define _SYS_DCOPY_DEVICE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/dcopy.h> + +/* + * private command state. Space for this structure should be allocated during + * (*cb_cmd_alloc). The DMA driver must set dp_private in dcopy_cmd_t to point + * to the memory it allocated. Other than pr_device_cmd_private, the DMA driver + * should not touch any of the fields in this structure. pr_device_cmd_private + * is a private pointer for the DMA engine to use. + */ +struct dcopy_cmd_priv_s { + /* + * we only init the state used to track a command which blocks when it + * actually blocks. pr_block_init tells us when we need to clean it + * up during a cmd_free. 
+ */ + boolean_t pr_block_init; + + /* dcopy_poll blocking state */ + list_node_t pr_poll_list_node; + volatile boolean_t pr_wait; + kmutex_t pr_mutex; + kcondvar_t pr_cv; + + /* back pointer to the command */ + dcopy_cmd_t pr_cmd; + + /* shortcut to the channel we're on */ + struct dcopy_channel_s *pr_channel; + + /* DMA driver private pointer */ + void *pr_device_cmd_private; +}; + +/* cb_version */ +#define DCOPY_DEVICECB_V0 0 + +typedef struct dcopy_device_chaninfo_s { + uint_t di_chan_num; +} dcopy_device_chaninfo_t; + +typedef struct dcopy_device_cb_s { + int cb_version; + int cb_res1; + + /* allocate/free a DMA channel. See dcopy.h for return status */ + int (*cb_channel_alloc)(void *device_private, + dcopy_handle_t handle, int flags, uint_t size, + dcopy_query_channel_t *info, void *channel_private); + void (*cb_channel_free)(void *channel_private); + + /* allocate/free a command. See dcopy.h for return status */ + int (*cb_cmd_alloc)(void *channel_private, int flags, + dcopy_cmd_t *cmd); + void (*cb_cmd_free)(void *channel_private, dcopy_cmd_t *cmd); + + /* + * post a command/poll for command status. See dcopy.h for return + * status + */ + int (*cb_cmd_post)(void *channel_private, dcopy_cmd_t cmd); + int (*cb_cmd_poll)(void *channel_private, dcopy_cmd_t cmd); + + /* + * if dcopy_device_unregister() returns DCOPY_PENDING, dcopy will + * call this routine when all the channels are no longer being + * used and have been free'd up. e.g. it's safe for the DMA driver + * to detach. 
+ * status = DCOPY_SUCCESS || DCOPY_FAILURE + */ + void (*cb_unregister_complete)(void *device_private, int status); +} dcopy_device_cb_t; + + +typedef struct dcopy_device_info_s { + dev_info_t *di_dip; + dcopy_device_cb_t *di_cb; /* must be a static array */ + uint_t di_num_dma; + uint_t di_maxxfer; + uint_t di_capabilities; + uint64_t di_id; +} dcopy_device_info_t; + +typedef struct dcopy_device_s *dcopy_device_handle_t; + +/* dcopy_device_notify() status */ +#define DCOPY_COMPLETION 0 + +/* + * dcopy_device_register() + * register the DMA device with dcopy. + * return status => DCOPY_FAILURE, DCOPY_SUCCESS + */ +int dcopy_device_register(void *device_private, dcopy_device_info_t *info, + dcopy_device_handle_t *handle); + +/* + * dcopy_device_unregister() + * try to unregister the DMA device with dcopy. If the DMA engines are + * still being used by upper layer modules, DCOPY_PENDING will be returned. + * return status => DCOPY_FAILURE, DCOPY_SUCCESS, DCOPY_PENDING + * if DCOPY_PENDING, (*cb_unregister_complete)() will be called when + * completed. + */ +int dcopy_device_unregister(dcopy_device_handle_t *handle); + +/* + * dcopy_device_channel_notify() + * Notify dcopy of an event. + * dcopy_handle_t handle => what was passed into (*cb_alloc)() + * status => DCOPY_COMPLETION + */ +void dcopy_device_channel_notify(dcopy_handle_t handle, int status); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DCOPY_DEVICE_H */
--- a/usr/src/uts/common/sys/socketvar.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/socketvar.h Fri May 23 20:14:10 2008 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,14 +50,13 @@ #include <sys/file.h> #include <sys/param.h> #include <sys/zone.h> +#include <sys/sodirect.h> #include <inet/kssl/ksslapi.h> #ifdef __cplusplus extern "C" { #endif - - /* * Internal representation used for addresses. */ @@ -333,6 +332,9 @@ kssl_endpt_type_t so_kssl_type; /* is proxy/is proxied/none */ kssl_ent_t so_kssl_ent; /* SSL config entry */ kssl_ctx_t so_kssl_ctx; /* SSL session context */ + + /* != NULL for sodirect_t enabled socket */ + sodirect_t *so_direct; }; /* flags */ @@ -375,6 +377,7 @@ #define SS_MOREDATA 0x00100000 /* NCAfs: NCA has more data */ #define SS_DIRECT 0x00200000 /* transport is directly below */ +#define SS_SODIRECT 0x00400000 /* transport supports sodirect */ #define SS_LADDR_VALID 0x01000000 /* so_laddr valid for user */ #define SS_FADDR_VALID 0x02000000 /* so_faddr valid for user */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/sys/sodirect.h Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +#ifndef _SYS_SODIRECT_H +#define _SYS_SODIRECT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Sodirect ... + * + * Currently the sodirect_t uses the sockfs streamhead STREAMS Q directly, + * in the future when we have STREAMless sockets a sonode Q will have to + * be implemented however the sodirect KPI shouldn't need to change. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct sodirect_s { + uint32_t sod_state; /* State bits */ + uint32_t sod_want; /* Pending read byte count or 0 */ + queue_t *sod_q; /* Socket Q */ + int (*sod_enqueue)(); /* Call to enqueue an mblk_t */ + void (*sod_wakeup)(); /* Call to awaken a read()er, if any */ + mblk_t *sod_uioafh; /* To be freed list head, or NULL */ + mblk_t *sod_uioaft; /* To be freed list tail */ + kmutex_t *sod_lock; /* Lock needed to protect all members */ + uioa_t sod_uioa; /* Pending uio_t for uioa_t use */ +} sodirect_t; + +/* + * sod_state bits: + */ + +#define SOD_DISABLED 0 /* No more sodirect */ + +#define SOD_ENABLED 0x0001 /* sodirect_t enabled */ + +#define SOD_WAKE_NOT 0x0010 /* Wakeup not needed */ +#define SOD_WAKE_NEED 0x0020 /* Wakeup needed */ +#define SOD_WAKE_DONE 0x0040 /* Wakeup done */ +#define SOD_WAKE_CLR ~(SOD_WAKE_NOT|SOD_WAKE_NEED|SOD_WAKE_DONE) + +/* + * Useful macros: + */ + +#define SOD_QSETBE(p) ((p)->sod_q->q_flag |= QWANTW) +#define SOD_QCLRBE(p) ((p)->sod_q->q_flag &= ~QWANTW) +#define SOD_QEMPTY(p) ((p)->sod_q->q_first == NULL) +#define SOD_QFULL(p) ((p)->sod_q->q_flag & QFULL) +#define SOD_QCNT(p) ((p)->sod_q->q_count) + +#define SOD_DISABLE(p) (p)->sod_state &= ~SOD_ENABLED + +#define SOD_QTOSODP(q) (q)->q_stream->sd_sodirect + + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SODIRECT_H */
--- a/usr/src/uts/common/sys/stream.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/stream.h Fri May 23 20:14:10 2008 -0700 @@ -190,6 +190,8 @@ #define _QASSOCIATED 0x10000000 /* queue is associated with a device */ #define _QDIRECT 0x20000000 /* Private; transport module uses */ /* direct interface to/from sockfs */ +#define _QSODIRECT 0x40000000 /* Private, transport module shares */ + /* an sodirect_t with sockfs */ /* queue sqflags (protected by SQLOCK). */ #define Q_SQQUEUED 0x01 /* Queue is in the syncq list */ @@ -400,6 +402,7 @@ */ #define DBLK_REFMIN 0x01 /* min refcnt stored in low bit */ #define DBLK_COOKED 0x02 /* message has been processed once */ +#define DBLK_UIOA 0x04 /* uioamove() is pending */ /* * db_struioflag values:
--- a/usr/src/uts/common/sys/strsubr.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/strsubr.h Fri May 23 20:14:10 2008 -0700 @@ -46,6 +46,7 @@ #include <sys/proc.h> #include <sys/netstack.h> #include <sys/modhash.h> +#include <sys/sodirect.h> #ifdef __cplusplus extern "C" { @@ -94,9 +95,8 @@ * sd_mark * sd_closetime * sd_wakeq - * sd_uiordq - * sd_uiowrq * sd_maxblk + * sd_sodirect * * The following fields are modified only by the allocator, which * has exclusive access to them at that time: @@ -245,6 +245,10 @@ uint_t sd_copyflag; /* copy-related flags */ zoneid_t sd_anchorzone; /* Allow removal from same zone only */ struct msgb *sd_cmdblk; /* reply from _I_CMD */ + /* + * Support for socket direct. + */ + sodirect_t *sd_sodirect; /* pointer to shared sodirect_t */ } stdata_t; /*
--- a/usr/src/uts/common/sys/uio.h Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/common/sys/uio.h Fri May 23 20:14:10 2008 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -101,6 +100,49 @@ ssize_t uio_resid; /* residual count */ } uio_t; +/* + * Extended uio_t uioa_t used for asynchronous uio. + * + * Note: UIOA_IOV_MAX is defined and used as it is in "fs/vncalls.c" + * as there isn't a formal definition of IOV_MAX for the kernel. + */ +#define UIOA_IOV_MAX 16 + +typedef struct uioa_page_s { /* locked uio_iov state */ + int uioa_pfncnt; /* count of pfn_t(s) in *uioa_ppp */ + void **uioa_ppp; /* page_t or pfn_t arrary */ + caddr_t uioa_base; /* address base */ + size_t uioa_len; /* span length */ +} uioa_page_t; + +typedef struct uioa_s { + iovec_t *uio_iov; /* pointer to array of iovecs */ + int uio_iovcnt; /* number of iovecs */ + lloff_t _uio_offset; /* file offset */ + uio_seg_t uio_segflg; /* address space (kernel or user) */ + uint16_t uio_fmode; /* file mode flags */ + uint16_t uio_extflg; /* extended flags */ + lloff_t _uio_limit; /* u-limit (maximum byte offset) */ + ssize_t uio_resid; /* residual count */ + /* + * uioa extended members. 
+ */ + uint32_t uioa_state; /* state of asynch i/o */ + uioa_page_t *uioa_lcur; /* pointer into uioa_locked[] */ + void **uioa_lppp; /* pointer into lcur->uioa_ppp[] */ + void *uioa_hwst[4]; /* opaque hardware state */ + uioa_page_t uioa_locked[UIOA_IOV_MAX]; /* Per iov locked pages */ +} uioa_t; + +#define UIOA_ALLOC 0x0001 /* allocated but not yet initialized */ +#define UIOA_INIT 0x0002 /* initialized but not yet enabled */ +#define UIOA_ENABLED 0x0004 /* enabled, asynch i/o active */ +#define UIOA_FINI 0x0008 /* finished waiting for uioafini() */ + +#define UIOA_CLR (~0x000F) /* clear mutually exclusive bits */ + +#define UIOA_POLL 0x0010 /* need dcopy_poll() */ + #define uio_loffset _uio_offset._f #if !defined(_LP64) #define uio_offset _uio_offset._p._l @@ -127,10 +169,24 @@ * access, ie, access bypassing caches, should be used. Filesystems that * don't initialize this field could experience suboptimal performance due to * the random data the field contains. + * + * NOTE: This flag is also used by uioasync callers to pass an extended + * uio_t (uioa_t), to uioasync enabled consumers. Unlike above all + * consumers of a uioa_t require the uio_extflg to be initialized. */ #define UIO_COPY_DEFAULT 0x0000 /* no special options to copy */ #define UIO_COPY_CACHED 0x0001 /* copy should not bypass caches */ +#define UIO_ASYNC 0x0002 /* uio_t is really a uioa_t */ + +/* + * Global uioasync capability shadow state. + */ +typedef struct uioasync_s { + boolean_t enabled; /* Is uioasync enabled? */ + size_t mincnt; /* Minimum byte count for use of */ +} uioasync_t; + #endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ #if defined(_KERNEL) @@ -141,6 +197,11 @@ void uioskip(uio_t *, size_t); int uiodup(uio_t *, uio_t *, iovec_t *, int); +int uioamove(void *, size_t, enum uio_rw, uioa_t *); +int uioainit(uio_t *, uioa_t *); +int uioafini(uio_t *, uioa_t *); +extern uioasync_t uioasync; + #else /* defined(_KERNEL) */ #if defined(__STDC__)
--- a/usr/src/uts/i86pc/Makefile.files Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86pc/Makefile.files Fri May 23 20:14:10 2008 -0700 @@ -161,6 +161,7 @@ # GFX_PRIVATE_OBJS += gfx_private.o gfxp_pci.o gfxp_segmap.o \ gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o +IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o PCI_E_MISC_OBJS += pcie.o pcie_fault.o PCI_E_NEXUS_OBJS += npe.o npe_misc.o
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86pc/Makefile.i86pc.shared Fri May 23 20:14:10 2008 -0700 @@ -257,6 +257,7 @@ DRV_KMODS += mc-amd DRV_KMODS += tzmon DRV_KMODS += acpi_drv +DRV_KMODS += ioat DRV_KMODS += cpudrv
--- a/usr/src/uts/i86pc/Makefile.rules Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86pc/Makefile.rules Fri May 23 20:14:10 2008 -0700 @@ -73,6 +73,10 @@ $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/mc/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -259,6 +263,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi_drv/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/mc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/ioat/ioat.c Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,665 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/modctl.h> +#include <sys/ddi_impldefs.h> +#include <sys/sysmacros.h> + +#include <sys/ioat.h> + +static int ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred); +static int ioat_close(dev_t devp, int flag, int otyp, cred_t *cred); +static int ioat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd); +static int ioat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd); +static int ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, + void **result); + +static struct cb_ops ioat_cb_ops = { + ioat_open, /* cb_open */ + ioat_close, /* cb_close */ + nodev, /* cb_strategy */ + nodev, /* cb_print */ + nodev, /* cb_dump */ + nodev, /* cb_read */ + nodev, /* cb_write */ + ioat_ioctl, /* cb_ioctl */ + nodev, /* cb_devmap */ + nodev, /* cb_mmap */ + nodev, /* cb_segmap */ + nochpoll, /* cb_chpoll */ + ddi_prop_op, /* cb_prop_op */ + NULL, /* cb_stream */ + D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */ + CB_REV +}; + +static struct dev_ops ioat_dev_ops = { + DEVO_REV, /* devo_rev */ + 0, /* devo_refcnt */ + ioat_getinfo, /* devo_getinfo */ + nulldev, /* devo_identify */ + nulldev, /* devo_probe */ + ioat_attach, /* devo_attach */ + ioat_detach, /* devo_detach */ + nodev, /* devo_reset */ + &ioat_cb_ops, /* devo_cb_ops */ + NULL, /* devo_bus_ops */ + NULL /* power */ +}; + +static struct modldrv ioat_modldrv = { + &mod_driverops, /* Type of module. This one is a driver */ + "ioat driver v%I%", /* Name of the module. 
*/ + &ioat_dev_ops, /* driver ops */ +}; + +static struct modlinkage ioat_modlinkage = { + MODREV_1, + (void *) &ioat_modldrv, + NULL +}; + + +void *ioat_statep; + +static int ioat_chip_init(ioat_state_t *state); +static void ioat_chip_fini(ioat_state_t *state); +static int ioat_drv_init(ioat_state_t *state); +static void ioat_drv_fini(ioat_state_t *state); +static uint_t ioat_isr(caddr_t parm); +static void ioat_intr_enable(ioat_state_t *state); +static void ioat_intr_disable(ioat_state_t *state); +void ioat_detach_finish(ioat_state_t *state); + + +ddi_device_acc_attr_t ioat_acc_attr = { + DDI_DEVICE_ATTR_V0, /* devacc_attr_version */ + DDI_NEVERSWAP_ACC, /* devacc_attr_endian_flags */ + DDI_STORECACHING_OK_ACC, /* devacc_attr_dataorder */ + DDI_DEFAULT_ACC /* devacc_attr_access */ +}; + +/* dcopy callback interface */ +dcopy_device_cb_t ioat_cb = { + DCOPY_DEVICECB_V0, + 0, /* reserved */ + ioat_channel_alloc, + ioat_channel_free, + ioat_cmd_alloc, + ioat_cmd_free, + ioat_cmd_post, + ioat_cmd_poll, + ioat_unregister_complete +}; + +/* + * _init() + */ +int +_init(void) +{ + int e; + + e = ddi_soft_state_init(&ioat_statep, sizeof (ioat_state_t), 1); + if (e != 0) { + return (e); + } + + e = mod_install(&ioat_modlinkage); + if (e != 0) { + ddi_soft_state_fini(&ioat_statep); + return (e); + } + + return (0); +} + +/* + * _info() + */ +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&ioat_modlinkage, modinfop)); +} + +/* + * _fini() + */ +int +_fini(void) +{ + int e; + + e = mod_remove(&ioat_modlinkage); + if (e != 0) { + return (e); + } + + ddi_soft_state_fini(&ioat_statep); + + return (0); +} + +/* + * ioat_attach() + */ +static int +ioat_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + ioat_state_t *state; + int instance; + int e; + + + switch (cmd) { + case DDI_ATTACH: + break; + + case DDI_RESUME: + instance = ddi_get_instance(dip); + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + return (DDI_FAILURE); + } + e = 
ioat_channel_resume(state); + if (e != DDI_SUCCESS) { + return (DDI_FAILURE); + } + ioat_intr_enable(state); + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + instance = ddi_get_instance(dip); + e = ddi_soft_state_zalloc(ioat_statep, instance); + if (e != DDI_SUCCESS) { + return (DDI_FAILURE); + } + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + goto attachfail_get_soft_state; + } + + state->is_dip = dip; + state->is_instance = instance; + + /* setup the registers, save away some device info */ + e = ioat_chip_init(state); + if (e != DDI_SUCCESS) { + goto attachfail_chip_init; + } + + /* initialize driver state, must be after chip init */ + e = ioat_drv_init(state); + if (e != DDI_SUCCESS) { + goto attachfail_drv_init; + } + + /* create the minor node (for the ioctl) */ + e = ddi_create_minor_node(dip, "ioat", S_IFCHR, instance, DDI_PSEUDO, + 0); + if (e != DDI_SUCCESS) { + goto attachfail_minor_node; + } + + /* Enable device interrupts */ + ioat_intr_enable(state); + + /* Report that driver was loaded */ + ddi_report_dev(dip); + + /* register with dcopy */ + e = dcopy_device_register(state, &state->is_deviceinfo, + &state->is_device_handle); + if (e != DCOPY_SUCCESS) { + goto attachfail_register; + } + + return (DDI_SUCCESS); + +attachfail_register: + ioat_intr_disable(state); + ddi_remove_minor_node(dip, NULL); +attachfail_minor_node: + ioat_drv_fini(state); +attachfail_drv_init: + ioat_chip_fini(state); +attachfail_chip_init: +attachfail_get_soft_state: + (void) ddi_soft_state_free(ioat_statep, instance); + + return (DDI_FAILURE); +} + +/* + * ioat_detach() + */ +static int +ioat_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + ioat_state_t *state; + int instance; + int e; + + + instance = ddi_get_instance(dip); + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + return (DDI_FAILURE); + } + + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_SUSPEND: + ioat_channel_suspend(state); 
+ return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + /* + * try to unregister from dcopy. Since this driver doesn't follow the + * traditional parent/child model, we may still be in use so we can't + * detach yet. + */ + e = dcopy_device_unregister(&state->is_device_handle); + if (e != DCOPY_SUCCESS) { + if (e == DCOPY_PENDING) { + cmn_err(CE_NOTE, "device busy, performing asynchronous" + " detach\n"); + } + return (DDI_FAILURE); + } + + ioat_detach_finish(state); + + return (DDI_SUCCESS); +} + +/* + * ioat_getinfo() + */ +/*ARGSUSED*/ +static int +ioat_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) +{ + ioat_state_t *state; + int instance; + dev_t dev; + int e; + + + dev = (dev_t)arg; + instance = getminor(dev); + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + return (DDI_FAILURE); + } + *result = (void *)state->is_dip; + e = DDI_SUCCESS; + break; + + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)(uintptr_t)instance; + e = DDI_SUCCESS; + break; + + default: + e = DDI_FAILURE; + break; + } + + return (e); +} + + +/* + * ioat_open() + */ +/*ARGSUSED*/ +static int +ioat_open(dev_t *devp, int flag, int otyp, cred_t *cred) +{ + ioat_state_t *state; + int instance; + + instance = getminor(*devp); + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + return (ENXIO); + } + + return (0); +} + + +/* + * ioat_close() + */ +/*ARGSUSED*/ +static int +ioat_close(dev_t devp, int flag, int otyp, cred_t *cred) +{ + return (0); +} + + +/* + * ioat_chip_init() + */ +static int +ioat_chip_init(ioat_state_t *state) +{ + ddi_device_acc_attr_t attr; + int e; + + + attr.devacc_attr_version = DDI_DEVICE_ATTR_V0; + attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; + attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC; + + e = ddi_regs_map_setup(state->is_dip, 1, (caddr_t *)&state->is_genregs, + 0, 0, &attr, &state->is_reg_handle); + if (e != 
DDI_SUCCESS) { + goto chipinitfail_regsmap; + } + + /* save away ioat chip info */ + state->is_num_channels = (uint_t)ddi_get8(state->is_reg_handle, + &state->is_genregs[IOAT_CHANCNT]); + state->is_maxxfer = (uint_t)ddi_get8(state->is_reg_handle, + &state->is_genregs[IOAT_XFERCAP]); + state->is_chanoff = (uintptr_t)ddi_get16(state->is_reg_handle, + (uint16_t *)&state->is_genregs[IOAT_PERPORT_OFF]); + state->is_cbver = (uint_t)ddi_get8(state->is_reg_handle, + &state->is_genregs[IOAT_CBVER]); + state->is_intrdelay = (uint_t)ddi_get16(state->is_reg_handle, + (uint16_t *)&state->is_genregs[IOAT_INTRDELAY]); + state->is_status = (uint_t)ddi_get16(state->is_reg_handle, + (uint16_t *)&state->is_genregs[IOAT_CSSTATUS]); + state->is_capabilities = (uint_t)ddi_get32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[IOAT_DMACAPABILITY]); + + if (state->is_cbver & 0x10) { + state->is_ver = IOAT_CBv1; + } else if (state->is_cbver & 0x20) { + state->is_ver = IOAT_CBv2; + } else { + goto chipinitfail_version; + } + + return (DDI_SUCCESS); + +chipinitfail_version: + ddi_regs_map_free(&state->is_reg_handle); +chipinitfail_regsmap: + return (DDI_FAILURE); +} + + +/* + * ioat_chip_fini() + */ +static void +ioat_chip_fini(ioat_state_t *state) +{ + ddi_regs_map_free(&state->is_reg_handle); +} + + +/* + * ioat_drv_init() + */ +static int +ioat_drv_init(ioat_state_t *state) +{ + ddi_acc_handle_t handle; + int e; + + + mutex_init(&state->is_mutex, NULL, MUTEX_DRIVER, NULL); + + state->is_deviceinfo.di_dip = state->is_dip; + state->is_deviceinfo.di_num_dma = state->is_num_channels; + state->is_deviceinfo.di_maxxfer = state->is_maxxfer; + state->is_deviceinfo.di_capabilities = state->is_capabilities; + state->is_deviceinfo.di_cb = &ioat_cb; + + e = pci_config_setup(state->is_dip, &handle); + if (e != DDI_SUCCESS) { + goto drvinitfail_config_setup; + } + + /* read in Vendor ID */ + state->is_deviceinfo.di_id = (uint64_t)pci_config_get16(handle, 0); + state->is_deviceinfo.di_id = 
state->is_deviceinfo.di_id << 16; + + /* read in Device ID */ + state->is_deviceinfo.di_id |= (uint64_t)pci_config_get16(handle, 2); + state->is_deviceinfo.di_id = state->is_deviceinfo.di_id << 32; + + /* Add in chipset version */ + state->is_deviceinfo.di_id |= (uint64_t)state->is_cbver; + pci_config_teardown(&handle); + + e = ddi_intr_hilevel(state->is_dip, 0); + if (e != 0) { + cmn_err(CE_WARN, "hilevel interrupt not supported\n"); + goto drvinitfail_hilevel; + } + + /* we don't support MSIs for v2 yet */ + e = ddi_add_intr(state->is_dip, 0, NULL, NULL, ioat_isr, + (caddr_t)state); + if (e != DDI_SUCCESS) { + goto drvinitfail_add_intr; + } + + e = ddi_get_iblock_cookie(state->is_dip, 0, &state->is_iblock_cookie); + if (e != DDI_SUCCESS) { + goto drvinitfail_iblock_cookie; + } + + e = ioat_channel_init(state); + if (e != DDI_SUCCESS) { + goto drvinitfail_channel_init; + } + + return (DDI_SUCCESS); + +drvinitfail_channel_init: +drvinitfail_iblock_cookie: + ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie); +drvinitfail_add_intr: +drvinitfail_hilevel: +drvinitfail_config_setup: + mutex_destroy(&state->is_mutex); + + return (DDI_FAILURE); +} + + +/* + * ioat_drv_fini() + */ +static void +ioat_drv_fini(ioat_state_t *state) +{ + ioat_channel_fini(state); + ddi_remove_intr(state->is_dip, 0, state->is_iblock_cookie); + mutex_destroy(&state->is_mutex); +} + + +/* + * ioat_unregister_complete() + */ +void +ioat_unregister_complete(void *device_private, int status) +{ + ioat_state_t *state; + + + state = device_private; + + if (status != DCOPY_SUCCESS) { + cmn_err(CE_WARN, "asynchronous detach aborted\n"); + return; + } + + cmn_err(CE_CONT, "detach completing\n"); + ioat_detach_finish(state); +} + + +/* + * ioat_detach_finish() + */ +void +ioat_detach_finish(ioat_state_t *state) +{ + ioat_intr_disable(state); + ddi_remove_minor_node(state->is_dip, NULL); + ioat_drv_fini(state); + ioat_chip_fini(state); + (void) ddi_soft_state_free(ioat_statep, state->is_instance); 
+} + + +/* + * ioat_intr_enable() + */ +static void +ioat_intr_enable(ioat_state_t *state) +{ + uint32_t intr_status; + + + /* Clear any pending interrupts */ + intr_status = ddi_get32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]); + if (intr_status != 0) { + ddi_put32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS], + intr_status); + } + + /* Enable interrupts on the device */ + ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL], + IOAT_INTRCTL_MASTER_EN); +} + + +/* + * ioat_intr_disable() + */ +static void +ioat_intr_disable(ioat_state_t *state) +{ + /* + * disable interrupts on the device. A read of the interrupt control + * register clears the enable bit. + */ + (void) ddi_get8(state->is_reg_handle, + &state->is_genregs[IOAT_INTRCTL]); +} + + +/* + * ioat_isr() + */ +static uint_t +ioat_isr(caddr_t parm) +{ + uint32_t intr_status; + ioat_state_t *state; + uint8_t intrctrl; + uint32_t chan; + uint_t r; + int i; + + state = (ioat_state_t *)parm; + + intrctrl = ddi_get8(state->is_reg_handle, + &state->is_genregs[IOAT_INTRCTL]); + /* master interrupt enable should always be set */ + ASSERT(intrctrl & IOAT_INTRCTL_MASTER_EN); + + /* If the interrupt status bit isn't set, it's not ours */ + if (!(intrctrl & IOAT_INTRCTL_INTR_STAT)) { + /* re-set master interrupt enable (since it clears on read) */ + ddi_put8(state->is_reg_handle, + &state->is_genregs[IOAT_INTRCTL], intrctrl); + return (DDI_INTR_UNCLAIMED); + } + + /* see which channels generated the interrupt */ + intr_status = ddi_get32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[IOAT_ATTNSTATUS]); + + /* call the intr handler for the channels */ + r = DDI_INTR_UNCLAIMED; + chan = 1; + for (i = 0; i < state->is_num_channels; i++) { + if (intr_status & chan) { + ioat_channel_intr(&state->is_channel[i]); + r = DDI_INTR_CLAIMED; + } + chan = chan << 1; + } + + /* + * if interrupt status bit was set, there should have been an + * attention 
status bit set too. + */ + ASSERT(r == DDI_INTR_CLAIMED); + + /* re-set master interrupt enable (since it clears on read) */ + ddi_put8(state->is_reg_handle, &state->is_genregs[IOAT_INTRCTL], + intrctrl); + + return (r); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/ioat/ioat.conf Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# force attach this driver to support misc/driver +ddi-forceattach=1; +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/ioat/ioat_chan.c Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,1319 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/modctl.h> +#include <sys/ddi_impldefs.h> +#include <sys/sysmacros.h> +#include <vm/hat.h> +#include <vm/as.h> +#include <sys/mach_mmu.h> +#ifdef __xpv +#include <sys/hypervisor.h> +#endif + +#include <sys/ioat.h> + + +extern ddi_device_acc_attr_t ioat_acc_attr; + +/* dma attr for the descriptor rings */ +ddi_dma_attr_t ioat_desc_dma_attr = { + DMA_ATTR_V0, /* dma_attr_version */ + 0x0, /* dma_attr_addr_lo */ + 0xffffffffffffffff, /* dma_attr_addr_hi */ + 0xffffffff, /* dma_attr_count_max */ + 0x1000, /* dma_attr_align */ + 0x1, /* dma_attr_burstsizes */ + 0x1, /* dma_attr_minxfer */ + 0xffffffff, /* dma_attr_maxxfer */ + 0xffffffff, /* dma_attr_seg */ + 0x1, /* dma_attr_sgllen */ + 0x1, /* dma_attr_granular */ + 0x0, /* dma_attr_flags */ +}; + +/* dma attr for the completion buffers */ +ddi_dma_attr_t ioat_cmpl_dma_attr = { + DMA_ATTR_V0, /* dma_attr_version */ + 0x0, /* dma_attr_addr_lo */ + 0xffffffffffffffff, /* dma_attr_addr_hi */ + 0xffffffff, /* dma_attr_count_max */ + 0x40, /* dma_attr_align */ + 0x1, /* dma_attr_burstsizes */ + 0x1, /* dma_attr_minxfer */ + 0xffffffff, /* dma_attr_maxxfer */ + 0xffffffff, /* dma_attr_seg */ + 0x1, /* dma_attr_sgllen */ + 0x1, /* dma_attr_granular */ + 0x0, /* dma_attr_flags */ +}; + +static int ioat_completion_alloc(ioat_channel_t channel); +static void ioat_completion_free(ioat_channel_t channel); +static void ioat_channel_start(ioat_channel_t channel); +static void ioat_channel_reset(ioat_channel_t channel); + +int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt); +void ioat_ring_free(ioat_channel_t channel); +void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc); +int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t 
*ring, + dcopy_cmd_t cmd); + +static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr, + uint64_t dest_addr, uint32_t size, uint32_t ctrl); +static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id); + + +/* + * ioat_channel_init() + */ +int +ioat_channel_init(ioat_state_t *state) +{ + int i; + + /* + * initialize each dma channel's state which doesn't change across + * channel alloc/free. + */ + state->is_chansize = sizeof (struct ioat_channel_s) * + state->is_num_channels; + state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP); + for (i = 0; i < state->is_num_channels; i++) { + state->is_channel[i].ic_state = state; + state->is_channel[i].ic_regs = (uint8_t *) + ((uintptr_t)state->is_genregs + + (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1))); + } + + /* initial the allocator (from 0 to state->is_num_channels) */ + ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs); + + return (DDI_SUCCESS); +} + + +/* + * ioat_channel_fini() + */ +void +ioat_channel_fini(ioat_state_t *state) +{ + ioat_rs_fini(&state->is_channel_rs); + kmem_free(state->is_channel, state->is_chansize); +} + + +/* + * ioat_channel_alloc() + * NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are + * available) + */ +/*ARGSUSED*/ +int +ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags, + uint_t size, dcopy_query_channel_t *info, void *channel_private) +{ +#define CHANSTRSIZE 20 + struct ioat_channel_s *channel; + char chanstr[CHANSTRSIZE]; + ioat_channel_t *chan; + ioat_state_t *state; + size_t cmd_size; + uint_t chan_num; + uint32_t estat; + int e; + + + state = (ioat_state_t *)device_private; + chan = (ioat_channel_t *)channel_private; + + /* allocate a H/W channel */ + e = ioat_rs_alloc(state->is_channel_rs, &chan_num); + if (e != DDI_SUCCESS) { + return (DCOPY_NORESOURCES); + } + + channel = &state->is_channel[chan_num]; + channel->ic_inuse = B_TRUE; + channel->ic_chan_num = chan_num; + 
channel->ic_ver = state->is_ver; + channel->ic_dca_active = B_FALSE; + channel->ic_channel_state = IOAT_CHANNEL_OK; + channel->ic_dcopy_handle = handle; + +#ifdef DEBUG + { + /* if we're cbv2, verify that the V2 compatibility bit is set */ + uint16_t reg; + if (channel->ic_ver == IOAT_CBv2) { + reg = ddi_get16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]); + ASSERT(reg & 0x2); + } + } +#endif + + /* + * Configure DMA channel + * Channel In Use + * Error Interrupt Enable + * Any Error Abort Enable + * Error Completion Enable + */ + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C); + + /* check channel error register, clear any errors */ + estat = ddi_get32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); + if (estat != 0) { +#ifdef DEBUG + cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) " + "enable\n", estat, channel->ic_chan_num); +#endif + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat); + } + + /* allocate and initialize the descriptor buf */ + e = ioat_ring_alloc(channel, size); + if (e != DDI_SUCCESS) { + goto chinitfail_desc_alloc; + } + + /* allocate and initialize the completion space */ + e = ioat_completion_alloc(channel); + if (e != DDI_SUCCESS) { + goto chinitfail_completion_alloc; + } + + /* setup kmem_cache for commands */ + cmd_size = sizeof (struct dcopy_cmd_s) + + sizeof (struct dcopy_cmd_priv_s) + + sizeof (struct ioat_cmd_private_s); + (void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd", + state->is_instance, channel->ic_chan_num); + channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64, + NULL, NULL, NULL, NULL, NULL, 0); + if (channel->ic_cmd_cache == NULL) { + goto chinitfail_kmem_cache; + } + + /* start-up the channel */ + ioat_channel_start(channel); + + /* fill in the channel info returned to dcopy */ + info->qc_version = DCOPY_QUERY_CHANNEL_V0; + info->qc_id = state->is_deviceinfo.di_id; + 
info->qc_capabilities = (uint64_t)state->is_capabilities; + info->qc_channel_size = (uint64_t)size; + info->qc_chan_num = (uint64_t)channel->ic_chan_num; + if (channel->ic_ver == IOAT_CBv1) { + info->qc_dca_supported = B_FALSE; + } else { + if (info->qc_capabilities & IOAT_DMACAP_DCA) { + info->qc_dca_supported = B_TRUE; + } else { + info->qc_dca_supported = B_FALSE; + } + } + + *chan = channel; + + return (DCOPY_SUCCESS); + +chinitfail_kmem_cache: + ioat_completion_free(channel); +chinitfail_completion_alloc: + ioat_ring_free(channel); +chinitfail_desc_alloc: + return (DCOPY_FAILURE); +} + + +/* + * ioat_channel_suspend() + */ +/*ARGSUSED*/ +void +ioat_channel_suspend(ioat_state_t *state) +{ + /* + * normally you would disable interrupts and reset the H/W here. But + * since the suspend framework doesn't know who is using us, it may + * not suspend their I/O before us. Since we won't actively be doing + * any DMA or interrupts unless someone asks us to, it's safe to not + * do anything here. 
+ */ +} + + +/* + * ioat_channel_resume() + */ +int +ioat_channel_resume(ioat_state_t *state) +{ + ioat_channel_ring_t *ring; + ioat_channel_t channel; + uint32_t estat; + int i; + + + for (i = 0; i < state->is_num_channels; i++) { + channel = &state->is_channel[i]; + ring = channel->ic_ring; + + if (!channel->ic_inuse) { + continue; + } + + /* + * Configure DMA channel + * Channel In Use + * Error Interrupt Enable + * Any Error Abort Enable + * Error Completion Enable + */ + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C); + + /* check channel error register, clear any errors */ + estat = ddi_get32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); + if (estat != 0) { +#ifdef DEBUG + cmn_err(CE_CONT, "cleared errors (0x%x) before channel" + " (%d) enable\n", estat, channel->ic_chan_num); +#endif + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], + estat); + } + + /* Re-initialize the ring */ + bzero(ring->cr_desc, channel->ic_desc_alloc_size); + /* write the physical address into the chain address register */ + if (channel->ic_ver == IOAT_CBv1) { + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], + (uint32_t)(ring->cr_phys_desc & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], + (uint32_t)(ring->cr_phys_desc >> 32)); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], + (uint32_t)(ring->cr_phys_desc & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], + (uint32_t)(ring->cr_phys_desc >> 32)); + } + + /* re-initialize the completion buffer */ + bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size); + /* write the phys addr into the completion address register */ + ddi_put32(state->is_reg_handle, + (uint32_t 
*)&channel->ic_regs[IOAT_CHAN_CMPL_LO], + (uint32_t)(channel->ic_phys_cmpl & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], + (uint32_t)(channel->ic_phys_cmpl >> 32)); + + /* start-up the channel */ + ioat_channel_start(channel); + + } + + return (DDI_SUCCESS); +} + + +/* + * ioat_channel_free() + */ +void +ioat_channel_free(void *channel_private) +{ + struct ioat_channel_s *channel; + ioat_channel_t *chan; + ioat_state_t *state; + uint_t chan_num; + + + chan = (ioat_channel_t *)channel_private; + channel = *chan; + + state = channel->ic_state; + chan_num = channel->ic_chan_num; + + /* disable the interrupts */ + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0); + + ioat_channel_reset(channel); + + /* cleanup command cache */ + kmem_cache_destroy(channel->ic_cmd_cache); + + /* clean-up/free-up the completion space and descriptors */ + ioat_completion_free(channel); + ioat_ring_free(channel); + + channel->ic_inuse = B_FALSE; + + /* free the H/W DMA engine */ + ioat_rs_free(state->is_channel_rs, chan_num); + + *chan = NULL; +} + + +/* + * ioat_channel_intr() + */ +void +ioat_channel_intr(ioat_channel_t channel) +{ + ioat_state_t *state; + uint16_t chanctrl; + uint32_t chanerr; + uint32_t status; + + + state = channel->ic_state; + + if (channel->ic_ver == IOAT_CBv1) { + status = ddi_get32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + status = ddi_get32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]); + } + + /* if that status isn't ACTIVE or IDLE, the channel has failed */ + if (status & IOAT_CHAN_STS_FAIL_MASK) { + chanerr = ddi_get32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]); + cmn_err(CE_WARN, "channel(%d) fatal failure! 
" + "chanstat_lo=0x%X; chanerr=0x%X\n", + channel->ic_chan_num, status, chanerr); + channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE; + ioat_channel_reset(channel); + + return; + } + + /* + * clear interrupt disable bit if set (it's a RW1C). Read it back to + * ensure the write completes. + */ + chanctrl = ddi_get16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]); + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl); + (void) ddi_get16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]); + + /* tell dcopy we have seen a completion on this channel */ + dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION); +} + + +/* + * ioat_channel_start() + */ +void +ioat_channel_start(ioat_channel_t channel) +{ + ioat_chan_dma_desc_t desc; + + /* set the first descriptor up as a NULL descriptor */ + bzero(&desc, sizeof (desc)); + desc.dd_size = 0; + desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL | + IOAT_DESC_CTRL_CMPL; + desc.dd_next_desc = 0x0; + + /* setup the very first descriptor */ + ioat_ring_seed(channel, &desc); +} + + +/* + * ioat_channel_reset() + */ +void +ioat_channel_reset(ioat_channel_t channel) +{ + ioat_state_t *state; + + state = channel->ic_state; + + /* hit the reset bit */ + if (channel->ic_ver == IOAT_CBv1) { + ddi_put8(state->is_reg_handle, + &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + ddi_put8(state->is_reg_handle, + &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20); + } +} + + +/* + * ioat_completion_alloc() + */ +int +ioat_completion_alloc(ioat_channel_t channel) +{ + ioat_state_t *state; + size_t real_length; + uint_t cookie_cnt; + int e; + + + state = channel->ic_state; + + /* + * allocate memory for the completion status, zero it out, and get + * the paddr. We'll allocate a physically contiguous cache line. 
+ */ + e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr, + DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle); + if (e != DDI_SUCCESS) { + goto cmplallocfail_alloc_handle; + } + channel->ic_cmpl_alloc_size = 64; + e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle, + channel->ic_cmpl_alloc_size, &ioat_acc_attr, + DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + (caddr_t *)&channel->ic_cmpl, &real_length, + &channel->ic_cmpl_handle); + if (e != DDI_SUCCESS) { + goto cmplallocfail_mem_alloc; + } + bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size); + e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL, + (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + &channel->ic_cmpl_cookie, &cookie_cnt); + if (e != DDI_SUCCESS) { + goto cmplallocfail_addr_bind; + } + ASSERT(cookie_cnt == 1); + ASSERT(channel->ic_cmpl_cookie.dmac_size == + channel->ic_cmpl_alloc_size); + channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress; + + /* write the physical address into the completion address register */ + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], + (uint32_t)(channel->ic_phys_cmpl & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], + (uint32_t)(channel->ic_phys_cmpl >> 32)); + + return (DDI_SUCCESS); + +cmplallocfail_addr_bind: + ddi_dma_mem_free(&channel->ic_desc_handle); +cmplallocfail_mem_alloc: + ddi_dma_free_handle(&channel->ic_desc_dma_handle); +cmplallocfail_alloc_handle: + return (DDI_FAILURE); +} + + +/* + * ioat_completion_free() + */ +void +ioat_completion_free(ioat_channel_t channel) +{ + ioat_state_t *state; + + state = channel->ic_state; + + /* reset the completion address register */ + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0); + + /* unbind, then 
free up the memory, dma handle */ + (void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle); + ddi_dma_mem_free(&channel->ic_cmpl_handle); + ddi_dma_free_handle(&channel->ic_cmpl_dma_handle); +} + +/* + * ioat_ring_alloc() + */ +int +ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt) +{ + ioat_channel_ring_t *ring; + ioat_state_t *state; + size_t real_length; + uint_t cookie_cnt; + int e; + + + state = channel->ic_state; + + ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP); + channel->ic_ring = ring; + ring->cr_chan = channel; + ring->cr_post_cnt = 0; + + mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER, + channel->ic_state->is_iblock_cookie); + mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER, + channel->ic_state->is_iblock_cookie); + + /* + * allocate memory for the ring, zero it out, and get the paddr. + * We'll allocate a physically contiguous chunck of memory which + * simplifies the completion logic. + */ + e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr, + DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle); + if (e != DDI_SUCCESS) { + goto ringallocfail_alloc_handle; + } + /* + * allocate one extra descriptor so we can simplify the empty/full + * logic. Then round that number up to a whole multiple of 4. 
+ */ + channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3; + ring->cr_desc_last = channel->ic_chan_desc_cnt - 1; + channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt * + sizeof (ioat_chan_desc_t); + e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle, + channel->ic_desc_alloc_size, &ioat_acc_attr, + DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle); + if (e != DDI_SUCCESS) { + goto ringallocfail_mem_alloc; + } + bzero(ring->cr_desc, channel->ic_desc_alloc_size); + e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL, + (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size, + DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, + &channel->ic_desc_cookies, &cookie_cnt); + if (e != DDI_SUCCESS) { + goto ringallocfail_addr_bind; + } + ASSERT(cookie_cnt == 1); + ASSERT(channel->ic_desc_cookies.dmac_size == + channel->ic_desc_alloc_size); + ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress; + + /* write the physical address into the chain address register */ + if (channel->ic_ver == IOAT_CBv1) { + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], + (uint32_t)(ring->cr_phys_desc & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], + (uint32_t)(ring->cr_phys_desc >> 32)); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], + (uint32_t)(ring->cr_phys_desc & 0xffffffff)); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], + (uint32_t)(ring->cr_phys_desc >> 32)); + } + + return (DCOPY_SUCCESS); + +ringallocfail_addr_bind: + ddi_dma_mem_free(&channel->ic_desc_handle); +ringallocfail_mem_alloc: + ddi_dma_free_handle(&channel->ic_desc_dma_handle); +ringallocfail_alloc_handle: + mutex_destroy(&ring->cr_desc_mutex); + mutex_destroy(&ring->cr_cmpl_mutex); + 
kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t)); + + return (DCOPY_FAILURE); +} + + +/* + * ioat_ring_free() + */ +void +ioat_ring_free(ioat_channel_t channel) +{ + ioat_state_t *state; + + + state = channel->ic_state; + + /* reset the chain address register */ + if (channel->ic_ver == IOAT_CBv1) { + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0); + ddi_put32(state->is_reg_handle, + (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0); + } + + /* unbind, then free up the memory, dma handle */ + (void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle); + ddi_dma_mem_free(&channel->ic_desc_handle); + ddi_dma_free_handle(&channel->ic_desc_dma_handle); + + mutex_destroy(&channel->ic_ring->cr_desc_mutex); + mutex_destroy(&channel->ic_ring->cr_cmpl_mutex); + kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t)); + +} + + +/* + * ioat_ring_seed() + * write the first descriptor in the ring. 
+ */ +void +ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc) +{ + ioat_channel_ring_t *ring; + ioat_chan_dma_desc_t *desc; + ioat_chan_dma_desc_t *prev; + ioat_state_t *state; + + + state = channel->ic_state; + ring = channel->ic_ring; + + /* init the completion state */ + ring->cr_cmpl_gen = 0x0; + ring->cr_cmpl_last = 0x0; + + /* write in the descriptor and init the descriptor state */ + ring->cr_post_cnt++; + channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc; + ring->cr_desc_gen = 0; + ring->cr_desc_prev = 0; + ring->cr_desc_next = 1; + + if (channel->ic_ver == IOAT_CBv1) { + /* hit the start bit */ + ddi_put8(state->is_reg_handle, + &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1); + } else { + /* + * if this is CBv2, link the descriptor to an empty + * descriptor + */ + ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2); + desc = (ioat_chan_dma_desc_t *) + &ring->cr_desc[ring->cr_desc_next]; + prev = (ioat_chan_dma_desc_t *) + &ring->cr_desc[ring->cr_desc_prev]; + + desc->dd_ctrl = 0; + desc->dd_next_desc = 0x0; + + prev->dd_next_desc = ring->cr_phys_desc + + (ring->cr_desc_next << 6); + + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT], + (uint16_t)1); + } + +} + + +/* + * ioat_cmd_alloc() + */ +int +ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd) +{ + ioat_cmd_private_t *priv; + ioat_channel_t channel; + dcopy_cmd_t oldcmd; + int kmflag; + + + channel = (ioat_channel_t)private; + + if (flags & DCOPY_NOSLEEP) { + kmflag = KM_NOSLEEP; + } else { + kmflag = KM_SLEEP; + } + + /* save the command passed incase DCOPY_ALLOC_LINK is set */ + oldcmd = *cmd; + + *cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag); + if (*cmd == NULL) { + return (DCOPY_NORESOURCES); + } + + /* setup the dcopy and ioat private state pointers */ + (*cmd)->dp_version = DCOPY_CMD_V0; + (*cmd)->dp_cmd = 0; + (*cmd)->dp_private = (struct dcopy_cmd_priv_s *) + ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s)); + 
(*cmd)->dp_private->pr_device_cmd_private = + (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private + + sizeof (struct dcopy_cmd_priv_s)); + + /* + * if DCOPY_ALLOC_LINK is set, link the old command to the new one + * just allocated. + */ + priv = (*cmd)->dp_private->pr_device_cmd_private; + if (flags & DCOPY_ALLOC_LINK) { + priv->ip_next = oldcmd; + } else { + priv->ip_next = NULL; + } + + return (DCOPY_SUCCESS); +} + + +/* + * ioat_cmd_free() + */ +void +ioat_cmd_free(void *private, dcopy_cmd_t *cmdp) +{ + ioat_cmd_private_t *priv; + ioat_channel_t channel; + dcopy_cmd_t next; + dcopy_cmd_t cmd; + + + channel = (ioat_channel_t)private; + cmd = *(cmdp); + + /* + * free all the commands in the chain (see DCOPY_ALLOC_LINK in + * ioat_cmd_alloc() for more info). + */ + while (cmd != NULL) { + priv = cmd->dp_private->pr_device_cmd_private; + next = priv->ip_next; + kmem_cache_free(channel->ic_cmd_cache, cmd); + cmd = next; + } + *cmdp = NULL; +} + + +/* + * ioat_cmd_post() + */ +int +ioat_cmd_post(void *private, dcopy_cmd_t cmd) +{ + ioat_channel_ring_t *ring; + ioat_cmd_private_t *priv; + ioat_channel_t channel; + ioat_state_t *state; + uint64_t dest_paddr; + uint64_t src_paddr; + uint64_t dest_addr; + uint32_t dest_size; + uint64_t src_addr; + uint32_t src_size; + size_t xfer_size; + uint32_t ctrl; + size_t size; + int e; + + + channel = (ioat_channel_t)private; + priv = cmd->dp_private->pr_device_cmd_private; + + state = channel->ic_state; + ring = channel->ic_ring; + + mutex_enter(&ring->cr_desc_mutex); + + /* if the channel has had a fatal failure, return failure */ + if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) { + mutex_exit(&ring->cr_cmpl_mutex); + return (DCOPY_FAILURE); + } + + /* make sure we have space for the descriptors */ + e = ioat_ring_reserve(channel, ring, cmd); + if (e != DCOPY_SUCCESS) { + mutex_exit(&ring->cr_cmpl_mutex); + return (DCOPY_NORESOURCES); + } + + /* if we support DCA, and the DCA flag is set, post a DCA desc */ + 
if ((channel->ic_ver == IOAT_CBv2) && + (cmd->dp_flags & DCOPY_CMD_DCA)) { + ioat_cmd_post_dca(ring, cmd->dp_dca_id); + } + + /* + * the dma copy may have to be broken up into multiple descriptors + * since we can't cross a page boundary. + */ + ASSERT(cmd->dp_version == DCOPY_CMD_V0); + ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY); + src_addr = cmd->dp.copy.cc_source; + dest_addr = cmd->dp.copy.cc_dest; + size = cmd->dp.copy.cc_size; + while (size > 0) { + src_paddr = pa_to_ma(src_addr); + dest_paddr = pa_to_ma(dest_addr); + + /* adjust for any offset into the page */ + if ((src_addr & PAGEOFFSET) == 0) { + src_size = PAGESIZE; + } else { + src_size = PAGESIZE - (src_addr & PAGEOFFSET); + } + if ((dest_addr & PAGEOFFSET) == 0) { + dest_size = PAGESIZE; + } else { + dest_size = PAGESIZE - (dest_addr & PAGEOFFSET); + } + + /* take the smallest of the three */ + xfer_size = MIN(src_size, dest_size); + xfer_size = MIN(xfer_size, size); + + /* + * if this is the last descriptor, and we are supposed to + * generate a completion, generate a completion. same logic + * for interrupt. + */ + ctrl = 0; + if (xfer_size == size) { + if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) { + ctrl |= IOAT_DESC_CTRL_CMPL; + } + if ((cmd->dp_flags & DCOPY_CMD_INTR)) { + ctrl |= IOAT_DESC_CTRL_INTR; + } + } + + ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size, + ctrl); + + /* go to the next page */ + src_addr += xfer_size; + dest_addr += xfer_size; + size -= xfer_size; + } + + /* + * if we are going to create a completion, save away the state so we + * can poll on it. 
+ */ + if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) { + priv->ip_generation = ring->cr_desc_gen_prev; + priv->ip_index = ring->cr_desc_prev; + } + + /* if queue not defined, tell the DMA engine about it */ + if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) { + if (channel->ic_ver == IOAT_CBv1) { + ddi_put8(state->is_reg_handle, + (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD], + 0x2); + } else { + ASSERT(channel->ic_ver == IOAT_CBv2); + ddi_put16(state->is_reg_handle, + (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT], + (uint16_t)(ring->cr_post_cnt & 0xFFFF)); + } + } + + mutex_exit(&ring->cr_desc_mutex); + + return (DCOPY_SUCCESS); +} + + +/* + * ioat_cmd_post_dca() + */ +static void +ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id) +{ + ioat_chan_dca_desc_t *desc; + ioat_chan_dca_desc_t *prev; + ioat_channel_t channel; + + + channel = ring->cr_chan; + desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next]; + prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev]; + + /* keep track of the number of descs posted for cbv2 */ + ring->cr_post_cnt++; + + /* + * post a context change desriptor. If dca has never been used on + * this channel, or if the id doesn't match the last id used on this + * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active, + * and save away the id we're using. 
+ */ + desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX; + desc->dd_next_desc = 0x0; + if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) { + channel->ic_dca_active = B_TRUE; + channel->ic_dca_current = dca_id; + desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG; + desc->dd_cntx = dca_id; + } + + /* Put the descriptors physical address in the previous descriptor */ + /*LINTED:E_TRUE_LOGICAL_EXPR*/ + ASSERT(sizeof (ioat_chan_dca_desc_t) == 64); + + /* sync the current desc */ + (void) ddi_dma_sync(channel->ic_desc_dma_handle, + ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV); + + /* update the previous desc and sync it too */ + prev->dd_next_desc = ring->cr_phys_desc + + (ring->cr_desc_next << 6); + (void) ddi_dma_sync(channel->ic_desc_dma_handle, + ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV); + + /* save the current desc_next and desc_last for the completion */ + ring->cr_desc_prev = ring->cr_desc_next; + ring->cr_desc_gen_prev = ring->cr_desc_gen; + + /* increment next/gen so it points to the next free desc */ + ring->cr_desc_next++; + if (ring->cr_desc_next > ring->cr_desc_last) { + ring->cr_desc_next = 0; + ring->cr_desc_gen++; + } + + /* + * if this is CBv2, link the descriptor to an empty descriptor. Since + * we always leave on desc empty to detect full, this works out. 
+ */ + if (ring->cr_chan->ic_ver == IOAT_CBv2) { + desc = (ioat_chan_dca_desc_t *) + &ring->cr_desc[ring->cr_desc_next]; + prev = (ioat_chan_dca_desc_t *) + &ring->cr_desc[ring->cr_desc_prev]; + desc->dd_ctrl = 0; + desc->dd_next_desc = 0x0; + + prev->dd_next_desc = ring->cr_phys_desc + + (ring->cr_desc_next << 6); + } +} + + +/* + * ioat_cmd_post_copy() + * + */ +static void +ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr, + uint64_t dest_addr, uint32_t size, uint32_t ctrl) +{ + ioat_chan_dma_desc_t *desc; + ioat_chan_dma_desc_t *prev; + ioat_channel_t channel; + + + channel = ring->cr_chan; + desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next]; + prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev]; + + /* keep track of the number of descs posted for cbv2 */ + ring->cr_post_cnt++; + + /* write in the DMA desc */ + desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl; + desc->dd_size = size; + desc->dd_src_paddr = src_addr; + desc->dd_dest_paddr = dest_addr; + desc->dd_next_desc = 0x0; + + /* Put the descriptors physical address in the previous descriptor */ + /*LINTED:E_TRUE_LOGICAL_EXPR*/ + ASSERT(sizeof (ioat_chan_dma_desc_t) == 64); + + /* sync the current desc */ + (void) ddi_dma_sync(channel->ic_desc_dma_handle, + ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV); + + /* update the previous desc and sync it too */ + prev->dd_next_desc = ring->cr_phys_desc + + (ring->cr_desc_next << 6); + (void) ddi_dma_sync(channel->ic_desc_dma_handle, + ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV); + + /* increment next/gen so it points to the next free desc */ + ring->cr_desc_prev = ring->cr_desc_next; + ring->cr_desc_gen_prev = ring->cr_desc_gen; + + /* increment next/gen so it points to the next free desc */ + ring->cr_desc_next++; + if (ring->cr_desc_next > ring->cr_desc_last) { + ring->cr_desc_next = 0; + ring->cr_desc_gen++; + } + + /* + * if this is CBv2, link the descriptor to an empty descriptor. 
Since + * we always leave on desc empty to detect full, this works out. + */ + if (ring->cr_chan->ic_ver == IOAT_CBv2) { + desc = (ioat_chan_dma_desc_t *) + &ring->cr_desc[ring->cr_desc_next]; + prev = (ioat_chan_dma_desc_t *) + &ring->cr_desc[ring->cr_desc_prev]; + desc->dd_size = 0; + desc->dd_ctrl = 0; + desc->dd_next_desc = 0x0; + + prev->dd_next_desc = ring->cr_phys_desc + + (ring->cr_desc_next << 6); + } +} + + +/* + * ioat_cmd_poll() + */ +int +ioat_cmd_poll(void *private, dcopy_cmd_t cmd) +{ + ioat_channel_ring_t *ring; + ioat_cmd_private_t *priv; + ioat_channel_t channel; + uint64_t generation; + uint64_t last_cmpl; + + + channel = (ioat_channel_t)private; + priv = cmd->dp_private->pr_device_cmd_private; + + ring = channel->ic_ring; + ASSERT(ring != NULL); + + mutex_enter(&ring->cr_cmpl_mutex); + + /* if the channel had a fatal failure, fail all polls */ + if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) || + IOAT_CMPL_FAILED(channel)) { + mutex_exit(&ring->cr_cmpl_mutex); + return (DCOPY_FAILURE); + } + + /* + * if the current completion is the same as the last time we read one, + * post is still pending, nothing further to do. We track completions + * as indexes into the ring since post uses VAs and the H/W returns + * PAs. We grab a snapshot of generation and last_cmpl in the mutex. + */ + (void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0, + DDI_DMA_SYNC_FORCPU); + last_cmpl = IOAT_CMPL_INDEX(channel); + if (last_cmpl != ring->cr_cmpl_last) { + /* + * if we wrapped the ring, increment the generation. Store + * the last cmpl. This logic assumes a physically contiguous + * ring. + */ + if (last_cmpl < ring->cr_cmpl_last) { + ring->cr_cmpl_gen++; + } + ring->cr_cmpl_last = last_cmpl; + generation = ring->cr_cmpl_gen; + + } else { + generation = ring->cr_cmpl_gen; + } + + mutex_exit(&ring->cr_cmpl_mutex); + + /* + * if cmd isn't passed in, well return. Useful for updating the + * consumer pointer (ring->cr_cmpl_last). 
+ */ + if (cmd == NULL) { + return (DCOPY_PENDING); + } + + /* + * if the post's generation is old, this post has completed. No reason + * to go check the last completion. if the generation is the same + * and if the post is before or = to the last completion processed, + * the post has completed. + */ + if (priv->ip_generation < generation) { + return (DCOPY_COMPLETED); + } else if ((priv->ip_generation == generation) && + (priv->ip_index <= last_cmpl)) { + return (DCOPY_COMPLETED); + } + + return (DCOPY_PENDING); +} + + +/* + * ioat_ring_reserve() + */ +int +ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring, + dcopy_cmd_t cmd) +{ + uint64_t dest_addr; + uint32_t dest_size; + uint64_t src_addr; + uint32_t src_size; + size_t xfer_size; + uint64_t desc; + int num_desc; + size_t size; + int i; + + + /* + * figure out how many descriptors we need. This can include a dca + * desc and multiple desc for a dma copy. + */ + num_desc = 0; + if ((channel->ic_ver == IOAT_CBv2) && + (cmd->dp_flags & DCOPY_CMD_DCA)) { + num_desc++; + } + src_addr = cmd->dp.copy.cc_source; + dest_addr = cmd->dp.copy.cc_dest; + size = cmd->dp.copy.cc_size; + while (size > 0) { + num_desc++; + + /* adjust for any offset into the page */ + if ((src_addr & PAGEOFFSET) == 0) { + src_size = PAGESIZE; + } else { + src_size = PAGESIZE - (src_addr & PAGEOFFSET); + } + if ((dest_addr & PAGEOFFSET) == 0) { + dest_size = PAGESIZE; + } else { + dest_size = PAGESIZE - (dest_addr & PAGEOFFSET); + } + + /* take the smallest of the three */ + xfer_size = MIN(src_size, dest_size); + xfer_size = MIN(xfer_size, size); + + /* go to the next page */ + src_addr += xfer_size; + dest_addr += xfer_size; + size -= xfer_size; + } + + /* Make sure we have space for these descriptors */ + desc = ring->cr_desc_next; + for (i = 0; i < num_desc; i++) { + + /* + * if this is the last descriptor in the ring, see if the + * last completed descriptor is #0. 
+ */ + if (desc == ring->cr_desc_last) { + if (ring->cr_cmpl_last == 0) { + /* + * if we think the ring is full, update where + * the H/W really is and check for full again. + */ + (void) ioat_cmd_poll(channel, NULL); + if (ring->cr_cmpl_last == 0) { + return (DCOPY_NORESOURCES); + } + } + + /* + * go to the next descriptor which is zero in this + * case. + */ + desc = 0; + + /* + * if this is not the last descriptor in the ring, see if + * the last completion we saw was the next descriptor. + */ + } else { + if ((desc + 1) == ring->cr_cmpl_last) { + /* + * if we think the ring is full, update where + * the H/W really is and check for full again. + */ + (void) ioat_cmd_poll(channel, NULL); + if ((desc + 1) == ring->cr_cmpl_last) { + return (DCOPY_NORESOURCES); + } + } + + /* go to the next descriptor */ + desc++; + } + } + + return (DCOPY_SUCCESS); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/ioat/ioat_ioctl.c Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,343 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/modctl.h> +#include <sys/ddi_impldefs.h> +#include <sys/sysmacros.h> + +#include <vm/hat.h> +#include <vm/as.h> + +#include <sys/ioat.h> + + +extern void *ioat_statep; +#define ptob64(x) (((uint64_t)(x)) << PAGESHIFT) + +static int ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode); +#ifdef DEBUG +static int ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode); +static int ioat_ioctl_test(ioat_state_t *state, void *arg, int mode); +#endif + +/* + * ioat_ioctl() + */ +/*ARGSUSED*/ +int +ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval) +{ + ioat_state_t *state; + int instance; + int e; + + + e = drv_priv(cred); + if (e != 0) { + return (EPERM); + } + instance = getminor(dev); + if (instance == -1) { + return (EBADF); + } + state = ddi_get_soft_state(ioat_statep, instance); + if (state == NULL) { + return (EBADF); + } + + switch (cmd) { + case IOAT_IOCTL_READ_REG: + e = ioat_ioctl_rdreg(state, (void *)arg, mode); + break; +#ifdef DEBUG + case IOAT_IOCTL_WRITE_REG: + e = ioat_ioctl_wrreg(state, (void *)arg, mode); + break; + case IOAT_IOCTL_TEST: + e = ioat_ioctl_test(state, (void *)arg, mode); + break; +#endif + + default: + e = ENXIO; + } + + return (e); +} + + +/* + * ioat_ioctl_rdreg() + */ +static int +ioat_ioctl_rdreg(ioat_state_t *state, void *arg, int mode) +{ + ioat_ioctl_rdreg_t rdreg; + int e; + + + e = ddi_copyin(arg, &rdreg, sizeof (ioat_ioctl_rdreg_t), mode); + if (e != 0) { + return (EFAULT); + } + + /* + * read a device register, where size is read size in bits, addr is + * the offset into MMIO registers. 
+ */ + switch (rdreg.size) { + case 8: + rdreg.data = (uint64_t)ddi_get8(state->is_reg_handle, + (uint8_t *)&state->is_genregs[rdreg.addr]); + break; + case 16: + rdreg.data = (uint64_t)ddi_get16(state->is_reg_handle, + (uint16_t *)&state->is_genregs[rdreg.addr]); + break; + case 32: + rdreg.data = (uint64_t)ddi_get32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[rdreg.addr]); + break; + case 64: + rdreg.data = (uint64_t)ddi_get64(state->is_reg_handle, + (uint64_t *)&state->is_genregs[rdreg.addr]); + break; + default: + return (EFAULT); + } + + e = ddi_copyout(&rdreg, arg, sizeof (ioat_ioctl_rdreg_t), mode); + if (e != 0) { + return (EFAULT); + } + + return (0); +} + + +#ifdef DEBUG +/* + * ioat_ioctl_wrreg() + */ +static int +ioat_ioctl_wrreg(ioat_state_t *state, void *arg, int mode) +{ + ioat_ioctl_wrreg_t wrreg; + int e; + + + e = ddi_copyin(arg, &wrreg, sizeof (ioat_ioctl_wrreg_t), mode); + if (e != 0) { + return (EFAULT); + } + + /* + * write a device register, where size is write size in bits, addr is + * the offset into MMIO registers. 
+ */ + switch (wrreg.size) { + case 8: + ddi_put8(state->is_reg_handle, + (uint8_t *)&state->is_genregs[wrreg.addr], + (uint8_t)wrreg.data); + break; + case 16: + ddi_put16(state->is_reg_handle, + (uint16_t *)&state->is_genregs[wrreg.addr], + (uint16_t)wrreg.data); + break; + case 32: + ddi_put32(state->is_reg_handle, + (uint32_t *)&state->is_genregs[wrreg.addr], + (uint32_t)wrreg.data); + break; + case 64: + ddi_put64(state->is_reg_handle, + (uint64_t *)&state->is_genregs[wrreg.addr], + (uint64_t)wrreg.data); + break; + default: + return (EFAULT); + } + + return (0); +} + + +/* + * ioat_ioctl_test() + */ +/*ARGSUSED*/ +static int +ioat_ioctl_test(ioat_state_t *state, void *arg, int mode) +{ + dcopy_handle_t channel; + dcopy_cmd_t cmd; + uint8_t *source; + uint_t buf_size; + uint_t poll_cnt; + uint8_t *dest; + uint8_t *buf; + int flags; + int i; + int e; + + + /* allocate 2 paged aligned 4k pages */ + buf_size = 0x1000; + buf = kmem_zalloc((buf_size * 2) + 0x1000, KM_SLEEP); + source = (uint8_t *)(((uintptr_t)buf + PAGEOFFSET) & PAGEMASK); + dest = source + buf_size; + + /* Init source buffer */ + for (i = 0; i < buf_size; i++) { + source[i] = (uint8_t)(i & 0xFF); + } + + /* allocate a DMA channel */ + e = dcopy_alloc(DCOPY_SLEEP, &channel); + if (e != DCOPY_SUCCESS) { + cmn_err(CE_CONT, "dcopy_alloc() failed\n"); + goto testfail_alloc; + } + + /* + * post 32 DMA copy's from dest to dest. These will complete in order + * so they won't stomp on each other. We don't care about the data + * right now which is why we go dest to dest. + */ + flags = DCOPY_SLEEP; + for (i = 0; i < 32; i++) { + /* + * if this is the second command, link the commands from here + * on out. We only want to keep track of the last command. We + * will poll on the last command completing (which infers that + * the other commands completed). If any of the previous + * commands fail, so will the last one. Linking the commands + * also allows us to only call free for the last command. 
free + * will free up the entire chain of commands. + */ + if (i == 1) { + flags |= DCOPY_ALLOC_LINK; + } + e = dcopy_cmd_alloc(channel, flags, &cmd); + if (e != DCOPY_SUCCESS) { + cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n"); + goto testfail_alloc; + } + + ASSERT(cmd->dp_version == DCOPY_CMD_V0); + cmd->dp_cmd = DCOPY_CMD_COPY; + cmd->dp_flags = DCOPY_CMD_NOFLAGS; + + /* do a bunch of dest to dest DMA's */ + cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat, + (caddr_t)source)) + ((uintptr_t)dest & PAGEOFFSET); + cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat, + (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET); + cmd->dp.copy.cc_size = PAGESIZE; + + e = dcopy_cmd_post(cmd); + if (e != DCOPY_SUCCESS) { + cmn_err(CE_CONT, "dcopy_post() failed\n"); + goto testfail_post; + } + } + + e = dcopy_cmd_alloc(channel, flags, &cmd); + if (e != DCOPY_SUCCESS) { + cmn_err(CE_CONT, "dcopy_cmd_alloc() failed\n"); + goto testfail_alloc; + } + + /* now queue up the DMA we are going to check status and data for */ + cmd->dp_cmd = DCOPY_CMD_COPY; + cmd->dp_flags = DCOPY_CMD_INTR; + cmd->dp.copy.cc_source = ptob64(hat_getpfnum(kas.a_hat, + (caddr_t)source)) + ((uintptr_t)source & PAGEOFFSET); + cmd->dp.copy.cc_dest = ptob64(hat_getpfnum(kas.a_hat, + (caddr_t)dest)) + ((uintptr_t)dest & PAGEOFFSET); + cmd->dp.copy.cc_size = PAGESIZE; + e = dcopy_cmd_post(cmd); + if (e != DCOPY_SUCCESS) { + cmn_err(CE_CONT, "dcopy_post() failed\n"); + goto testfail_post; + } + + /* check the status of the last command */ + poll_cnt = 0; + flags = DCOPY_POLL_NOFLAGS; + while ((e = dcopy_cmd_poll(cmd, flags)) == DCOPY_PENDING) { + poll_cnt++; + if (poll_cnt >= 16) { + flags |= DCOPY_POLL_BLOCK; + } + } + if (e != DCOPY_COMPLETED) { + cmn_err(CE_CONT, "dcopy_poll() failed\n"); + goto testfail_poll; + } + + /* since the cmd's are linked we only need to pass in the last cmd */ + dcopy_cmd_free(&cmd); + dcopy_free(&channel); + + /* verify the data */ + for (i = 0; i < PAGESIZE; i++) { + if 
(dest[i] != (uint8_t)(i & 0xFF)) { + cmn_err(CE_CONT, + "dcopy_data_compare() failed, %p[%d]: %x, %x\n", + (void *)dest, i, dest[i], i & 0xFF); + return (-1); + } + } + + kmem_free(buf, (buf_size * 2) + 0x1000); + + return (0); + +testfail_data_compare: +testfail_poll: +testfail_post: + dcopy_cmd_free(&cmd); + dcopy_free(&channel); +testfail_alloc: + kmem_free(buf, (buf_size * 2) + 0x1000); + + return (-1); +} +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/io/ioat/ioat_rs.c Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,246 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kmem.h> +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> + +#include <sys/ioat.h> + + +/* structure used to keep track of resources */ +typedef struct ioat_rs_s { + /* + * Bounds of resource allocation. We will start allocating at rs_min + * and rollover at rs_max+1 (rs_max is included). e.g. for rs_min=0 + * and rs_max=7, we will have 8 total resources which can be alloced. + */ + uint_t rs_min; + uint_t rs_max; + + /* + * rs_free points to an array of 64-bit values used to track resource + * allocation. rs_free_size is the free buffer size in bytes. + */ + uint64_t *rs_free; + uint_t rs_free_size; + + /* + * last tracks the last alloc'd resource. This allows us to do a round + * robin allocation. 
+ */ + uint_t rs_last; + + kmutex_t rs_mutex; +} ioat_rs_t; + + +/* + * ioat_rs_init() + * Initialize the resource structure. This structure will be protected + * by a mutex at the iblock_cookie passed in. init() returns a handle to be + * used for the rest of the resource functions. This code is written assuming + * that min_val will be close to 0. Therefore, we will allocate the free + * buffer only taking max_val into account. + */ +void +ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val, + ioat_rs_hdl_t *handle) +{ + ioat_rs_t *rstruct; + uint_t array_size; + uint_t index; + + + ASSERT(handle != NULL); + ASSERT(min_val < max_val); + + /* alloc space for resource structure */ + rstruct = kmem_alloc(sizeof (ioat_rs_t), KM_SLEEP); + + /* + * Test to see if the max value is 64-bit aligned. If so, we don't need + * to allocate an extra 64-bit word. alloc space for free buffer + * (8 bytes per uint64_t). + */ + if ((max_val & 0x3F) == 0) { + rstruct->rs_free_size = (max_val >> 6) * 8; + } else { + rstruct->rs_free_size = ((max_val >> 6) + 1) * 8; + } + rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP); + + /* Initialize resource structure */ + rstruct->rs_min = min_val; + rstruct->rs_last = min_val; + rstruct->rs_max = max_val; + mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, + state->is_iblock_cookie); + + /* Mark all resources as free */ + array_size = rstruct->rs_free_size >> 3; + for (index = 0; index < array_size; index++) { + rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF; + } + + /* setup handle which is returned from this function */ + *handle = rstruct; +} + + +/* + * ioat_rs_fini() + * Frees up the space allocated in init(). Notice that a pointer to the + * handle is used for the parameter. fini() will set the handle to NULL + * before returning. 
+ */ +void +ioat_rs_fini(ioat_rs_hdl_t *handle) +{ + ioat_rs_t *rstruct; + + + ASSERT(handle != NULL); + + rstruct = (ioat_rs_t *)*handle; + + mutex_destroy(&rstruct->rs_mutex); + kmem_free(rstruct->rs_free, rstruct->rs_free_size); + kmem_free(rstruct, sizeof (ioat_rs_t)); + + /* set handle to null. This helps catch bugs. */ + *handle = NULL; +} + + +/* + * ioat_rs_alloc() + * alloc a resource. If alloc fails, we are out of resources. + */ +int +ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *resource) +{ + ioat_rs_t *rstruct; + uint_t array_idx; + uint64_t free; + uint_t index; + uint_t last; + uint_t min; + uint_t max; + + + ASSERT(handle != NULL); + ASSERT(resource != NULL); + + rstruct = (ioat_rs_t *)handle; + + mutex_enter(&rstruct->rs_mutex); + min = rstruct->rs_min; + max = rstruct->rs_max; + + /* + * Find a free resource. This will return out of the loop once it finds + * a free resource. There are a total of 'max'-'min'+1 resources. + * Performs a round robin allocation. + */ + for (index = min; index <= max; index++) { + + array_idx = rstruct->rs_last >> 6; + free = rstruct->rs_free[array_idx]; + last = rstruct->rs_last & 0x3F; + + /* if the next resource to check is free */ + if ((free & ((uint64_t)1 << last)) != 0) { + /* we are using this resource */ + *resource = rstruct->rs_last; + + /* take it out of the free list */ + rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last); + + /* + * increment the last count so we start checking the + * next resource on the next alloc(). Note the rollover + * at 'max'+1. + */ + rstruct->rs_last++; + if (rstruct->rs_last > max) { + rstruct->rs_last = rstruct->rs_min; + } + + /* unlock the resource structure */ + mutex_exit(&rstruct->rs_mutex); + + return (DDI_SUCCESS); + } + + /* + * This resource is not free, lets go to the next one. Note the + * rollover at 'max'. 
+ */ + rstruct->rs_last++; + if (rstruct->rs_last > max) { + rstruct->rs_last = rstruct->rs_min; + } + } + + mutex_exit(&rstruct->rs_mutex); + + return (DDI_FAILURE); +} + + +/* + * ioat_rs_free() + * Free the previously alloc'd resource. Once a resource has been free'd, + * it can be used again when alloc is called. + */ +void +ioat_rs_free(ioat_rs_hdl_t handle, uint_t resource) +{ + ioat_rs_t *rstruct; + uint_t array_idx; + uint_t offset; + + + ASSERT(handle != NULL); + + rstruct = (ioat_rs_t *)handle; + ASSERT(resource >= rstruct->rs_min); + ASSERT(resource <= rstruct->rs_max); + + mutex_enter(&rstruct->rs_mutex); + + /* Put the resource back in the free list */ + array_idx = resource >> 6; + offset = resource & 0x3F; + rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset); + + mutex_exit(&rstruct->rs_mutex); +}
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# uts/i86pc/ioat/Makefile
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#pragma ident	"%Z%%M%	%I%	%E% SMI"
#
# This makefile drives the production of the ioat driver kernel
# module.
#

#
# Path to the base of the uts directory tree (usually /usr/src/uts).
#
UTSBASE = ../..

#
# Define the module and object file sets.  IOAT_OBJS is defined in the
# shared uts Makefile.files; CONF_SRCDIR is where ioat.conf lives.
#
MODULE		= ioat
OBJECTS		= $(IOAT_OBJS:%=$(OBJS_DIR)/%)
LINTS		= $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln)
ROOTMODULE	= $(ROOT_PSM_DRV_DIR)/$(MODULE)
CONF_SRCDIR	= $(UTSBASE)/i86pc/io/ioat

#
# Include common rules.
#
include $(UTSBASE)/i86pc/Makefile.i86pc

#
# Define targets
#
ALL_TARGET	= $(BINARY) $(SRC_CONFILE)
LINT_TARGET	= $(MODULE).lint
INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)

LINTTAGS	+= -erroff=E_BAD_PTR_CAST_ALIGN

#
# Dependency: ioat depends on the dcopy misc module at load time.
#
LDFLAGS		+= -dy -Nmisc/dcopy

#
# Override defaults to build a unique, local modstubs.o.
#
MODSTUBS_DIR	= $(OBJS_DIR)
CLEANFILES	+= $(MODSTUBS_O)

#
# Default build targets.
#
.KEEP_STATE:

def:		$(DEF_DEPS)

all:		$(ALL_DEPS)

clean:		$(CLEAN_DEPS)

clobber:	$(CLOBBER_DEPS)

lint:		$(LINT_DEPS)

modlintlib:	$(MODLINTLIB_DEPS)

clean.lint:	$(CLEAN_LINT_DEPS)

install:	$(INSTALL_DEPS)

#
# Include common targets.
#
include $(UTSBASE)/i86pc/Makefile.targ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86pc/sys/ioat.h Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,359 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_IOAT_H +#define _SYS_IOAT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/dcopy.h> +#include <sys/dcopy_device.h> + + +/* ioat ioctls */ +#define IOATIOC ('T'<< 8) +typedef enum { + IOAT_IOCTL_WRITE_REG = (IOATIOC | 0x0), + IOAT_IOCTL_READ_REG = (IOATIOC | 0x1), + IOAT_IOCTL_TEST = (IOATIOC | 0x2) +} ioat_ioctl_enum_t; + +typedef struct ioat_ioctl_reg_s { + uint_t size; + uint_t addr; + uint64_t data; +} ioat_ioctl_reg_t; +typedef ioat_ioctl_reg_t ioat_ioctl_wrreg_t; +typedef ioat_ioctl_reg_t ioat_ioctl_rdreg_t; + +#ifdef _KERNEL +/* *** Driver Private Below *** */ + +/* IOAT_DMACAPABILITY flags */ +#define IOAT_DMACAP_PAGEBREAK 0x1 +#define IOAT_DMACAP_CRC 0x2 +#define IOAT_DMACAP_MARKERSKIP 0x4 +#define IOAT_DMACAP_XOR 0x8 +#define IOAT_DMACAP_DCA 0x10 + +/* IOAT_INTRCTL bits */ +#define IOAT_INTRCTL_MASTER_EN 0x1 +#define IOAT_INTRCTL_INTR_STAT 0x2 + +/* MMIO Registers */ +#define IOAT_CHANCNT 0x0 /* 8-bit */ +#define IOAT_XFERCAP 0x1 /* 8-bit */ +#define IOAT_GENCTRL 0x2 /* 8-bit */ +#define IOAT_INTRCTL 0x3 /* 8-bit */ +#define IOAT_ATTNSTATUS 0x4 /* 32-bit */ +#define IOAT_CBVER 0x8 /* 8-bit */ +#define IOAT_PERPORT_OFF 0xA /* 16-bit */ +#define IOAT_INTRDELAY 0xC /* 16-bit */ +#define IOAT_CSSTATUS 0xE /* 16-bit */ +#define IOAT_DMACAPABILITY 0x10 /* 32-bit */ + +#define IOAT_CHANNELREG_OFFSET 0x80 + +/* Channel Registers */ +#define IOAT_CHAN_CTL 0x0 /* 16-bit */ +#define IOAT_CHAN_COMP 0x2 /* 16-bit */ +#define IOAT_CHAN_CMPL_LO 0x18 /* 32-bit */ +#define IOAT_CHAN_CMPL_HI 0x1C /* 32-bit */ +#define IOAT_CHAN_ERR 0x28 /* 32-bit */ +#define IOAT_CHAN_ERRMASK 0x2C /* 32-bit */ +#define IOAT_CHAN_DCACTRL 0x30 /* 32-bit */ + +#define IOAT_V1_CHAN_STS_LO 0x4 /* 32-bit */ +#define IOAT_V1_CHAN_STS_HI 0x8 /* 32-bit */ +#define IOAT_V1_CHAN_ADDR_LO 0x0C /* 32-bit */ +#define IOAT_V1_CHAN_ADDR_HI 0x10 /* 32-bit */ +#define IOAT_V1_CHAN_CMD 0x14 /* 8-bit */ + +#define 
IOAT_V2_CHAN_CMD 0x4 /* 8-bit */ +#define IOAT_V2_CHAN_CNT 0x6 /* 16-bit */ +#define IOAT_V2_CHAN_STS_LO 0x8 /* 32-bit */ +#define IOAT_V2_CHAN_STS_HI 0xC /* 32-bit */ +#define IOAT_V2_CHAN_ADDR_LO 0x10 /* 32-bit */ +#define IOAT_V2_CHAN_ADDR_HI 0x14 /* 32-bit */ + +#define IOAT_CHAN_STS_ADDR_MASK 0xFFFFFFFFFFFFFFC0 +#define IOAT_CHAN_STS_XFER_MASK 0x3F +#define IOAT_CHAN_STS_FAIL_MASK 0x6 +#define IOAT_CMPL_INDEX(channel) \ + (((*channel->ic_cmpl & IOAT_CHAN_STS_ADDR_MASK) - \ + ring->cr_phys_desc) >> 6) +#define IOAT_CMPL_FAILED(channel) \ + (*channel->ic_cmpl & IOAT_CHAN_STS_FAIL_MASK) + + +typedef struct ioat_chan_desc_s { + uint32_t dd_res0; + uint32_t dd_ctrl; + uint64_t dd_res1; + uint64_t dd_res2; + uint64_t dd_next_desc; + uint64_t dd_res4; + uint64_t dd_res5; + uint64_t dd_res6; + uint64_t dd_res7; +} ioat_chan_desc_t; + +/* dca dd_ctrl bits */ +#define IOAT_DESC_CTRL_OP_CNTX ((uint32_t)0xFF << 24) +#define IOAT_DESC_CTRL_CNTX_CHNG 0x1 +typedef struct ioat_chan_dca_desc_s { + uint32_t dd_cntx; + uint32_t dd_ctrl; + uint64_t dd_res1; + uint64_t dd_res2; + uint64_t dd_next_desc; + uint64_t dd_res4; + uint64_t dd_res5; + uint64_t dd_res6; + uint64_t dd_res7; +} ioat_chan_dca_desc_t; + +/* dma dd_ctrl bits */ +#define IOAT_DESC_CTRL_OP_DMA (0x0 << 24) +#define IOAT_DESC_DMACTRL_NULL 0x20 +#define IOAT_DESC_CTRL_FENCE 0x10 +#define IOAT_DESC_CTRL_CMPL 0x8 +#define IOAT_DESC_CTRL_INTR 0x1 +typedef struct ioat_chan_dma_desc_s { + uint32_t dd_size; + uint32_t dd_ctrl; + uint64_t dd_src_paddr; + uint64_t dd_dest_paddr; + uint64_t dd_next_desc; + uint64_t dd_next_src_paddr; /* v2 only */ + uint64_t dd_next_dest_paddr; /* v2 only */ + uint64_t dd_res6; + uint64_t dd_res7; +} ioat_chan_dma_desc_t; + + +typedef enum { + IOAT_CBv1, + IOAT_CBv2 +} ioat_version_t; + +/* ioat private data per command */ +typedef struct ioat_cmd_private_s { + uint64_t ip_generation; + uint64_t ip_index; + dcopy_cmd_t ip_next; +} ioat_cmd_private_t; + +/* descriptor ring state */ +typedef 
struct ioat_channel_ring_s { + /* protects cr_cmpl_gen & cr_cmpl_last */ + kmutex_t cr_cmpl_mutex; + + /* desc ring generation for the last completion we saw */ + uint64_t cr_cmpl_gen; + + /* last descriptor index we saw complete */ + uint64_t cr_cmpl_last; + + /* protects cr_desc_* */ + kmutex_t cr_desc_mutex; + + /* + * last descriptor posted. used to update its next pointer when we + * add a new desc. Also used to track the completion (See comment for + * cr_desc_gen_prev). + */ + uint64_t cr_desc_prev; + + /* where to put the next descriptor */ + uint64_t cr_desc_next; + + /* what the current desc ring generation is */ + uint64_t cr_desc_gen; + + /* + * used during cmd_post to track the last desc posted. cr_desc_next + * and cr_desc_gen will be pointing to the next free desc after + * writing the descriptor to the ring. But we want to track the + * completion for the last descriptor posted. + */ + uint64_t cr_desc_gen_prev; + + /* the last desc in the ring (for wrap) */ + uint64_t cr_desc_last; + + /* pointer to the head of the ring */ + ioat_chan_desc_t *cr_desc; + + /* physical address of the head of the ring */ + uint64_t cr_phys_desc; + + /* back pointer to the channel state */ + struct ioat_channel_s *cr_chan; + + /* for CB v2, number of desc posted (written to IOAT_V2_CHAN_CNT) */ + uint_t cr_post_cnt; +} ioat_channel_ring_t; + +/* track channel state so we can handle a failure */ +typedef enum { + IOAT_CHANNEL_OK = 0, + IOAT_CHANNEL_IN_FAILURE = 1 +} ic_channel_state_t; + +typedef struct ioat_channel_s *ioat_channel_t; +struct ioat_channel_s { + /* channel's ring state */ + ioat_channel_ring_t *ic_ring; + + /* IOAT_CBv1 || IOAT_CBv2 */ + ioat_version_t ic_ver; + + /* + * state to determine if it's OK to post to the channel and if all + * future polls should return failure. 
+ */ + ic_channel_state_t ic_channel_state; + + /* channel command cache (*_cmd_alloc, *_cmd_free, etc) */ + kmem_cache_t *ic_cmd_cache; + + /* dcopy state for dcopy_device_channel_notify() call */ + dcopy_handle_t ic_dcopy_handle; + + /* location in memory where completions are DMA'ed into */ + volatile uint64_t *ic_cmpl; + + /* channel specific registers */ + uint8_t *ic_regs; + + /* if this channel is using DCA */ + boolean_t ic_dca_active; + + /* DCA ID the channel is currently pointing to */ + uint32_t ic_dca_current; + + /* device's channel number */ + uint_t ic_chan_num; + + /* number of descriptors in ring */ + uint_t ic_chan_desc_cnt; + + /* descriptor ring alloc state */ + ddi_dma_handle_t ic_desc_dma_handle; + size_t ic_desc_alloc_size; + ddi_acc_handle_t ic_desc_handle; + ddi_dma_cookie_t ic_desc_cookies; + + /* completion buffer alloc state */ + ddi_dma_handle_t ic_cmpl_dma_handle; + size_t ic_cmpl_alloc_size; + ddi_acc_handle_t ic_cmpl_handle; + ddi_dma_cookie_t ic_cmpl_cookie; + uint64_t ic_phys_cmpl; + + /* if inuse, we need to re-init the channel during resume */ + boolean_t ic_inuse; + + /* backpointer to driver state */ + struct ioat_state_s *ic_state; +}; + +typedef struct ioat_rs_s *ioat_rs_hdl_t; + +/* driver state */ +typedef struct ioat_state_s { + dev_info_t *is_dip; + int is_instance; + + kmutex_t is_mutex; + + /* register handle and pointer to registers */ + ddi_acc_handle_t is_reg_handle; + uint8_t *is_genregs; + + /* IOAT_CBv1 || IOAT_CBv2 */ + ioat_version_t is_ver; + + /* channel state */ + ioat_channel_t is_channel; + size_t is_chansize; + ioat_rs_hdl_t is_channel_rs; + + ddi_iblock_cookie_t is_iblock_cookie; + + /* device info */ + uint_t is_chanoff; + uint_t is_num_channels; + uint_t is_maxxfer; + uint_t is_cbver; + uint_t is_intrdelay; + uint_t is_status; + uint_t is_capabilities; + + /* dcopy_device_register()/dcopy_device_unregister() state */ + dcopy_device_handle_t is_device_handle; + dcopy_device_info_t is_deviceinfo; +} 
ioat_state_t; + + +int ioat_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, + int *rval); + +void ioat_rs_init(ioat_state_t *state, uint_t min_val, uint_t max_val, + ioat_rs_hdl_t *handle); +void ioat_rs_fini(ioat_rs_hdl_t *handle); +int ioat_rs_alloc(ioat_rs_hdl_t handle, uint_t *rs); +void ioat_rs_free(ioat_rs_hdl_t handle, uint_t rs); + +int ioat_channel_init(ioat_state_t *state); +void ioat_channel_fini(ioat_state_t *state); +void ioat_channel_suspend(ioat_state_t *state); +int ioat_channel_resume(ioat_state_t *state); + +int ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags, + uint_t size, dcopy_query_channel_t *info, void *channel_private); +void ioat_channel_free(void *channel_private); +void ioat_channel_intr(ioat_channel_t channel); +int ioat_cmd_alloc(void *channel, int flags, dcopy_cmd_t *cmd); +void ioat_cmd_free(void *channel, dcopy_cmd_t *cmd); +int ioat_cmd_post(void *channel, dcopy_cmd_t cmd); +int ioat_cmd_poll(void *channel, dcopy_cmd_t cmd); +void ioat_unregister_complete(void *device_private, int status); + + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_IOAT_H */
--- a/usr/src/uts/i86xpv/Makefile.files Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86xpv/Makefile.files Fri May 23 20:14:10 2008 -0700 @@ -179,12 +179,13 @@ # # driver & misc modules # -ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o +BALLOON_OBJS += balloon_drv.o DOMCAPS_OBJS += domcaps.o -BALLOON_OBJS += balloon_drv.o EVTCHN_OBJS += evtchn_dev.o GFX_PRIVATE_OBJS += gfx_private.o gfxp_pci.o gfxp_segmap.o \ gfxp_devmap.o gfxp_vgatext.o gfxp_vm.o vgasubr.o +IOAT_OBJS += ioat.o ioat_rs.o ioat_ioctl.o ioat_chan.o +ISANEXUS_OBJS += isa.o dma_engine.o i8237A.o PCI_E_MISC_OBJS += pcie.o pcie_fault.o PCI_E_NEXUS_OBJS += npe.o npe_misc.o PCI_E_NEXUS_OBJS += pci_common.o pci_kstats.o pci_tools.o
--- a/usr/src/uts/i86xpv/Makefile.i86xpv.shared Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86xpv/Makefile.i86xpv.shared Fri May 23 20:14:10 2008 -0700 @@ -240,6 +240,7 @@ # DRV_KMODS += rootnex +DRV_KMODS += ioat DRV_KMODS += isa DRV_KMODS += pci DRV_KMODS += npe
--- a/usr/src/uts/i86xpv/Makefile.rules Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/i86xpv/Makefile.rules Fri May 23 20:14:10 2008 -0700 @@ -57,6 +57,10 @@ $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/pci/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -215,6 +219,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/cpr/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/pci/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/i86xpv/ioat/Makefile Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,97 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86xpv/ioat/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the ioat driver kernel +# module. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = ioat +OBJECTS = $(IOAT_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(IOAT_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/i86pc/io/ioat + +# +# Include common rules. +# +include $(UTSBASE)/i86xpv/Makefile.i86xpv + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Dependency +# +LDFLAGS += -dy -Nmisc/dcopy + +# +# Override defaults to build a unique, local modstubs.o. 
+# +MODSTUBS_DIR = $(OBJS_DIR) +CLEANFILES += $(MODSTUBS_O) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86xpv/Makefile.targ +
--- a/usr/src/uts/intel/Makefile.files Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/intel/Makefile.files Fri May 23 20:14:10 2008 -0700 @@ -138,6 +138,7 @@ CMLB_OBJS += cmlb.o CPUNEX_OBJS += cpunex.o DADK_OBJS += dadk.o +DCOPY_OBJS += dcopy.o DNET_OBJS += dnet.o mii.o FD_OBJS += fd.o GDA_OBJS += gda.o
--- a/usr/src/uts/intel/Makefile.intel.shared Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/intel/Makefile.intel.shared Fri May 23 20:14:10 2008 -0700 @@ -528,6 +528,7 @@ MISC_KMODS += consconfig MISC_KMODS += ctf MISC_KMODS += dadk +MISC_KMODS += dcopy MISC_KMODS += dls MISC_KMODS += drm MISC_KMODS += fssnap_if
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/intel/dcopy/Makefile Fri May 23 20:14:10 2008 -0700 @@ -0,0 +1,84 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/dcopy/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the dcopy +# kernel module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = dcopy +OBJECTS = $(DCOPY_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DCOPY_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ
--- a/usr/src/uts/intel/ia32/ml/modstubs.s Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/intel/ia32/ml/modstubs.s Fri May 23 20:14:10 2008 -0700 @@ -1313,6 +1313,22 @@ END_MODULE(kssl); #endif +/* + * Stubs for dcopy, for Intel IOAT KAPIs + */ +#ifndef DCOPY_MODULE + MODULE(dcopy,misc); + NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one); + END_MODULE(dcopy); +#endif + / this is just a marker for the area of text that contains stubs ENTRY_NP(stubs_end)
--- a/usr/src/uts/sparc/ml/modstubs.s Fri May 23 18:47:44 2008 -0700 +++ b/usr/src/uts/sparc/ml/modstubs.s Fri May 23 20:14:10 2008 -0700 @@ -1265,6 +1265,22 @@ END_MODULE(kssl); #endif +/* + * Stubs for dcopy, for Intel IOAT KAPIs + */ +#ifndef DCOPY_MODULE + MODULE(dcopy,misc); + NO_UNLOAD_STUB(dcopy, dcopy_query, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_query_channel, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_free, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_alloc, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_free, nomod_void); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_post, nomod_minus_one); + NO_UNLOAD_STUB(dcopy, dcopy_cmd_poll, nomod_minus_one); + END_MODULE(dcopy); +#endif + ! this is just a marker for the area of text that contains stubs .seg ".text" .global stubs_end