Mercurial > illumos > s390-betelgeuse
changeset 10861:43a614b9a540
Branch merge + libscf/Makfile.com s390 standardize flag names
author | tide@localhost |
---|---|
date | Mon, 17 Aug 2009 09:14:35 -0400 |
parents | b1ec56e41796 (current diff) 3928a1af021c (diff) |
children | b5304f1daa92 |
files | usr/src/cmd/isns/isnsd/Makefile usr/src/lib/libscf/Makefile.com |
diffstat | 189 files changed, 12267 insertions(+), 4065 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/cmd-inet/lib/nwamd/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/cmd-inet/lib/nwamd/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # usr/src/cmd/cmd-inet/lib/nwamd/Makefile @@ -43,7 +43,7 @@ ROOTCMDDIR= $(ROOTFS_LIBDIR)/inet LDLIBS += -lsocket -lnsl -linetcfg -linetutil -lumem -lscf -ldladm \ - -lgen -ldoor -lsecdb -lbsm -lsysevent -lnvpair + -lgen -lsecdb -lbsm -lsysevent -lnvpair .KEEP_STATE:
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_probe.c Mon Aug 17 09:14:35 2009 -0400 @@ -1,3 +1,4 @@ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -103,11 +104,11 @@ * The state of a phyint that is capable of being probed, is completely * specified by the 3-tuple <pi_state, pg_state, I>. * - * A phyint starts in either PI_RUNNING or PI_FAILED, depending on the state - * of the link (according to the driver). If the phyint is also configured - * with a test address (the common case) and probe targets, then a phyint must - * also successfully be able to send and receive probes in order to remain in - * the PI_RUNNING state (otherwise, it transitions to PI_FAILED). + * A phyint starts in either PI_RUNNING or PI_OFFLINE, depending on whether + * IFF_OFFLINE is set. If the phyint is also configured with a test address + * (the common case) and probe targets, then a phyint must also successfully + * be able to send and receive probes in order to remain in the PI_RUNNING + * state (otherwise, it transitions to PI_FAILED). * * Further, if a PI_RUNNING phyint is configured with a test address but is * unable to find any probe targets, it will transition to the PI_NOTARGETS @@ -1360,11 +1361,11 @@ } /* - * Transition a phyint back to PI_RUNNING (from PI_FAILED or PI_OFFLINE). The - * caller must ensure that the transition is appropriate. Clears IFF_OFFLINE - * or IFF_FAILED, as appropriate. Also sets IFF_INACTIVE on this or other - * interfaces as appropriate (see comment below). Finally, also updates the - * phyint's group state to account for the change. + * Transition a phyint to PI_RUNNING. The caller must ensure that the + * transition is appropriate. Clears IFF_OFFLINE or IFF_FAILED if + * appropriate. Also sets IFF_INACTIVE on this or other interfaces as + * appropriate (see comment below). Finally, also updates the phyint's group + * state to account for the change. */ void phyint_transition_to_running(struct phyint *pi) @@ -1373,6 +1374,7 @@ struct phyint *actstandbypi = NULL; uint_t nactive = 0, nnonstandby = 0; boolean_t onlining = (pi->pi_state == PI_OFFLINE); + boolean_t initial = (pi->pi_state == PI_INIT); uint64_t set, clear; /* @@ -1422,7 +1424,7 @@ } else if (onlining || failback_enabled) { /* case 2 */ if (nactive >= nnonstandby && actstandbypi != NULL) (void) change_pif_flags(actstandbypi, IFF_INACTIVE, 0); - } else if (!GROUP_FAILED(pi->pi_group)) { /* case 3 */ + } else if (!initial && !GROUP_FAILED(pi->pi_group)) { /* case 3 */ set |= IFF_INACTIVE; } (void) change_pif_flags(pi, set, clear);
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c Mon Aug 17 09:14:35 2009 -0400 @@ -386,15 +386,7 @@ pi->pi_ifindex = ifindex; pi->pi_icmpid = htons(((getpid() & 0xFF) << 8) | (ifindex & 0xFF)); - /* - * If the interface is offline, we set the state to PI_OFFLINE. - * Otherwise, we optimistically start in the PI_RUNNING state. Later - * (in process_link_state_changes()), we will adjust this to match the - * current state of the link. Further, if test addresses are - * subsequently assigned, we will transition to PI_NOTARGETS and then - * to either PI_RUNNING or PI_FAILED depending on the probe results. - */ - pi->pi_state = (flags & IFF_OFFLINE) ? PI_OFFLINE : PI_RUNNING; + pi->pi_state = PI_INIT; pi->pi_flags = PHYINT_FLAGS(flags); /* @@ -417,6 +409,19 @@ */ phyint_insert(pi, pg); + /* + * If the interface is offline, we set the state to PI_OFFLINE. + * Otherwise, optimistically consider this interface running. Later + * (in process_link_state_changes()), we will adjust this to match the + * current state of the link. Further, if test addresses are + * subsequently assigned, we will transition to PI_NOTARGETS and then + * to either PI_RUNNING or PI_FAILED depending on the probe results. + */ + if (flags & IFF_OFFLINE) + phyint_chstate(pi, PI_OFFLINE); + else + phyint_transition_to_running(pi); /* calls phyint_chstate() */ + return (pi); } @@ -2677,6 +2682,9 @@ ifstate(struct phyint *pi) { switch (pi->pi_state) { + case PI_INIT: + return (IPMP_IF_UNKNOWN); + case PI_NOTARGETS: if (pi->pi_flags & IFF_FAILED) return (IPMP_IF_FAILED);
--- a/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.h Mon Aug 17 09:14:35 2009 -0400 @@ -167,6 +167,7 @@ * Phyint states; see below for the phyint definition. */ enum pi_state { + PI_INIT = 0, /* Phyint is being initialized */ PI_NOTARGETS = 1, /* Phyint has no targets */ PI_RUNNING = 2, /* Phyint is functioning */ PI_FAILED = 3, /* Phyint is failed */
--- a/usr/src/cmd/devfsadm/Makefile.com Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/devfsadm/Makefile.com Mon Aug 17 09:14:35 2009 -0400 @@ -22,6 +22,11 @@ # Use is subject to license terms. # +# This target builds both a command (daemon) and various shared objects. This +# isn't a typical target, and the inclusion of both library and command +# Makefiles were probably not in their original design. However, there doesn't +# presently seem to be a clash of any required definitions. +include ../../../lib/Makefile.lib include ../../Makefile.cmd COMMON = .. @@ -90,10 +95,15 @@ LINTFLAGS += -erroff=E_NAME_DEF_NOT_USED2 LINTFLAGS += -erroff=E_NAME_MULTIPLY_DEF2 -LDLIBS += -ldevinfo -lgen -lsysevent -lnvpair -ldoor -lzonecfg -lbsm +# Define the dependencies required by devfsadm and all shared objects. +LDLIBS += -ldevinfo +devfsadm := LDLIBS += -lgen -lsysevent -lnvpair -lzonecfg -lbsm +SUNW_md_link.so := LDLIBS += -lmeta -LINK_MOD_LDLIBS= -SUNW_md_link.so := LINK_MOD_LDLIBS= -lmeta +# All libraries are built from the same SUNW_%.so rule (see below), and define +# their own SONAME using -h explicitly. Null the generic -h macro that gets +# inherited from Makefile.lib, otherwise we'll get two -h definitions. +HSONAME = SRCS = $(DEVFSADM_SRC) $(LINK_SRCS) OBJS = $(DEVFSADM_OBJ) $(LINK_OBJS) @@ -164,8 +174,8 @@ $(LINK.c) -o $@ $< $(DEVFSADM_OBJ) $(LDLIBS) $(POST_PROCESS) -SUNW_%.so: %.o - $(LINK.c) -o $@ $(GSHARED) -h $@ $< $(LINK_MOD_LDLIBS) +SUNW_%.so: %.o $(MAPFILES) + $(CC) -o $@ $(GSHARED) $(DYNFLAGS) -h $@ $< $(LDLIBS) -lc $(POST_PROCESS_SO) %.o: $(COMMON)/%.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/cmd/devfsadm/mapfile-vers Mon Aug 17 09:14:35 2009 -0400 @@ -0,0 +1,64 @@ +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +# External interface requirements +{ + global: + devfsadm_devlink_cache = EXTERN; + devfsadm_enumerate_char_start = EXTERN; + devfsadm_enumerate_int = EXTERN; + devfsadm_errprint = EXTERN; + devfsadm_free_dev_names = EXTERN; + devfsadm_have_reserved = EXTERN; + devfsadm_is_reserved = EXTERN; + devfsadm_link_valid = EXTERN; + devfsadm_lookup_dev_names = EXTERN; + devfsadm_mklink = EXTERN; + devfsadm_noupdate = EXTERN; + devfsadm_print = EXTERN; + devfsadm_read_link = EXTERN; + devfsadm_reserve_id_cache = EXTERN; + devfsadm_rm_all = EXTERN; + devfsadm_rm_link = EXTERN; + devfsadm_rm_stale_links = EXTERN; + devfsadm_root_path = EXTERN; + devfsadm_secondary_link = EXTERN; + disk_enumerate_int = EXTERN; + s_strdup = EXTERN; + system_labeled = EXTERN; +};
--- a/usr/src/cmd/fps/fpsd/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/fps/fpsd/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -44,7 +44,7 @@ CFLAGS += $(CCMT) -LDLIBS += -lc -ldevinfo -ldoor -lkstat -lscf -lgen +LDLIBS += -lc -ldevinfo -lkstat -lscf -lgen LDFLAGS += $(ZIGNORE)
--- a/usr/src/cmd/fs.d/autofs/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/fs.d/autofs/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -82,9 +82,9 @@ GREP= egrep SED= sed -$(AUTO) := LDLIBS += -lnsl -lsldap -ldoor +$(AUTO) := LDLIBS += -lnsl -lsldap $(MOUNT):= LDLIBS += -lnsl -$(TYPEPROG) := LDLIBS += -lrpcsvc -lsocket -lnsl -lsldap -lkstat -ldoor +$(TYPEPROG) := LDLIBS += -lrpcsvc -lsocket -lnsl -lsldap -lkstat CFLAGS += $(CCVERBOSE) -D_FILE_OFFSET_BITS=64 CPPFLAGS= -I. -I.. -I../nfs/lib $(CPPFLAGS.master) -D_REENTRANT \
--- a/usr/src/cmd/fs.d/nfs/mountd/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/fs.d/nfs/mountd/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -35,7 +35,7 @@ OBJS = $(LOCAL) $(COMMON) SRCS = $(LOCAL:%.o=%.c) $(FSLIBSRC) ../lib/nfs_sec.c \ ../lib/sharetab.c ../lib/daemon.c -LDLIBS += -lrpcsvc -lnsl -lbsm -lsocket -ldoor -ltsnet -ltsol +LDLIBS += -lrpcsvc -lnsl -lbsm -lsocket -ltsnet -ltsol CPPFLAGS += -D_REENTRANT $(TYPEPROG): $(OBJS)
--- a/usr/src/cmd/fs.d/nfs/nfsmapid/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/fs.d/nfs/nfsmapid/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -19,11 +19,9 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# FSTYPE = nfs TYPEPROG = nfsmapid @@ -33,7 +31,7 @@ include ../../Makefile.fstype LDLIBS += -L$(ROOT)/usr/lib/nfs -R/usr/lib/nfs -LDLIBS += -lnsl -lmapid -ldoor -ldtrace -lidmap +LDLIBS += -lnsl -lmapid -ldtrace -lidmap SRCS = nfsmapid.c nfsmapid_server.c DSRC = nfsmapid_dt.d DOBJ = $(DSRC:%.d=%.o)
--- a/usr/src/cmd/fs.d/smbclnt/smbiod/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/fs.d/smbclnt/smbiod/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -44,7 +44,7 @@ # This is a multi-thread program but Nevada # no longer needs -lthread -LDLIBS += -lsmbfs -ldoor +LDLIBS += -lsmbfs CPPFLAGS += -I$(SRC)/lib/libsmbfs \ -I$(SRC)/uts/common/smbclnt -I$(SRC)/uts/common
--- a/usr/src/cmd/iscsi/iscsitgtd/Makefile.com Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/iscsi/iscsitgtd/Makefile.com Mon Aug 17 09:14:35 2009 -0400 @@ -56,7 +56,7 @@ all: $(PROG) -LDLIBS += -lumem -luuid -lsocket -lnsl -ldoor -lavl -lmd5 -ladm -lefi +LDLIBS += -lumem -luuid -lsocket -lnsl -lavl -lmd5 -ladm -lefi LDLIBS += -liscsitgt -lzfs -ldlpi -lscf -lsasl XMLLIB = -lxml2
--- a/usr/src/cmd/iscsid/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/iscsid/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -40,7 +40,7 @@ include ../Makefile.cmd CCVERBOSE = -LDLIBS += -ldoor -lnsl +LDLIBS += -lnsl ROOTMANIFESTDIR= $(ROOTSVCNETWORKISCSI) $(ROOTSVCNETWORKISCSI)/iscsi-initiator.xml := FILEMODE = 0444
--- a/usr/src/cmd/isns/isnsadm/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/isns/isnsadm/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ # CDDL HEADER END # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -29,7 +29,7 @@ include ../../Makefile.cmd -LDLIBS += -lxml2 -ldoor -lscf -lc +LDLIBS += -lxml2 -lscf -lc CFLAGS += $(CCVERBOSE) CPPFLAGS += -I. -I/usr/include/libxml2 -I../isnsd POFILE = isnsadm_all.po
--- a/usr/src/cmd/isns/isnsd/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/isns/isnsd/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -58,7 +58,7 @@ obj.o := CERRWARN += -erroff=E_CONST_OBJ_SHOULD_HAVE_INITIZR obj.o := CERRWARN += -erroff=E_IMPLICIT_DECL_FUNC_RETURN_INT -LDLIBS += -lsecdb -lsocket -lnsl -lscf -lxml2 -ldoor +LDLIBS += -lsecdb -lsocket -lnsl -lscf -lxml2 .KEEP_STATE:
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Mon Aug 17 09:14:35 2009 -0400 @@ -241,7 +241,7 @@ objset_name(uintptr_t addr, char *buf) { static int gotid; - static mdb_ctf_id_t osi_id, ds_id; + static mdb_ctf_id_t os_id, ds_id; uintptr_t os_dsl_dataset; char ds_snapname[MAXNAMELEN]; uintptr_t ds_dir; @@ -249,9 +249,9 @@ buf[0] = '\0'; if (!gotid) { - if (mdb_ctf_lookup_by_name("struct objset_impl", - &osi_id) == -1) { - mdb_warn("couldn't find struct objset_impl"); + if (mdb_ctf_lookup_by_name("struct objset", + &os_id) == -1) { + mdb_warn("couldn't find struct objset"); return (DCMD_ERR); } if (mdb_ctf_lookup_by_name("struct dsl_dataset", @@ -263,7 +263,7 @@ gotid = TRUE; } - if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset)) + if (GETMEMBID(addr, &os_id, os_dsl_dataset, os_dsl_dataset)) return (DCMD_ERR); if (os_dsl_dataset == 0) { @@ -2158,7 +2158,7 @@ { "dbuf", ":", "print dmu_buf_impl_t", dbuf }, { "dbuf_stats", ":", "dbuf stats", dbuf_stats }, { "dbufs", - "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] " + "\t[-O objset_t*] [-n objset_name | \"mos\"] " "[-o object | \"mdn\"] \n" "\t[-l level] [-b blkid | \"bonus\"]", "find dmu_buf_impl_t's that match specified criteria", dbufs },
--- a/usr/src/cmd/mv/mv.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/mv/mv.c Mon Aug 17 09:14:35 2009 -0400 @@ -66,6 +66,7 @@ #define ISDEV(A) ((A.st_mode & S_IFMT) == S_IFCHR || \ (A.st_mode & S_IFMT) == S_IFBLK || \ (A.st_mode & S_IFMT) == S_IFIFO) +#define ISSOCK(A) ((A.st_mode & S_IFMT) == S_IFSOCK) #define BLKSIZE 4096 #define PATHSIZE 1024 @@ -477,7 +478,7 @@ /* * Make sure source file is not a directory, - * we can't link directories... + * we cannot link directories... */ if (ISDIR(s1)) { @@ -643,15 +644,24 @@ return (n); } - /* doors can't be moved across filesystems */ + /* doors cannot be moved across filesystems */ if (ISDOOR(s1)) { (void) fprintf(stderr, - gettext("%s: %s: can't move door " + gettext("%s: %s: cannot move door " "across file systems\n"), cmd, source); return (1); } + + /* sockets cannot be moved across filesystems */ + if (ISSOCK(s1)) { + (void) fprintf(stderr, + gettext("%s: %s: cannot move socket " + "across file systems\n"), cmd, source); + return (1); + } + /* - * File can't be renamed, try to recreate the symbolic + * File cannot be renamed, try to recreate the symbolic * link or special device, or copy the file wholesale * between file systems. */ @@ -1212,7 +1222,7 @@ if (realpath(path, rpath) == NULL) { int errno_save = errno; (void) fprintf(stderr, gettext( - "%s: can't resolve path %s: "), cmd, path); + "%s: cannot resolve path %s: "), cmd, path); errno = errno_save; perror(""); return (0);
--- a/usr/src/cmd/prctl/prctl.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/prctl/prctl.c Mon Aug 17 09:14:35 2009 -0400 @@ -77,6 +77,7 @@ typedef struct prctl_list { char *name; + rctl_qty_t *usage; prctl_value_t *val_list; struct prctl_list *next; } prctl_list_t; @@ -1220,6 +1221,34 @@ rblk1 = rblk2; rblk2 = rblk_tmp; } + + /* + * Get the current usage for the resource control if it matched the + * privilege and entity type criteria. + */ + if (list != NULL) { + if (pr_getrctl(Pr, rctlname, NULL, rblk2, RCTL_USAGE) == 0) { + list->usage = (rctl_qty_t *)malloc(sizeof (rctl_qty_t)); + if (list->usage == NULL) { + preserve_error(gettext("malloc failed: %s"), + strerror(errno)); + free(rblk1); + free(rblk2); + return (1); + } + *list->usage = rctlblk_get_value(rblk2); + } else { + list->usage = NULL; + if (errno != ENOTSUP) { + preserve_error(gettext("failed to get " + "resource control usage for %s: %s"), + rctlname, strerror(errno)); + free(rblk1); + free(rblk2); + return (1); + } + } + } free(rblk1); free(rblk2); return (0); @@ -1323,6 +1352,7 @@ old_val = new_val; } free(old_list->name); + free(old_list->usage); new_list = old_list->next; free(old_list); old_list = new_list; @@ -1432,6 +1462,23 @@ unit = SCALED_UNIT_NONE; scale = scale_metric; } + + /* print the current usage for the rctl if available */ + if (iter_list->usage != NULL) { + rblk_value = *(iter_list->usage); + if (!arg_parseable_mode) { + (void) uint64toscaled(rblk_value, 4, "E", + rctl_valuestring, NULL, NULL, + scale, NULL, 0); + + (void) fprintf(stdout, "%8s%-16s%5s%-4s\n", + "", "usage", rctl_valuestring, unit); + } else { + (void) fprintf(stdout, "%s %s %llu - - -\n", + iter_list->name, "usage", rblk_value); + } + } + /* iterate over an print all control values */ while (iter_val != NULL) {
--- a/usr/src/cmd/ssh/include/authfd.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ssh/include/authfd.h Mon Aug 17 09:14:35 2009 -0400 @@ -3,8 +3,6 @@ #ifndef _AUTHFD_H #define _AUTHFD_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -58,6 +56,7 @@ #define SSH2_AGENTC_ADD_ID_CONSTRAINED 25 #define SSH_AGENT_CONSTRAIN_LIFETIME 1 +#define SSH_AGENT_CONSTRAIN_CONFIRM 2 /* extended failure messages */ #define SSH2_AGENT_FAILURE 30
--- a/usr/src/cmd/ssh/include/readpass.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ssh/include/readpass.h Mon Aug 17 09:14:35 2009 -0400 @@ -3,8 +3,6 @@ #ifndef _READPASS_H #define _READPASS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -25,8 +23,12 @@ #define RP_ECHO 0x0001 #define RP_ALLOW_STDIN 0x0002 #define RP_ALLOW_EOF 0x0004 +#define RP_USE_ASKPASS 0x0008 char *read_passphrase(const char *, int); +int ask_permission(const char *, ...) + __attribute__((format(printf, 1, 2))); +int read_keyfile_line(FILE *, const char *, char *, size_t, u_long *); #ifdef __cplusplus }
--- a/usr/src/cmd/ssh/include/ssh.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ssh/include/ssh.h Mon Aug 17 09:14:35 2009 -0400 @@ -3,8 +3,6 @@ #ifndef _SSH_H #define _SSH_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -22,7 +20,7 @@ * called by a name other than "ssh" or "Secure Shell". */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -118,6 +116,9 @@ /* Minimum modulus size (n) for RSA keys. */ #define SSH_RSA_MINIMUM_MODULUS_SIZE 768 +/* Listen backlog for sshd, ssh-agent and forwarding sockets */ +#define SSH_LISTEN_BACKLOG 128 + /* * Do not display banner when in remote command mode only. Note that RFC 4254 * uses "exec" as a mode name for the channel opened for the execution of the
--- a/usr/src/cmd/ssh/libssh/common/readpass.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ssh/libssh/common/readpass.c Mon Aug 17 09:14:35 2009 -0400 @@ -1,3 +1,8 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + /* * Copyright (c) 2001 Markus Friedl. All rights reserved. * @@ -25,13 +30,12 @@ #include "includes.h" RCSID("$OpenBSD: readpass.c,v 1.27 2002/03/26 15:58:46 markus Exp $"); -#pragma ident "%Z%%M% %I% %E% SMI" - #include "xmalloc.h" #include "readpass.h" #include "pathnames.h" #include "log.h" #include "ssh.h" +#include <langinfo.h> static char * ssh_askpass(char *askpass, const char *msg) @@ -130,3 +134,30 @@ memset(buf, 'x', sizeof buf); return ret; } + +int +ask_permission(const char *fmt, ...) +{ + va_list args; + char *p, prompt[1024]; + int allowed = 0; + char *yeschar = nl_langinfo(YESSTR); + + va_start(args, fmt); + vsnprintf(prompt, sizeof(prompt), fmt, args); + va_end(args); + + p = read_passphrase(prompt, RP_USE_ASKPASS|RP_ALLOW_EOF); + if (p != NULL) { + /* + * Accept empty responses and responses consisting + * of the word "yes" as affirmative. + */ + if (*p == '\0' || *p == '\n' || + strncasecmp(p, yeschar, 1) == 0) + allowed = 1; + xfree(p); + } + + return (allowed); +}
--- a/usr/src/cmd/ssh/ssh-agent/ssh-agent.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ssh/ssh-agent/ssh-agent.c Mon Aug 17 09:14:35 2009 -0400 @@ -40,7 +40,7 @@ #include "includes.h" #include "sys-queue.h" -RCSID("$OpenBSD: ssh-agent.c,v 1.105 2002/10/01 20:34:12 markus Exp $"); +RCSID("$OpenBSD: ssh-agent.c,v 1.159 2008/06/28 14:05:15 djm Exp $"); #ifdef HAVE_SOLARIS_PRIVILEGE #include <priv.h> @@ -54,11 +54,12 @@ #include "buffer.h" #include "bufaux.h" #include "xmalloc.h" -#include "getput.h" #include "key.h" #include "authfd.h" #include "compat.h" #include "log.h" +#include "readpass.h" +#include "misc.h" typedef enum { AUTH_UNUSED, @@ -82,6 +83,7 @@ Key *key; char *comment; u_int death; + u_int confirm; } Identity; typedef struct { @@ -96,10 +98,11 @@ /* pid of shell == parent of agent */ pid_t parent_pid = -1; +u_int parent_alive_interval = 0; /* pathname and directory for AUTH_SOCKET */ -char socket_name[1024]; -char socket_dir[1024]; +char socket_name[MAXPATHLEN]; +char socket_dir[MAXPATHLEN]; /* locking */ int locked = 0; @@ -111,6 +114,9 @@ char *__progname; #endif +/* Default lifetime (0 == forever) */ +static int lifetime = 0; + static void close_socket(SocketEntry *e) { @@ -164,6 +170,23 @@ return (NULL); } +/* Check confirmation of keysign request */ +static int +confirm_key(Identity *id) +{ + char *p; + int ret = -1; + + p = key_fingerprint(id->key, SSH_FP_MD5, SSH_FP_HEX); + if (ask_permission( + gettext("Allow use of key %s?\nKey fingerprint %s."), + id->comment, p)) + ret = 0; + xfree(p); + + return (ret); +} + /* send list of supported public keys to 'client' */ static void process_request_identities(SocketEntry *e, int version) @@ -227,7 +250,7 @@ goto failure; id = lookup_identity(key, 1); - if (id != NULL) { + if (id != NULL && (!id->confirm || confirm_key(id) == 0)) { Key *private = id->key; /* Decrypt the challenge using the private key. */ if (rsa_private_decrypt(challenge, challenge, private->rsa) <= 0) @@ -270,6 +293,8 @@ { u_char *blob, *data, *signature = NULL; u_int blen, dlen, slen = 0; + extern uint32_t datafellows; + int odatafellows; int ok = -1, flags; Buffer msg; Key *key; @@ -280,16 +305,17 @@ data = buffer_get_string(&e->request, &dlen); flags = buffer_get_int(&e->request); + odatafellows = datafellows; if (flags & SSH_AGENT_OLD_SIGNATURE) datafellows = SSH_BUG_SIGBLOB; key = key_from_blob(blob, blen); if (key != NULL) { Identity *id = lookup_identity(key, 2); - if (id != NULL) + if (id != NULL && (!id->confirm || confirm_key(id) == 0)) ok = key_sign(id->key, &signature, &slen, data, dlen); + key_free(key); } - key_free(key); buffer_init(&msg); if (ok == 0) { buffer_put_char(&msg, SSH2_AGENT_SIGN_RESPONSE); @@ -305,6 +331,7 @@ xfree(blob); if (signature != NULL) xfree(signature); + datafellows = odatafellows; } /* shared */ @@ -338,7 +365,7 @@ if (id != NULL) { /* * We have this key. Free the old key. Since we - * don\'t want to leave empty slots in the middle of + * don't want to leave empty slots in the middle of * the array, we actually free the key there and move * all the entries between the empty slot and the end * of the array. @@ -381,10 +408,11 @@ buffer_put_char(&e->output, SSH_AGENT_SUCCESS); } -static void +/* removes expired keys and returns number of seconds until the next expiry */ +static u_int reaper(void) { - u_int now = time(NULL); + u_int deadline = 0, now = time(NULL); Identity *id, *nxt; int version; Idtab *tab; @@ -393,20 +421,30 @@ tab = idtab_lookup(version); for (id = TAILQ_FIRST(&tab->idlist); id; id = nxt) { nxt = TAILQ_NEXT(id, next); - if (id->death != 0 && now >= id->death) { + if (id->death == 0) + continue; + if (now >= id->death) { + debug("expiring key '%s'", id->comment); TAILQ_REMOVE(&tab->idlist, id, next); free_identity(id); tab->nentries--; - } + } else + deadline = (deadline == 0) ? id->death : + MIN(deadline, id->death); } } + if (deadline == 0 || deadline <= now) + return 0; + else + return (deadline - now); } static void process_add_identity(SocketEntry *e, int version) { Idtab *tab = idtab_lookup(version); - int type, success = 0, death = 0; + Identity *id; + int type, success = 0, death = 0, confirm = 0; char *type_name, *comment; Key *k = NULL; @@ -457,33 +495,54 @@ } break; } + /* enable blinding */ + switch (k->type) { + case KEY_RSA: + case KEY_RSA1: + if (RSA_blinding_on(k->rsa, NULL) != 1) { + error("process_add_identity: RSA_blinding_on failed"); + key_free(k); + goto send; + } + break; + } comment = buffer_get_string(&e->request, NULL); if (k == NULL) { xfree(comment); goto send; } - success = 1; while (buffer_len(&e->request)) { - switch (buffer_get_char(&e->request)) { + switch ((type = buffer_get_char(&e->request))) { case SSH_AGENT_CONSTRAIN_LIFETIME: death = time(NULL) + buffer_get_int(&e->request); break; + case SSH_AGENT_CONSTRAIN_CONFIRM: + confirm = 1; + break; default: - break; + error("process_add_identity: " + "Unknown constraint type %d", type); + xfree(comment); + key_free(k); + goto send; } } - if (lookup_identity(k, version) == NULL) { - Identity *id = xmalloc(sizeof(Identity)); + success = 1; + if (lifetime && !death) + death = time(NULL) + lifetime; + if ((id = lookup_identity(k, version)) == NULL) { + id = xmalloc(sizeof(Identity)); id->key = k; - id->comment = comment; - id->death = death; TAILQ_INSERT_TAIL(&tab->idlist, id, next); /* Increment the number of identities. */ tab->nentries++; } else { key_free(k); - xfree(comment); + xfree(id->comment); } + id->comment = comment; + id->death = death; + id->confirm = confirm; send: buffer_put_int(&e->output, 1); buffer_put_char(&e->output, @@ -540,13 +599,10 @@ u_int msg_len, type; u_char *cp; - /* kill dead keys */ - reaper(); - if (buffer_len(&e->input) < 5) return; /* Incomplete message. */ cp = buffer_ptr(&e->input); - msg_len = GET_32BIT(cp); + msg_len = get_u32(cp); if (msg_len > 256 * 1024) { close_socket(e); return; @@ -631,10 +687,9 @@ static void new_socket(sock_type type, int fd) { - u_int i, old_alloc; + u_int i, old_alloc, new_alloc; - if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) - error("fcntl O_NONBLOCK: %s", strerror(errno)); + set_nonblock(fd); if (fd > max_fd) max_fd = fd; @@ -642,32 +697,32 @@ for (i = 0; i < sockets_alloc; i++) if (sockets[i].type == AUTH_UNUSED) { sockets[i].fd = fd; - sockets[i].type = type; buffer_init(&sockets[i].input); buffer_init(&sockets[i].output); buffer_init(&sockets[i].request); + sockets[i].type = type; return; } old_alloc = sockets_alloc; - sockets_alloc += 10; - if (sockets) - sockets = xrealloc(sockets, sockets_alloc * sizeof(sockets[0])); - else - sockets = xmalloc(sockets_alloc * sizeof(sockets[0])); - for (i = old_alloc; i < sockets_alloc; i++) + new_alloc = sockets_alloc + 10; + sockets = xrealloc(sockets, new_alloc * sizeof(sockets[0])); + for (i = old_alloc; i < new_alloc; i++) sockets[i].type = AUTH_UNUSED; - sockets[old_alloc].type = type; + sockets_alloc = new_alloc; sockets[old_alloc].fd = fd; buffer_init(&sockets[old_alloc].input); buffer_init(&sockets[old_alloc].output); buffer_init(&sockets[old_alloc].request); + sockets[old_alloc].type = type; } static int -prepare_select(fd_set **fdrp, fd_set **fdwp, int *fdl, int *nallocp) +prepare_select(fd_set **fdrp, fd_set **fdwp, int *fdl, u_int *nallocp, + struct timeval **tvpp) { - u_int i, sz; + u_int i, sz, deadline; int n = 0; + static struct timeval tv; for (i = 0; i < sockets_alloc; i++) { switch (sockets[i].type) { @@ -711,6 +766,17 @@ break; } } + deadline = reaper(); + if (parent_alive_interval != 0) + deadline = (deadline == 0) ? parent_alive_interval : + MIN(deadline, parent_alive_interval); + if (deadline == 0) { + *tvpp = NULL; + } else { + tv.tv_sec = deadline; + tv.tv_usec = 0; + *tvpp = &tv; + } return (1); } @@ -733,7 +799,7 @@ if (FD_ISSET(sockets[i].fd, readset)) { slen = sizeof(sunaddr); sock = accept(sockets[i].fd, - (struct sockaddr *) &sunaddr, &slen); + (struct sockaddr *)&sunaddr, &slen); if (sock < 0) { error("accept from AUTH_SOCKET: %s", strerror(errno)); @@ -763,7 +829,8 @@ buffer_ptr(&sockets[i].output), buffer_len(&sockets[i].output)); if (len == -1 && (errno == EAGAIN || - errno == EINTR)) + errno == EINTR || + errno == EWOULDBLOCK)) continue; break; } while (1); @@ -777,7 +844,8 @@ do { len = read(sockets[i].fd, buf, sizeof(buf)); if (len == -1 && (errno == EAGAIN || - errno == EINTR)) + errno == EINTR || + errno == EWOULDBLOCK)) continue; break; } while (1); @@ -795,7 +863,7 @@ } static void -cleanup_socket(void *p) +cleanup_socket(void) { if (socket_name[0]) unlink(socket_name); @@ -803,32 +871,29 @@ rmdir(socket_dir); } -static void +void cleanup_exit(int i) { - cleanup_socket(NULL); - exit(i); + cleanup_socket(); + _exit(i); } +/*ARGSUSED*/ static void cleanup_handler(int sig) { - cleanup_socket(NULL); + cleanup_socket(); _exit(2); } static void -check_parent_exists(int sig) +check_parent_exists(void) { - int save_errno = errno; - - if (parent_pid != -1 && getppid() != parent_pid) { + if (parent_pid != -1 && kill(parent_pid, 0) < 0) { /* printf("Parent has died - Authentication agent exiting.\n"); */ - cleanup_handler(sig); /* safe */ + cleanup_socket(); + _exit(2); } - signal(SIGALRM, check_parent_exists); - alarm(10); - errno = save_errno; } static void @@ -841,7 +906,8 @@ " -s Generate Bourne shell commands on stdout.\n" " -k Kill the current agent.\n" " -d Debug mode.\n" - " -a socket Bind agent socket to given name.\n"), + " -a socket Bind agent socket to given name.\n" + " -t life Default identity lifetime (seconds).\n"), __progname); exit(1); } @@ -849,7 +915,9 @@ int main(int ac, char **av) { - int sock, c_flag = 0, d_flag = 0, k_flag = 0, s_flag = 0, ch, nalloc; + int c_flag = 0, d_flag = 0, k_flag = 0, s_flag = 0; + int sock, fd, ch, result, saved_errno; + u_int nalloc; char *shell, *pidstr, *agentsocket = NULL; const char *format; fd_set *readsetp = NULL, *writesetp = NULL; @@ -857,30 +925,30 @@ #ifdef HAVE_SETRLIMIT struct rlimit rlim; #endif -#ifdef HAVE_CYGWIN int prev_mask; -#endif extern int optind; extern char *optarg; pid_t pid; char pidstrbuf[1 + 3 * sizeof pid]; + struct timeval *tvp = NULL; #ifdef HAVE_SOLARIS_PRIVILEGE priv_set_t *myprivs; #endif /* HAVE_SOLARIS_PRIVILEGE */ + /* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */ + sanitise_stdfd(); + /* drop */ setegid(getgid()); setgid(getgid()); - (void) g11n_setlocale(LC_ALL, ""); - SSLeay_add_all_algorithms(); __progname = get_progname(av[0]); init_rng(); seed_rng(); - while ((ch = getopt(ac, av, "cdksa:")) != -1) { + while ((ch = getopt(ac, av, "cdksa:t:")) != -1) { switch (ch) { case 'c': if (s_flag) @@ -903,6 +971,12 @@ case 'a': agentsocket = optarg; break; + case 't': + if ((lifetime = convtime(optarg)) == -1) { + fprintf(stderr, gettext("Invalid lifetime\n")); + usage(); + } + break; default: usage(); } @@ -915,7 +989,8 @@ if (ac == 0 && !c_flag && !s_flag) { shell = getenv("SHELL"); - if (shell != NULL && strncmp(shell + strlen(shell) - 3, "csh", 3) == 0) + if (shell != NULL && + strncmp(shell + strlen(shell) - 3, "csh", 3) == 0) c_flag = 1; } if (k_flag) { @@ -929,8 +1004,8 @@ pid = atoi(pidstr); if (pid < 1) { fprintf(stderr, - gettext("%s=\")%s\", which is not a good PID\n"), - SSH_AGENTPID_ENV_NAME, pidstr); + gettext("%s not set, cannot kill agent\n"), + SSH_AGENTPID_ENV_NAME); exit(1); } if (kill(pid, SIGTERM) == -1) { @@ -948,7 +1023,7 @@ if (agentsocket == NULL) { /* Create private directory for agent socket */ - strlcpy(socket_dir, "/tmp/ssh-XXXXXXXX", sizeof socket_dir); + strlcpy(socket_dir, "/tmp/ssh-XXXXXXXXXX", sizeof socket_dir); if (mkdtemp(socket_dir) == NULL) { perror("mkdtemp: private socket dir"); exit(1); @@ -968,25 +1043,21 @@ sock = socket(AF_UNIX, SOCK_STREAM, 0); if (sock < 0) { perror("socket"); + *socket_name = '\0'; /* Don't unlink any existing file */ cleanup_exit(1); } memset(&sunaddr, 0, sizeof(sunaddr)); sunaddr.sun_family = AF_UNIX; strlcpy(sunaddr.sun_path, socket_name, sizeof(sunaddr.sun_path)); -#ifdef HAVE_CYGWIN prev_mask = umask(0177); -#endif - if (bind(sock, (struct sockaddr *) & sunaddr, sizeof(sunaddr)) < 0) { + if (bind(sock, (struct sockaddr *) &sunaddr, sizeof(sunaddr)) < 0) { perror("bind"); -#ifdef HAVE_CYGWIN + *socket_name = '\0'; /* Don't unlink any existing file */ umask(prev_mask); -#endif cleanup_exit(1); } -#ifdef HAVE_CYGWIN umask(prev_mask); -#endif - if (listen(sock, 128) < 0) { + if (listen(sock, SSH_LISTEN_BACKLOG) < 0) { perror("listen"); cleanup_exit(1); } @@ -1019,7 +1090,7 @@ printf(format, SSH_AGENTPID_ENV_NAME, pidstrbuf, SSH_AGENTPID_ENV_NAME); printf("echo "); - printf(gettext("Agent pid %ld;\n"), (long)pid); + printf(gettext("Agent pid %ld;\n"), (long)pid); exit(0); } if (setenv(SSH_AUTHSOCKET_ENV_NAME, socket_name, 1) == -1 || @@ -1062,9 +1133,14 @@ } (void)chdir("/"); - close(0); - close(1); - close(2); + if ((fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) { + /* XXX might close listen socket */ + (void)dup2(fd, STDIN_FILENO); + (void)dup2(fd, STDOUT_FILENO); + (void)dup2(fd, STDERR_FILENO); + if (fd > 2) + close(fd); + } #ifdef HAVE_SETRLIMIT /* deny core dumps, since memory contains unencrypted private keys */ @@ -1076,12 +1152,9 @@ #endif skip: - fatal_add_cleanup(cleanup_socket, NULL); new_socket(AUTH_SOCKET, sock); - if (ac > 0) { - signal(SIGALRM, check_parent_exists); - alarm(10); - } + if (ac > 0) + parent_alive_interval = 10; idtab_init(); if (!d_flag) signal(SIGINT, SIG_IGN); @@ -1091,13 +1164,18 @@ nalloc = 0; while (1) { - prepare_select(&readsetp, &writesetp, &max_fd, &nalloc); - if (select(max_fd + 1, readsetp, writesetp, NULL, NULL) < 0) { - if (errno == EINTR) + prepare_select(&readsetp, &writesetp, &max_fd, &nalloc, &tvp); + result = select(max_fd + 1, readsetp, writesetp, NULL, tvp); + saved_errno = errno; + if (parent_alive_interval != 0) + check_parent_exists(); + (void) reaper(); /* remove expired keys */ + if (result < 0) { + if (saved_errno == EINTR) continue; - fatal("select: %s", strerror(errno)); - } - after_select(readsetp, writesetp); + fatal("select: %s", strerror(saved_errno)); + } else if (result > 0) + after_select(readsetp, writesetp); } /* NOTREACHED */ return (0); /* keep lint happy */
--- a/usr/src/cmd/svc/configd/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/svc/configd/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -71,8 +71,7 @@ $(NATIVE_BUILD)CPPFLAGS = $(MYCPPFLAGS) -I$(LIBUUTIL)/common -I$(LIBSCF)/inc $(NATIVE_BUILD)CPPFLAGS += -DNATIVE_BUILD $(NATIVE_BUILD)LDFLAGS = -$(NATIVE_BUILD)LDLIBS = -L$(LIBUUTIL)/native -R $(LIBUUTIL)/native \ - $(MYLDLIBS) -ldoor +$(NATIVE_BUILD)LDLIBS = -L$(LIBUUTIL)/native -R $(LIBUUTIL)/native $(MYLDLIBS) DIRMODE = 0755 FILEMODE = 0555
--- a/usr/src/cmd/svc/startd/graph.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/svc/startd/graph.c Mon Aug 17 09:14:35 2009 -0400 @@ -6076,9 +6076,9 @@ vertex = vertex_get_by_name(inst_name); if (vertex == NULL) { MUTEX_UNLOCK(&dgraph_lock); - startd_free(inst_name, max_scf_fmri_size); log_framework(LOG_DEBUG, "%s: Can't find graph vertex. " "The instance must have been removed.\n", inst_name); + startd_free(inst_name, max_scf_fmri_size); return (0); }
--- a/usr/src/cmd/svc/svccfg/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/svc/svccfg/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -105,7 +105,7 @@ $(NATIVE_BUILD)LDLIBS = \ -L$(LIBUUTIL)/native -R $(LIBUUTIL)/native \ -L$(LIBSCF)/native -R $(LIBSCF)/native \ - $(SVCCFG_EXTRA_LIBS) -ldoor + $(SVCCFG_EXTRA_LIBS) svccfg_lex.o svccfg_grammar.o := CCVERBOSE =
--- a/usr/src/cmd/vt/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/vt/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. #
--- a/usr/src/cmd/wusbadm/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/wusbadm/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -39,7 +39,7 @@ CPPFLAGS += -I$(SRC)/uts/common -LDLIBS += -lpkcs11 -lkmf -ldoor -lsysevent -lnvpair -lsecdb +LDLIBS += -lpkcs11 -lkmf -lsysevent -lnvpair -lsecdb .KEEP_STATE:
--- a/usr/src/cmd/zdb/zdb.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/zdb/zdb.c Mon Aug 17 09:14:35 2009 -0400 @@ -724,11 +724,11 @@ (void) printf("Indirect blocks:\n"); - SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os), + SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), dn->dn_object, dnp->dn_nlevels - 1, 0); for (j = 0; j < dnp->dn_nblkptr; j++) { czb.zb_blkid = j; - (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp, + (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, &dnp->dn_blkptr[j], &czb); } @@ -1073,7 +1073,7 @@ } if (object == 0) { - dn = os->os->os_meta_dnode; + dn = os->os_meta_dnode; } else { error = dmu_bonus_hold(os, object, FTAG, &db); if (error) @@ -1191,21 +1191,21 @@ if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; - usedobjs = os->os->os_rootbp->blk_fill; - refdbytes = os->os->os_spa->spa_dsl_pool-> + usedobjs = os->os_rootbp->blk_fill; + refdbytes = os->os_spa->spa_dsl_pool-> dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } - ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill); + ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); nicenum(refdbytes, numbuf); if (verbosity >= 4) { (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp "); (void) sprintf_blkptr(blkbuf + strlen(blkbuf), - BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp); + BP_SPRINTF_LEN - strlen(blkbuf), os->os_rootbp); } else { blkbuf[0] = '\0'; } @@ -1227,7 +1227,7 @@ if (verbosity < 2) return; - if (os->os->os_rootbp->blk_birth == 0) + if (os->os_rootbp->blk_birth == 0) return; if (zopt_objects != 0) { @@ -1240,8 +1240,8 @@ dump_object(os, 0, verbosity, &print_header); object_count = 0; - if (os->os->os_userused_dnode && - os->os->os_userused_dnode->dn_type != 0) { + if (os->os_userused_dnode && + os->os_userused_dnode->dn_type != 0) { dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); } @@ -1398,14 +1398,13 @@ int error; objset_t *os; - error = dmu_objset_open(dsname, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os); + error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os); if (error) { (void) printf("Could not open %s\n", dsname); return (0); } dump_dir(os); - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); fuid_table_destroy(); return (0); } @@ -2474,8 +2473,8 @@ if (error == 0) { if (strchr(argv[0], '/') != NULL) { - error = dmu_objset_open(argv[0], DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os); + error = dmu_objset_own(argv[0], DMU_OST_ANY, + B_TRUE, FTAG, &os); } else { error = spa_open(argv[0], &spa, FTAG); } @@ -2499,7 +2498,7 @@ if (os != NULL) { dump_dir(os); - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); } else { dump_zpool(spa); spa_close(spa, FTAG);
--- a/usr/src/cmd/zinject/translate.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/zinject/translate.c Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -162,8 +162,8 @@ */ sync(); - if ((err = dmu_objset_open(dataset, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) { + err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); + if (err != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); return (-1); @@ -172,7 +172,7 @@ record->zi_objset = dmu_objset_id(os); record->zi_object = statbuf->st_ino; - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); return (0); } @@ -247,17 +247,17 @@ * Get the dnode associated with object, so we can calculate the block * size. */ - if ((err = dmu_objset_open(dataset, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) { + if ((err = dmu_objset_own(dataset, DMU_OST_ANY, + B_TRUE, FTAG, &os)) != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); goto out; } if (record->zi_object == 0) { - dn = os->os->os_meta_dnode; + dn = os->os_meta_dnode; } else { - err = dnode_hold(os->os, record->zi_object, FTAG, &dn); + err = dnode_hold(os, record->zi_object, FTAG, &dn); if (err != 0) { (void) fprintf(stderr, "failed to hold dnode " "for object %llu\n", @@ -306,11 +306,11 @@ ret = 0; out: if (dn) { - if (dn != os->os->os_meta_dnode) + if (dn != os->os_meta_dnode) dnode_rele(dn, FTAG); } if (os) - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); return (ret); }
--- a/usr/src/cmd/ztest/ztest.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/cmd/ztest/ztest.c Mon Aug 17 09:14:35 2009 -0400 @@ -1372,8 +1372,7 @@ /* * Verify that the dataset contains a directory object. */ - error = dmu_objset_open(name, DMU_OST_OTHER, - DS_MODE_USER | DS_MODE_READONLY, &os); + error = dmu_objset_hold(name, FTAG, &os); ASSERT3U(error, ==, 0); error = dmu_object_info(os, ZTEST_DIROBJ, doi); if (error != ENOENT) { @@ -1382,16 +1381,15 @@ ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER); ASSERT3S(doi->doi_physical_blks, >=, 0); } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); /* * Destroy the dataset. */ error = dmu_objset_destroy(name, B_FALSE); if (error) { - (void) dmu_objset_open(name, DMU_OST_OTHER, - DS_MODE_USER | DS_MODE_READONLY, &os); - fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error); + (void) dmu_objset_hold(name, FTAG, &os); + fatal(0, "dmu_objset_destroy(os=%p) = %d\n", os, error); } return (0); } @@ -1434,7 +1432,6 @@ int error; objset_t *os, *os2; char name[100]; - int basemode, expected_error; zilog_t *zilog; uint64_t seq; uint64_t objects; @@ -1443,19 +1440,15 @@ (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool, (u_longlong_t)za->za_instance); - basemode = DS_MODE_TYPE(za->za_instance); - if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER) - basemode = DS_MODE_USER; - /* * If this dataset exists from a previous run, process its replay log * half of the time. If we don't replay it, then dmu_objset_destroy() * (invoked from ztest_destroy_cb() below) should just throw it away. */ if (ztest_random(2) == 0 && - dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) { + dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { zil_replay(os, os, ztest_replay_vector); - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); } /* @@ -1469,7 +1462,7 @@ /* * Verify that the destroyed dataset is no longer in the namespace. */ - error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); + error = dmu_objset_hold(name, FTAG, &os); if (error != ENOENT) fatal(1, "dmu_objset_open(%s) found destroyed dataset %p", name, os); @@ -1488,7 +1481,7 @@ fatal(0, "dmu_objset_create(%s) = %d", name, error); } - error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os); + error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os); if (error) { fatal(0, "dmu_objset_open(%s) = %d", name, error); } @@ -1538,27 +1531,23 @@ fatal(0, "created existing dataset, error = %d", error); /* - * Verify that multiple dataset holds are allowed, but only when - * the new access mode is compatible with the base mode. + * Verify that we can hold an objset that is also owned. */ - if (basemode == DS_MODE_OWNER) { - error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER, - &os2); - if (error) - fatal(0, "dmu_objset_open('%s') = %d", name, error); - else - dmu_objset_close(os2); - } - error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2); - expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0; - if (error != expected_error) - fatal(0, "dmu_objset_open('%s') = %d, expected %d", - name, error, expected_error); - if (error == 0) - dmu_objset_close(os2); + error = dmu_objset_hold(name, FTAG, &os2); + if (error) + fatal(0, "dmu_objset_open('%s') = %d", name, error); + dmu_objset_rele(os2, FTAG); + + /* + * Verify that we can not own an objset that is already owned. + */ + error = dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2); + if (error != EBUSY) + fatal(0, "dmu_objset_open('%s') = %d, expected EBUSY", + name, error); zil_close(zilog); - dmu_objset_close(os); + dmu_objset_disown(os, FTAG); error = dmu_objset_destroy(name, B_FALSE); if (error) @@ -1670,13 +1659,12 @@ fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); } - error = dmu_objset_open(snap1name, DMU_OST_OTHER, - DS_MODE_USER | DS_MODE_READONLY, &clone); + error = dmu_objset_hold(snap1name, FTAG, &clone); if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); - dmu_objset_close(clone); + dmu_objset_rele(clone, FTAG); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_create"); @@ -1705,13 +1693,12 @@ fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); } - error = dmu_objset_open(snap3name, DMU_OST_OTHER, - DS_MODE_USER | DS_MODE_READONLY, &clone); + error = dmu_objset_hold(snap3name, FTAG, &clone); if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); - dmu_objset_close(clone); + dmu_objset_rele(clone, FTAG); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_create"); @@ -1720,7 +1707,7 @@ fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dsl_dataset_own(snap1name, DS_MODE_READONLY, FTAG, &ds); + error = dsl_dataset_own(snap1name, B_FALSE, FTAG, &ds); if (error) fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error); error = dsl_dataset_promote(clone2name); @@ -3806,8 +3793,7 @@ fatal(0, "dmu_objset_create(%s) = %d", name, error); } - error = dmu_objset_open(name, DMU_OST_OTHER, - DS_MODE_USER, &za[d].za_os); + error = dmu_objset_hold(name, FTAG, &za[d].za_os); if (error) fatal(0, "dmu_objset_open('%s') = %d", name, error); @@ -3827,7 +3813,7 @@ VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0); if (t < zopt_datasets) { zil_close(za[t].za_zilog); - dmu_objset_close(za[t].za_os); + dmu_objset_rele(za[t].za_os, FTAG); } }
--- a/usr/src/common/zfs/zfs_prop.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/common/zfs/zfs_prop.c Mon Aug 17 09:14:35 2009 -0400 @@ -152,6 +152,12 @@ { NULL } }; + static zprop_index_t logbias_table[] = { + { "latency", ZFS_LOGBIAS_LATENCY }, + { "throughput", ZFS_LOGBIAS_THROUGHPUT }, + { NULL } + }; + static zprop_index_t canmount_table[] = { { "off", ZFS_CANMOUNT_OFF }, { "on", ZFS_CANMOUNT_ON }, @@ -196,6 +202,9 @@ ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table); + register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "latency | throughput", "LOGBIAS", logbias_table); /* inherit index (boolean) properties */ register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
--- a/usr/src/lib/cfgadm_plugins/sata/common/cfga_sata.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/cfgadm_plugins/sata/common/cfga_sata.c Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -59,7 +59,7 @@ NULL, "SATA specific commands:\n", " cfgadm -c [configure|unconfigure|disconnect|connect] ap_id " - "[ap_id...]\n", + "[ap_id...]\n", " cfgadm -x sata_reset_port ap_id [ap_id...]\n", " cfgadm -x sata_reset_device ap_id [ap_id...]\n", " cfgadm -x sata_reset_all ap_id\n", @@ -114,7 +114,7 @@ /* CFGA_SATA_DEVCTL / CFGA_LIB_ERROR -> "Library error" */ { CVT, CFGA_LIB_ERROR, "Internal error: " - "Cannot allocate devctl handle " }, + "Cannot allocate devctl handle " }, /* * CFGA_SATA_DEV_CONFIGURE / @@ -226,15 +226,15 @@ static cfga_sata_ret_t setup_for_devctl_cmd(const char *ap_id, devctl_hdl_t *devctl_hdl, - nvlist_t **user_nvlistp, uint_t oflag); + nvlist_t **user_nvlistp, uint_t oflag); static cfga_sata_ret_t port_state(devctl_hdl_t hdl, nvlist_t *list, - ap_rstate_t *rstate, ap_ostate_t *ostate); + ap_rstate_t *rstate, ap_ostate_t *ostate); static cfga_sata_ret_t do_control_ioctl(const char *ap_id, sata_cfga_apctl_t subcommand, uint_t arg, - void **descrp, size_t *sizep); + void **descrp, size_t *sizep); static void cleanup_after_devctl_cmd(devctl_hdl_t devctl_hdl, nvlist_t *user_nvlist); @@ -245,6 +245,8 @@ static int sata_confirm(struct cfga_confirm *confp, char *msg); +static cfga_sata_ret_t +get_port_num(const char *ap_id, uint32_t *port); /* Utilities */ @@ -399,8 +401,8 @@ } return ((msg_tbl[msg_index].intl) ? - dgettext(TEXT_DOMAIN, msg_tbl[msg_index].msgstr) : - msg_tbl[msg_index].msgstr); + dgettext(TEXT_DOMAIN, msg_tbl[msg_index].msgstr) : + msg_tbl[msg_index].msgstr); } /* @@ -453,10 +455,10 @@ cfga_err_t sata_err_msg( - char **errstring, - cfga_sata_ret_t rv, - const char *ap_id, - int l_errno) + char **errstring, + cfga_sata_ret_t rv, + const char *ap_id, + int l_errno) { if (errstring == NULL) { return (sata_msgs[rv].cfga_err); @@ -488,7 +490,7 @@ /* hardware-specific help needed */ set_msg(errstring, ERR_STR(rv), NULL); set_msg(errstring, "\n", - dgettext(TEXT_DOMAIN, sata_help[HELP_HEADER]), NULL); + dgettext(TEXT_DOMAIN, sata_help[HELP_HEADER]), NULL); set_msg(errstring, sata_help[HELP_RESET_PORT], NULL); set_msg(errstring, sata_help[HELP_RESET_DEVICE], NULL); set_msg(errstring, sata_help[HELP_RESET_ALL], NULL); @@ -559,13 +561,13 @@ /*ARGSUSED*/ cfga_err_t cfga_change_state( - cfga_cmd_t state_change_cmd, - const char *ap_id, - const char *options, - struct cfga_confirm *confp, - struct cfga_msg *msgp, - char **errstring, - cfga_flags_t flags) + cfga_cmd_t state_change_cmd, + const char *ap_id, + const char *options, + struct cfga_confirm *confp, + struct cfga_msg *msgp, + char **errstring, + cfga_flags_t flags) { int ret; int len; @@ -577,6 +579,9 @@ devctl_hdl_t hdl = NULL; cfga_sata_ret_t rv = CFGA_SATA_OK; char *pdyn; + char *str_type; + size_t size; + boolean_t pmult = B_FALSE; /* * All sub-commands which can change state of device require @@ -593,12 +598,30 @@ } if ((rv = setup_for_devctl_cmd(ap_id, &hdl, &nvl, - DC_RDONLY)) != CFGA_SATA_OK) { + DC_RDONLY)) != CFGA_SATA_OK) { goto bailout; } + /* + * Checking device type. A port multiplier is not configurable - it is + * already configured as soon as it is connected. + */ + if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_AP_TYPE, NULL, + (void **)&str_type, &size)) != CFGA_SATA_OK) { + /* no such deivce */ + goto bailout; + } + if (strncmp(str_type, "sata-pmult", sizeof ("sata-pmult")) == 0) { + pmult = B_TRUE; + } + switch (state_change_cmd) { case CFGA_CMD_CONFIGURE: + if (pmult == B_TRUE) { + rv = CFGA_SATA_HWOPNOTSUPP; + goto bailout; + } + if ((rv = port_state(hdl, nvl, &rstate, &ostate)) != CFGA_SATA_OK) goto bailout; @@ -647,6 +670,11 @@ break; case CFGA_CMD_UNCONFIGURE: + if (pmult == B_TRUE) { + rv = CFGA_SATA_HWOPNOTSUPP; + goto bailout; + } + if ((rv = port_state(hdl, nvl, &rstate, &ostate)) != CFGA_SATA_OK) goto bailout; @@ -668,13 +696,13 @@ rv = CFGA_SATA_OK; len = strlen(SATA_CONFIRM_DEVICE) + - strlen(SATA_CONFIRM_DEVICE_SUSPEND) + - strlen("Unconfigure") + strlen(ap_id); + strlen(SATA_CONFIRM_DEVICE_SUSPEND) + + strlen("Unconfigure") + strlen(ap_id); if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len + 3, "Unconfigure" - " %s%s\n%s", - SATA_CONFIRM_DEVICE, ap_id, - SATA_CONFIRM_DEVICE_SUSPEND); + " %s%s\n%s", + SATA_CONFIRM_DEVICE, ap_id, + SATA_CONFIRM_DEVICE_SUSPEND); } if (!sata_confirm(confp, msg)) { @@ -687,7 +715,7 @@ devpath = sata_get_devicepath(ap_id); if (devpath == NULL) { (void) printf( - "cfga_change_state: get device path failed\n"); + "cfga_change_state: get device path failed\n"); rv = CFGA_SATA_DEV_UNCONFIGURE; break; } @@ -705,10 +733,10 @@ rv = CFGA_SATA_BUSY; } (void) sata_rcm_online(ap_id, errstring, devpath, - flags); + flags); } else { (void) sata_rcm_remove(ap_id, errstring, devpath, - flags); + flags); } S_FREE(devpath); @@ -733,15 +761,14 @@ rv = CFGA_SATA_OK; /* other statuses don't matter */ - /* * If the port originally with device attached and was * unconfigured already, the devicepath for the sd will be * removed. sata_get_devicepath in this case is not necessary. */ - /* only call rcm_offline if the state was CONFIGURED */ - if (ostate == AP_OSTATE_CONFIGURED) { + if (ostate == AP_OSTATE_CONFIGURED && + pmult == B_FALSE) { devpath = sata_get_devicepath(ap_id); if (devpath == NULL) { (void) printf( @@ -751,14 +778,14 @@ } len = strlen(SATA_CONFIRM_DEVICE) + - strlen(SATA_CONFIRM_DEVICE_SUSPEND) + - strlen("Disconnect") + strlen(ap_id); + strlen(SATA_CONFIRM_DEVICE_SUSPEND) + + strlen("Disconnect") + strlen(ap_id); if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len + 3, - "Disconnect" - " %s%s\n%s", - SATA_CONFIRM_DEVICE, ap_id, - SATA_CONFIRM_DEVICE_SUSPEND); + "Disconnect" + " %s%s\n%s", + SATA_CONFIRM_DEVICE, ap_id, + SATA_CONFIRM_DEVICE_SUSPEND); } if (!sata_confirm(confp, msg)) { free(msg); @@ -780,7 +807,7 @@ if (errno == EBUSY) rv = CFGA_SATA_BUSY; (void) sata_rcm_online(ap_id, errstring, - devpath, flags); + devpath, flags); S_FREE(devpath); /* @@ -796,20 +823,20 @@ (void) printf("%s\n", ERR_STR(CFGA_SATA_DEVICE_UNCONFIGURED)); (void) sata_rcm_remove(ap_id, errstring, - devpath, flags); + devpath, flags); } S_FREE(devpath); } else if (rstate == AP_RSTATE_CONNECTED || rstate == AP_RSTATE_EMPTY) { len = strlen(SATA_CONFIRM_PORT) + - strlen(SATA_CONFIRM_PORT_DISABLE) + - strlen("Deactivate Port") + strlen(ap_id); + strlen(SATA_CONFIRM_PORT_DISABLE) + + strlen("Deactivate Port") + strlen(ap_id); if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, - "Disconnect" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_PORT_DISABLE); + "Disconnect" + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_PORT_DISABLE); } if (!sata_confirm(confp, msg)) { free(msg); @@ -837,13 +864,13 @@ } len = strlen(SATA_CONFIRM_PORT) + - strlen(SATA_CONFIRM_PORT_ENABLE) + - strlen("Activate Port") + strlen(ap_id); + strlen(SATA_CONFIRM_PORT_ENABLE) + + strlen("Activate Port") + strlen(ap_id); if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Activate" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_PORT_ENABLE); + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_PORT_ENABLE); } if (!sata_confirm(confp, msg)) { rv = CFGA_SATA_NACK; @@ -886,13 +913,13 @@ /* cfgadm entry point */ cfga_err_t cfga_private_func( - const char *func, - const char *ap_id, - const char *options, - struct cfga_confirm *confp, - struct cfga_msg *msgp, - char **errstring, - cfga_flags_t flags) + const char *func, + const char *ap_id, + const char *options, + struct cfga_confirm *confp, + struct cfga_msg *msgp, + char **errstring, + cfga_flags_t flags) { int len; char *msg; @@ -943,9 +970,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Reset" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_DEVICE_ABORT); + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_DEVICE_ABORT); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -957,7 +984,7 @@ } rv = do_control_ioctl(ap_id, SATA_CFGA_RESET_PORT, NULL, - (void **)&str_p, &size); + (void **)&str_p, &size); } else if (strcmp(func, SATA_RESET_DEVICE) == 0) { if ((rv = port_state(hdl, list, &rstate, &ostate)) != @@ -977,9 +1004,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Reset" - " %s%s\n%s", - SATA_CONFIRM_DEVICE, ap_id, - SATA_CONFIRM_DEVICE_ABORT); + " %s%s\n%s", + SATA_CONFIRM_DEVICE, ap_id, + SATA_CONFIRM_DEVICE_ABORT); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -1000,9 +1027,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Reset" - " %s%s\n%s", - SATA_CONFIRM_CONTROLLER, ap_id, - SATA_CONFIRM_CONTROLLER_ABORT); + " %s%s\n%s", + SATA_CONFIRM_CONTROLLER, ap_id, + SATA_CONFIRM_CONTROLLER_ABORT); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -1013,7 +1040,7 @@ goto bailout; } rv = do_control_ioctl(ap_id, SATA_CFGA_RESET_ALL, NULL, - (void **)&str_p, &size); + (void **)&str_p, &size); } else if (strcmp(func, SATA_PORT_DEACTIVATE) == 0) { len = strlen(SATA_CONFIRM_PORT) + @@ -1022,9 +1049,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Deactivate" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_PORT_DISABLE); + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_PORT_DISABLE); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -1035,7 +1062,7 @@ } rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_DEACTIVATE, NULL, - (void **)&str_p, &size); + (void **)&str_p, &size); } else if (strcmp(func, SATA_PORT_ACTIVATE) == 0) { len = strlen(SATA_CONFIRM_PORT) + @@ -1044,9 +1071,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Activate" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_PORT_ENABLE); + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_PORT_ENABLE); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -1058,7 +1085,7 @@ rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_ACTIVATE, NULL, (void **)&str_p, &size); - goto bailout; + goto bailout; } else if (strcmp(func, SATA_PORT_SELF_TEST) == 0) { len = strlen(SATA_CONFIRM_PORT) + @@ -1067,9 +1094,9 @@ if ((msg = (char *)calloc(len +3, 1)) != NULL) { (void) snprintf(msg, len +3, "Self Test" - " %s%s\n%s", - SATA_CONFIRM_PORT, ap_id, - SATA_CONFIRM_DEVICE_SUSPEND); + " %s%s\n%s", + SATA_CONFIRM_PORT, ap_id, + SATA_CONFIRM_DEVICE_SUSPEND); } else { rv = CFGA_SATA_NACK; goto bailout; @@ -1080,7 +1107,7 @@ } rv = do_control_ioctl(ap_id, SATA_CFGA_PORT_SELF_TEST, - NULL, (void **)&str_p, &size); + NULL, (void **)&str_p, &size); } else { /* Unrecognized operation request */ rv = CFGA_SATA_HWOPNOTSUPP; @@ -1097,11 +1124,11 @@ /*ARGSUSED*/ cfga_err_t cfga_test( - const char *ap_id, - const char *options, - struct cfga_msg *msgp, - char **errstring, - cfga_flags_t flags) + const char *ap_id, + const char *options, + struct cfga_msg *msgp, + char **errstring, + cfga_flags_t flags) { /* Should call ioctl for self test - phase 2 */ return (CFGA_OPNOTSUPP); @@ -1382,13 +1409,13 @@ /*ARGSUSED*/ cfga_err_t cfga_list_ext( - const char *ap_id, - cfga_list_data_t **ap_id_list, - int *nlistp, - const char *options, - const char *listopts, - char **errstring, - cfga_flags_t flags) + const char *ap_id, + cfga_list_data_t **ap_id_list, + int *nlistp, + const char *options, + const char *listopts, + char **errstring, + cfga_flags_t flags) { int l_errno; char *ap_id_log = NULL; @@ -1398,6 +1425,8 @@ cfga_sata_ret_t rv = CFGA_SATA_OK; devctl_ap_state_t devctl_ap_state; char *pdyn; + boolean_t pmult = B_FALSE; + uint32_t port; if ((rv = verify_params(ap_id, options, errstring)) != CFGA_SATA_OK) { @@ -1472,61 +1501,61 @@ sizeof ((*ap_id_list)->ap_phys_id)); switch (devctl_ap_state.ap_rstate) { - case AP_RSTATE_EMPTY: - (*ap_id_list)->ap_r_state = CFGA_STAT_EMPTY; - break; + case AP_RSTATE_EMPTY: + (*ap_id_list)->ap_r_state = CFGA_STAT_EMPTY; + break; - case AP_RSTATE_DISCONNECTED: - (*ap_id_list)->ap_r_state = CFGA_STAT_DISCONNECTED; - break; + case AP_RSTATE_DISCONNECTED: + (*ap_id_list)->ap_r_state = CFGA_STAT_DISCONNECTED; + break; - case AP_RSTATE_CONNECTED: - (*ap_id_list)->ap_r_state = CFGA_STAT_CONNECTED; - break; + case AP_RSTATE_CONNECTED: + (*ap_id_list)->ap_r_state = CFGA_STAT_CONNECTED; + break; - default: - rv = CFGA_SATA_STATE; - goto bailout; + default: + rv = CFGA_SATA_STATE; + goto bailout; } switch (devctl_ap_state.ap_ostate) { - case AP_OSTATE_CONFIGURED: - (*ap_id_list)->ap_o_state = CFGA_STAT_CONFIGURED; - break; + case AP_OSTATE_CONFIGURED: + (*ap_id_list)->ap_o_state = CFGA_STAT_CONFIGURED; + break; - case AP_OSTATE_UNCONFIGURED: - (*ap_id_list)->ap_o_state = CFGA_STAT_UNCONFIGURED; - break; + case AP_OSTATE_UNCONFIGURED: + (*ap_id_list)->ap_o_state = CFGA_STAT_UNCONFIGURED; + break; - default: - rv = CFGA_SATA_STATE; - goto bailout; + default: + rv = CFGA_SATA_STATE; + goto bailout; } switch (devctl_ap_state.ap_condition) { - case AP_COND_OK: - (*ap_id_list)->ap_cond = CFGA_COND_OK; - break; + case AP_COND_OK: + (*ap_id_list)->ap_cond = CFGA_COND_OK; + break; - case AP_COND_FAILING: - (*ap_id_list)->ap_cond = CFGA_COND_FAILING; - break; + case AP_COND_FAILING: + (*ap_id_list)->ap_cond = CFGA_COND_FAILING; + break; - case AP_COND_FAILED: - (*ap_id_list)->ap_cond = CFGA_COND_FAILED; - break; + case AP_COND_FAILED: + (*ap_id_list)->ap_cond = CFGA_COND_FAILED; + break; - case AP_COND_UNUSABLE: - (*ap_id_list)->ap_cond = CFGA_COND_UNUSABLE; - break; + case AP_COND_UNUSABLE: + (*ap_id_list)->ap_cond = CFGA_COND_UNUSABLE; + break; - case AP_COND_UNKNOWN: - (*ap_id_list)->ap_cond = CFGA_COND_UNKNOWN; - break; + case AP_COND_UNKNOWN: + (*ap_id_list)->ap_cond = CFGA_COND_UNKNOWN; + break; - default: - rv = CFGA_SATA_STATE; - goto bailout; + default: + rv = CFGA_SATA_STATE; + goto bailout; } (*ap_id_list)->ap_class[0] = '\0'; /* Filled by libcfgadm */ @@ -1545,7 +1574,7 @@ if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_MODEL_INFO, NULL, (void **)&str_p, &size)) != CFGA_SATA_OK) { (void) printf( - "SATA_CFGA_GET_MODULE_INFO ioctl failed\n"); + "SATA_CFGA_GET_MODULE_INFO ioctl failed\n"); goto bailout; } /* drop leading and trailing spaces */ @@ -1558,9 +1587,9 @@ } (void) strlcpy((*ap_id_list)->ap_info, "Mod: ", - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); (void) strlcat((*ap_id_list)->ap_info, str_p + skip, - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); free(str_p); @@ -1584,9 +1613,9 @@ break; } (void) strlcat((*ap_id_list)->ap_info, " FRev: ", - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); (void) strlcat((*ap_id_list)->ap_info, str_p + skip, - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); free(str_p); @@ -1611,9 +1640,9 @@ break; } (void) strlcat((*ap_id_list)->ap_info, " SN: ", - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); (void) strlcat((*ap_id_list)->ap_info, str_p + skip, - sizeof ((*ap_id_list)->ap_info)); + sizeof ((*ap_id_list)->ap_info)); free(str_p); @@ -1624,16 +1653,25 @@ if ((rv = do_control_ioctl(ap_id, SATA_CFGA_GET_AP_TYPE, NULL, (void **)&str_p, &size)) != CFGA_SATA_OK) { (void) printf( - "SATA_CFGA_GET_AP_TYPE ioctl failed\n"); + "SATA_CFGA_GET_AP_TYPE ioctl failed\n"); goto bailout; } (void) strlcpy((*ap_id_list)->ap_type, str_p, - sizeof ((*ap_id_list)->ap_type)); + sizeof ((*ap_id_list)->ap_type)); free(str_p); - if ((*ap_id_list)->ap_o_state == CFGA_STAT_CONFIGURED) { + /* + * Checking device type. Port multiplier has no dynamic + * suffix. + */ + if (strncmp((*ap_id_list)->ap_type, "sata-pmult", + sizeof ("sata-pmult")) == 0) + pmult = B_TRUE; + + if ((*ap_id_list)->ap_o_state == CFGA_STAT_CONFIGURED && + pmult == B_FALSE) { char *dyncomp = NULL; @@ -1647,18 +1685,27 @@ goto bailout; if (dyncomp != NULL) { (void) strcat((*ap_id_list)->ap_log_id, - DYN_SEP); + DYN_SEP); (void) strlcat((*ap_id_list)->ap_log_id, - dyncomp, - sizeof ((*ap_id_list)->ap_log_id)); + dyncomp, + sizeof ((*ap_id_list)->ap_log_id)); free(dyncomp); } } } else { - /* Change it when port multiplier is supported */ - (void) strlcpy((*ap_id_list)->ap_type, "sata-port", - sizeof ((*ap_id_list)->ap_type)); + /* This is an empty port */ + if (get_port_num(ap_id, &port) != SATA_CFGA_OK) { + goto bailout; + } + + if (port & SATA_CFGA_PMPORT_QUAL) { + (void) strlcpy((*ap_id_list)->ap_type, "pmult-port", + sizeof ((*ap_id_list)->ap_type)); + } else { + (void) strlcpy((*ap_id_list)->ap_type, "sata-port", + sizeof ((*ap_id_list)->ap_type)); + } } return (sata_err_msg(errstring, rv, ap_id, errno)); @@ -1694,8 +1741,7 @@ } if ((q = (char *)calloc(len + 1, 1)) == NULL) { - perror("cfga_msg" -); + perror("cfga_msg"); return; } @@ -1766,9 +1812,9 @@ */ static cfga_sata_ret_t verify_params( - const char *ap_id, - const char *options, - char **errstring) + const char *ap_id, + const char *options, + char **errstring) { char *pdyn, *lap_id; int rv; @@ -1802,29 +1848,35 @@ /* * Takes a validated ap_id and extracts the port number. - * For now, we do not support port multiplier port . + * Port multiplier is supported now. */ static cfga_sata_ret_t get_port_num(const char *ap_id, uint32_t *port) { - char *port_nbr_str; - char *temp; - uint32_t cport; - uint32_t pmport = 0; /* port multiplier not supported yet */ - int pmport_qual = 0; /* port multiplier not supported yet */ + uint32_t cport, pmport = 0, qual = 0; + char *cport_str, *pmport_str; - port_nbr_str = strrchr(ap_id, (int)*MINOR_SEP) + strlen(MINOR_SEP); - if ((temp = strrchr(ap_id, (int)*PORT_SEPARATOR)) != 0) { - port_nbr_str = temp + strlen(PORT_SEPARATOR); + /* Get the cport number */ + cport_str = strrchr(ap_id, (int)*MINOR_SEP) + strlen(MINOR_SEP); + + errno = 0; + cport = strtol(cport_str, NULL, 10); + if ((cport & ~SATA_CFGA_CPORT_MASK) != 0 || errno != 0) { + return (CFGA_SATA_PORT); } + /* Get pmport number if there is a PORT_SEPARATOR */ errno = 0; - cport = strtol(port_nbr_str, NULL, 10); - if ((cport & ~SATA_CFGA_CPORT_MASK) != 0 || errno != 0) - return (CFGA_SATA_PORT); + if ((pmport_str = strrchr(ap_id, (int)*PORT_SEPARATOR)) != 0) { + pmport_str += strlen(PORT_SEPARATOR); + pmport = strtol(pmport_str, NULL, 10); + qual = SATA_CFGA_PMPORT_QUAL; + if ((pmport & ~SATA_CFGA_PMPORT_MASK) != 0 || errno != 0) { + return (CFGA_SATA_PORT); + } + } - *port = cport + (pmport << SATA_CFGA_PMPORT_SHIFT) + pmport_qual; - + *port = cport | (pmport << SATA_CFGA_PMPORT_SHIFT) | qual; return (CFGA_SATA_OK); } @@ -1844,10 +1896,10 @@ static cfga_sata_ret_t setup_for_devctl_cmd( - const char *ap_id, - devctl_hdl_t *devctl_hdl, - nvlist_t **user_nvlistp, - uint_t oflag) + const char *ap_id, + devctl_hdl_t *devctl_hdl, + nvlist_t **user_nvlistp, + uint_t oflag) { uint_t port; @@ -1887,8 +1939,8 @@ */ if ((rv = get_port_num(lap_id, &port)) != CFGA_SATA_OK) { (void) printf( - "setup_for_devctl_cmd: get_port_num, errno: %d\n", - errno); + "setup_for_devctl_cmd: get_port_num, errno: %d\n", + errno); goto bailout; } @@ -1912,7 +1964,7 @@ static cfga_sata_ret_t port_state(devctl_hdl_t hdl, nvlist_t *list, - ap_rstate_t *rstate, ap_ostate_t *ostate) + ap_rstate_t *rstate, ap_ostate_t *ostate) { devctl_ap_state_t devctl_ap_state; @@ -1936,7 +1988,7 @@ */ cfga_sata_ret_t do_control_ioctl(const char *ap_id, sata_cfga_apctl_t subcommand, uint_t arg, - void **descrp, size_t *sizep) + void **descrp, size_t *sizep) { int fd = -1; uint_t port; @@ -1954,7 +2006,7 @@ if ((fd = open(ap_id, O_RDONLY)) == -1) { (void) printf("do_control_ioctl: open failed: errno:%d\n", - errno); + errno); rv = CFGA_SATA_OPEN; if (errno == EBUSY) { rv = CFGA_SATA_BUSY;
--- a/usr/src/lib/krb5/plugins/kdb/db2/kdb_db2.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/krb5/plugins/kdb/db2/kdb_db2.c Mon Aug 17 09:14:35 2009 -0400 @@ -1,5 +1,5 @@ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -569,14 +569,14 @@ /* Solaris Kerberos: Better error logging */ snprintf(errbuf, sizeof(errbuf), gettext("Failed to modify " "access and modification times for \"%s\": "), - db_ctx->db_lf_file); + db_ctx->db_lf_name); krb5_db2_prepend_err_str(context, errbuf, retval, retval); } } else { retval = errno; /* Solaris Kerberos: Better error logging */ snprintf(errbuf, sizeof(errbuf), gettext("Failed to stat \"%s\": "), - db_ctx->db_lf_file); + db_ctx->db_lf_name); krb5_db2_prepend_err_str(context, errbuf, retval, retval); } if (!retval) { @@ -586,7 +586,7 @@ retval = errno; /* Solaris Kerberos: Better error logging */ snprintf(errbuf, sizeof(errbuf), gettext("Failed to stat \"%s\": "), - db_ctx->db_lf_file); + db_ctx->db_lf_name); krb5_db2_prepend_err_str(context, errbuf, retval, retval); } } @@ -648,7 +648,7 @@ /* Solaris Kerberos: Better error logging */ snprintf(errbuf, sizeof(errbuf), gettext("Failed to exclusively lock \"%s\": "), - db_ctx->db_lf_file); + db_ctx->db_lf_name); krb5_db2_prepend_err_str(context, errbuf, EBADF, EBADF); return KRB5_KDB_CANTLOCK_DB; @@ -660,7 +660,7 @@ /* Solaris Kerberos: Better error logging */ snprintf(errbuf, sizeof(errbuf), gettext("Failed to lock \"%s\": "), - db_ctx->db_lf_file); + db_ctx->db_lf_name); krb5_db2_prepend_err_str(context, errbuf, retval, retval); }
--- a/usr/src/lib/libnwam/Makefile.com Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/libnwam/Makefile.com Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -38,7 +38,7 @@ $(LINTLIB):= SRCS = $(SRCDIR)/$(LINTSRC) -LDLIBS += -ldoor -ldladm -lc +LDLIBS += -ldladm -lc CFLAGS += $(CCVERBOSE) CPPFLAGS += -I$(SRCDIR) -I$(NWAMDIR)
--- a/usr/src/lib/libscf/Makefile.com Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/libscf/Makefile.com Mon Aug 17 09:14:35 2009 -0400 @@ -61,13 +61,13 @@ # of libuutil. # LIBUUTIL = $(SRC)/lib/libuutil -s390_NCPPFL = -_gcc=-nostdinc -I$(ROOT)/usr/include -MY_NATIVE_CPPFLAGS =\ - -DNATIVE_BUILD $(DTEXTDOM) \ - $($(MACH)_NCPPFL) \ +MY_NATIVE_CPPFLAGS_s390 = -_gcc=-nostdinc -I$(ROOT)/usr/include +MY_NATIVE_CPPFLAGS = \ + -DNATIVE_BUILD $(DTEXTDOM) \ + $(MY_NATIVE_CPPFLAGS_$(MACH)) \ -I../inc -I$(COMDIR) -I$(LIBUUTIL)/common -I$(ROOTHDRDIR) -s390_NLDLIBS = -L$(ROOT)/usr/lib -MY_NATIVE_LDLIBS = $($(MACH)_NLDLIBS) \ +MY_NATIVE_LDDIR_s390 = -L$(ROOT)/usr/lib +MY_NATIVE_LDLIBS = $(MY_NATIVE_LDDIR_$(MACH)) \ -L$(LIBUUTIL)/native -R$(LIBUUTIL)/native -luutil -ldoor -lc -lgen MY_NATIVE_LDLIBS_i386 = -lsmbios MY_NATIVE_LDLIBS += $(MY_NATIVE_LDLIBS_$(MACH))
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c Mon Aug 17 09:14:35 2009 -0400 @@ -401,70 +401,8 @@ static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { - char *logstr; - libzfs_handle_t *hdl = zhp->zfs_hdl; - - /* - * Preserve history log string. - * any changes performed here will be - * logged as an internal event. - */ - logstr = zhp->zfs_hdl->libzfs_log_str; - zhp->zfs_hdl->libzfs_log_str = NULL; - -top: - if (put_stats_zhdl(zhp, zc) != 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; + if (put_stats_zhdl(zhp, zc) != 0) return (-1); - } - - - if (zhp->zfs_dmustats.dds_inconsistent) { - zfs_cmd_t zc2 = { 0 }; - - /* - * If it is dds_inconsistent, then we've caught it in - * the middle of a 'zfs receive' or 'zfs destroy', and - * it is inconsistent from the ZPL's point of view, so - * can't be mounted. However, it could also be that we - * have crashed in the middle of one of those - * operations, in which case we need to get rid of the - * inconsistent state. We do that by either rolling - * back to the previous snapshot (which will fail if - * there is none), or destroying the filesystem. Note - * that if we are still in the middle of an active - * 'receive' or 'destroy', then the rollback and destroy - * will fail with EBUSY and we will drive on as usual. - */ - - (void) strlcpy(zc2.zc_name, zhp->zfs_name, - sizeof (zc2.zc_name)); - - if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) { - (void) zvol_remove_link(hdl, zhp->zfs_name); - zc2.zc_objset_type = DMU_OST_ZVOL; - } else { - zc2.zc_objset_type = DMU_OST_ZFS; - } - - /* - * If we can successfully destroy it, pretend that it - * never existed. - */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc2) == 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; - errno = ENOENT; - return (-1); - } - /* If we can successfully roll it back, reset the stats */ - if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc2) == 0) { - if (get_stats_ioctl(zhp, zc) != 0) { - zhp->zfs_hdl->libzfs_log_str = logstr; - return (-1); - } - goto top; - } - } /* * We've managed to open the dataset and gather statistics. Determine @@ -486,7 +424,6 @@ else abort(); /* we should never see any other types */ - zhp->zfs_hdl->libzfs_log_str = logstr; zhp->zpool_hdl = zpool_handle(zhp); return (0); }
--- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 17 09:14:35 2009 -0400 @@ -1139,6 +1139,7 @@ char newname[ZFS_MAXNAMELEN]; int error; boolean_t needagain, progress; + char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap)); @@ -1333,11 +1334,13 @@ VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); + s1 = strrchr(fsname, '/'); + s2 = strrchr(stream_fsname, '/'); + /* check for rename */ if ((stream_parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || - strcmp(strrchr(fsname, '/'), - strrchr(stream_fsname, '/')) != 0) { + ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN];
--- a/usr/src/pkgdefs/SUNWigb/postinstall Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/pkgdefs/SUNWigb/postinstall Mon Aug 17 09:14:35 2009 -0400 @@ -133,5 +133,7 @@ "pciex8086,10d6" "pciex8086,10e6" "pciex8086,10e7" - "pciex8086,10e8"' \ + "pciex8086,10e8" + "pciex8086,150a" + "pciex8086,150d"' \ -b "$BASEDIR" igb
--- a/usr/src/pkgdefs/SUNWixgbe/postinstall Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/pkgdefs/SUNWixgbe/postinstall Mon Aug 17 09:14:35 2009 -0400 @@ -141,6 +141,7 @@ "pciex8086,10f7" "pciex8086,10f9" "pciex8086,10fb" + "pciex8086,10fc" "pciex8086,1507" "pciex8086,1508" "pciex8086,1514"' \
--- a/usr/src/tools/SUNWonbld/prototype_com Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/SUNWonbld/prototype_com Mon Aug 17 09:14:35 2009 -0400 @@ -182,6 +182,7 @@ f none opt/onbld/man/man1/codereview.1 644 root bin f none opt/onbld/man/man1/cstyle.1 644 root bin f none opt/onbld/man/man1/cw.1 644 root bin +f none opt/onbld/man/man1/findunref.1 644 root bin f none opt/onbld/man/man1/flg.flp.1 644 root bin f none opt/onbld/man/man1/get_depend_info.1 644 root bin f none opt/onbld/man/man1/intf_check.1 644 root bin
--- a/usr/src/tools/findunref/Makefile Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/findunref/Makefile Mon Aug 17 09:14:35 2009 -0400 @@ -20,18 +20,19 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# -PROG = findunref +PROG = findunref +MAN1FILES = findunref.1 CFLAGS += $(CCVERBOSE) LINTFLAGS += -ux include ../Makefile.tools +$(ROOTONBLDMAN1FILES) := FILEMODE= 644 + EXCEPTION_SRC= $(SCM_TYPE:none=) common open $(CLOSED_BUILD)EXCEPTION_SRC += closed EXCEPTION_LISTS= $(EXCEPTION_SRC:%=exception_list.%) @@ -42,7 +43,7 @@ all: $(PROG) exception_list -install: all .WAIT $(ROOTONBLDMACHPROG) +install: all .WAIT $(ROOTONBLDMACHPROG) $(ROOTONBLDMAN1FILES) lint: lint_PROG
--- a/usr/src/tools/findunref/exception_list.closed Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/findunref/exception_list.closed Mon Aug 17 09:14:35 2009 -0400 @@ -20,11 +20,9 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # # closed-tree exception list @@ -36,19 +34,19 @@ # Ignore internal packages, scripts, and tools that are intentionally not # built or used during a nightly. # -./closed/cmd/zic/makefile.tzpkg -./closed/cmd/zic/tzpkg.awk +./usr/closed/cmd/zic/makefile.tzpkg +./usr/closed/cmd/zic/tzpkg.awk # # Ignore files that get used during a EXPORT_SRC or CRYPT_SRC build only. # -./closed/uts/sun4v/io/n2cp/Makefile -./closed/uts/sun4v/io/ncp/Makefile +./usr/closed/uts/sun4v/io/n2cp/Makefile +./usr/closed/uts/sun4v/io/ncp/Makefile # # Ignore files that are only used for warlock # -./closed/uts/sparc/marvell88sx/Makefile +./usr/closed/uts/sparc/marvell88sx/Makefile # # An unfortunate artifact of the bridged, split gate is that closed-source @@ -59,4 +57,4 @@ # won't exist, so we need the ISUSED directive here. # # ISUSED - let checkpaths know that the next entry is good. -./closed/deleted_files +./usr/closed/deleted_files
--- a/usr/src/tools/findunref/exception_list.open Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/findunref/exception_list.open Mon Aug 17 09:14:35 2009 -0400 @@ -34,87 +34,87 @@ # Ignore oddly-named text files scattered about -- someday these should be # suffixed with .txt so we don't have to list them. # -./src/cmd/oawk/EXPLAIN -./src/cmd/rpcsvc/nis/rpc.nisd/resolv_server/DNS_FWD -./src/cmd/vi/port/ex.news -./src/cmd/ssh/doc +./usr/src/cmd/oawk/EXPLAIN +./usr/src/cmd/rpcsvc/nis/rpc.nisd/resolv_server/DNS_FWD +./usr/src/cmd/vi/port/ex.news +./usr/src/cmd/ssh/doc # # Ignore everything under trees that may be resynched from outside ON. # -./src/cmd/perl -./src/cmd/sqlite -./src/lib/libsqlite -./src/cmd/tcpd -./src/common/openssl -./src/grub -./src/uts/intel/sys/acpi +./usr/src/cmd/perl +./usr/src/cmd/sqlite +./usr/src/lib/libsqlite +./usr/src/cmd/tcpd +./usr/src/common/openssl +./usr/src/grub +./usr/src/uts/intel/sys/acpi # # Ignore ksh93/ast-related files that are only used to resync our build # configuration with upstream. # -./src/lib/libast/*/src/lib/libast/FEATURE -./src/lib/libast/common/comp/conf.* -./src/lib/libast/common/features -./src/lib/libast/common/include/ast_windows.h -./src/lib/libast/common/port/lc.tab -./src/lib/libast/common/port/lcgen.c -./src/lib/libcmd/*/src/lib/libcmd/FEATURE -./src/lib/libcmd/common/features -./src/lib/libdll/*/src/lib/libdll/FEATURE -./src/lib/libdll/common/features -./src/lib/libpp/*/pp.* -./src/lib/libpp/common/gentab.sh -./src/lib/libpp/common/ppsym.c -./src/lib/libpp/i386/ppdebug.h -./src/lib/libpp/sparc/ppdebug.h -./src/lib/libshell/*/src/cmd/ksh93/FEATURE -./src/lib/libshell/common/data/math.tab -./src/lib/libshell/common/features -./src/lib/libshell/misc/buildksh93.sh -./src/lib/libshell/misc/buildksh93.readme +./usr/src/lib/libast/*/src/lib/libast/FEATURE +./usr/src/lib/libast/common/comp/conf.* +./usr/src/lib/libast/common/features +./usr/src/lib/libast/common/include/ast_windows.h +./usr/src/lib/libast/common/port/lc.tab +./usr/src/lib/libast/common/port/lcgen.c +./usr/src/lib/libcmd/*/src/lib/libcmd/FEATURE +./usr/src/lib/libcmd/common/features +./usr/src/lib/libdll/*/src/lib/libdll/FEATURE +./usr/src/lib/libdll/common/features +./usr/src/lib/libpp/*/pp.* +./usr/src/lib/libpp/common/gentab.sh +./usr/src/lib/libpp/common/ppsym.c +./usr/src/lib/libpp/i386/ppdebug.h +./usr/src/lib/libpp/sparc/ppdebug.h +./usr/src/lib/libshell/*/src/cmd/ksh93/FEATURE +./usr/src/lib/libshell/common/data/math.tab +./usr/src/lib/libshell/common/features +./usr/src/lib/libshell/misc/buildksh93.sh +./usr/src/lib/libshell/misc/buildksh93.readme # # Ignore ksh93/ast-related "iffe" (if feature enabled) probe # -./src/lib/libsum/common/features/sum +./usr/src/lib/libsum/common/features/sum # # Ignore ksh93/ast-related upstream source, currently superseded by # a per-platform version of sum.h, since we use libmd.so.1 for some # ciphers. # -./src/lib/libsum/common/sum.h +./usr/src/lib/libsum/common/sum.h # # Ignore ksh93/ast-related test programs. # -./src/cmd/ast/msgcc/msgcc.tst -./src/lib/libast/common/port/astmath.c +./usr/src/cmd/ast/msgcc/msgcc.tst +./usr/src/lib/libast/common/port/astmath.c # # Ignore ksh93/ast-related source components that are not currently # used but may be useful later. # -./src/lib/libcmd/common/cksum.c -./src/lib/libcmd/common/md5sum.c -./src/lib/libcmd/common/sum.c -./src/lib/libshell/common/bltins/mkservice.c -./src/lib/libshell/common/data/bash_pre_rc.sh -./src/lib/libshell/common/include/env.h -./src/lib/libshell/common/sh/bash.c -./src/lib/libshell/common/sh/env.c -./src/lib/libshell/common/sh/shcomp.c -./src/lib/libshell/common/sh/suid_exec.c +./usr/src/lib/libcmd/common/cksum.c +./usr/src/lib/libcmd/common/md5sum.c +./usr/src/lib/libcmd/common/sum.c +./usr/src/lib/libshell/common/bltins/mkservice.c +./usr/src/lib/libshell/common/data/bash_pre_rc.sh +./usr/src/lib/libshell/common/include/env.h +./usr/src/lib/libshell/common/sh/bash.c +./usr/src/lib/libshell/common/sh/env.c +./usr/src/lib/libshell/common/sh/shcomp.c +./usr/src/lib/libshell/common/sh/suid_exec.c # # Ignore any files built as part of the nightly program itself. # # ISUSED - let checkpaths know that the next entry is good. -./src/*.out +./usr/src/*.out # ISUSED - let checkpaths know that the next entry is good. -./src/*.ref +./usr/src/*.ref # # Ignore internal test directories and test programs. @@ -123,145 +123,145 @@ */test *Test.java *_test.[ch] -./src/cmd/ldap/common/*test.c -./src/cmd/logadm/tester -./src/cmd/print/printmgr/com/sun/admin/pm/client/helptools/extract -./src/cmd/print/printmgr/com/sun/admin/pm/server/pmtest -./src/cmd/sendmail/libsm/t-*.c -./src/cmd/sort/common/convert.c -./src/cmd/sort/common/invoke.c -./src/lib/crypt_modules/sha256/test.c -./src/lib/efcode/fcode_test -./src/lib/libkvm/common/test.c +./usr/src/cmd/ldap/common/*test.c +./usr/src/cmd/logadm/tester +./usr/src/cmd/print/printmgr/com/sun/admin/pm/client/helptools/extract +./usr/src/cmd/print/printmgr/com/sun/admin/pm/server/pmtest +./usr/src/cmd/sendmail/libsm/t-*.c +./usr/src/cmd/sort/common/convert.c +./usr/src/cmd/sort/common/invoke.c +./usr/src/lib/crypt_modules/sha256/test.c +./usr/src/lib/efcode/fcode_test +./usr/src/lib/libkvm/common/test.c # # Ignore debugging code. # -./src/cmd/fs.d/pcfs/fsck/inject.c -./src/cmd/sort/common/statistics.c +./usr/src/cmd/fs.d/pcfs/fsck/inject.c +./usr/src/cmd/sort/common/statistics.c # # Ignore internal packages, scripts, and tools that are intentionally not # built or used during a nightly. # -./src/cmd/mdb/packages -./src/cmd/sgs/packages -./src/cmd/sgs/rtld.4.x -./src/prototypes -./src/cmd/pools/poold/com/sun/solaris/*/*/package.html -./src/uts/intel/io/acpica/cmp_ca.sh +./usr/src/cmd/mdb/packages +./usr/src/cmd/sgs/packages +./usr/src/cmd/sgs/rtld.4.x +./usr/src/prototypes +./usr/src/cmd/pools/poold/com/sun/solaris/*/*/package.html +./usr/src/uts/intel/io/acpica/cmp_ca.sh # # Ignore files that are only used by internal packages. # -./src/cmd/sgs/*/*/*chk.msg +./usr/src/cmd/sgs/*/*/*chk.msg # # Ignore files that get used during a EXPORT_SRC or CRYPT_SRC build only. # -./src/common/crypto/aes/Makefile -./src/common/crypto/arcfour/Makefile -./src/common/crypto/blowfish/Makefile -./src/common/crypto/des/Makefile -./src/common/crypto/rsa/Makefile -./src/lib/gss_mechs/mech_dh/backend/mapfile-vers -./src/lib/gss_mechs/mech_dh/dh1024/mapfile-vers -./src/lib/gss_mechs/mech_dh/dh192/mapfile-vers -./src/lib/gss_mechs/mech_dh/dh640/mapfile-vers -./src/lib/gss_mechs/mech_krb5/mapfile-vers-clean -./src/lib/gss_mechs/mech_spnego/mapfile-vers-clean -./src/lib/pkcs11/pkcs11_softtoken/common/Makefile -./src/uts/common/Makefile -./src/uts/common/crypto/io/Makefile -./src/uts/common/gssapi/include/Makefile -./src/uts/common/gssapi/mechs/dummy/Makefile -./src/uts/common/gssapi/mechs/krb5/Makefile -./src/xmod +./usr/src/common/crypto/aes/Makefile +./usr/src/common/crypto/arcfour/Makefile +./usr/src/common/crypto/blowfish/Makefile +./usr/src/common/crypto/des/Makefile +./usr/src/common/crypto/rsa/Makefile +./usr/src/lib/gss_mechs/mech_dh/backend/mapfile-vers +./usr/src/lib/gss_mechs/mech_dh/dh1024/mapfile-vers +./usr/src/lib/gss_mechs/mech_dh/dh192/mapfile-vers +./usr/src/lib/gss_mechs/mech_dh/dh640/mapfile-vers +./usr/src/lib/gss_mechs/mech_krb5/mapfile-vers-clean +./usr/src/lib/gss_mechs/mech_spnego/mapfile-vers-clean +./usr/src/lib/pkcs11/pkcs11_softtoken/common/Makefile +./usr/src/uts/common/Makefile +./usr/src/uts/common/crypto/io/Makefile +./usr/src/uts/common/gssapi/include/Makefile +./usr/src/uts/common/gssapi/mechs/dummy/Makefile +./usr/src/uts/common/gssapi/mechs/krb5/Makefile +./usr/src/xmod # # Ignore Makefiles which are used by developers but not used by nightly # itself. This is a questionable practice, since they tend to rot. # -./src/cmd/syslogd/sparcv9/Makefile -./src/uts/sparc/uhci/Makefile -./src/lib/pam_modules/smb/amd64/Makefile -./src/lib/pam_modules/smb/sparcv9/Makefile -./src/cmd/isns/isnsd/xml_def/isnsmgmtSchema.xsd +./usr/src/cmd/syslogd/sparcv9/Makefile +./usr/src/uts/sparc/uhci/Makefile +./usr/src/lib/pam_modules/smb/amd64/Makefile +./usr/src/lib/pam_modules/smb/sparcv9/Makefile +./usr/src/cmd/isns/isnsd/xml_def/isnsmgmtSchema.xsd # # Ignore dtrace scripts only used by developers # -./src/cmd/vscan/vscand/vscan.d +./usr/src/cmd/vscan/vscand/vscan.d # # Ignore sample source code. # -./src/cmd/sendmail/libmilter/example.c -./src/lib/libdhcpsvc/modules/templates +./usr/src/cmd/sendmail/libmilter/example.c +./usr/src/lib/libdhcpsvc/modules/templates # # Ignore .xcl files that aren't used because the program is statically linked. # -./src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.xcl +./usr/src/cmd/cmd-inet/sbin/dhcpagent/dhcpagent.xcl # # Ignore sendmail files included for completeness' sake, but which won't # be used until certain _FFR (for future release) #define's go live. # -./src/cmd/sendmail/src/statusd_shm.h +./usr/src/cmd/sendmail/src/statusd_shm.h # # Ignore files originally supplied by ISC (Internet Software Consortium) # as part of a BIND release. # -./src/lib/libresolv2/common/cylink/bn68000.c -./src/lib/libresolv2/common/cylink/bn8086.c -./src/lib/libresolv2/common/cylink/lbn68000.c -./src/lib/libresolv2/common/cylink/lbn68000.h -./src/lib/libresolv2/common/cylink/lbn68020.c -./src/lib/libresolv2/common/cylink/lbn68020.h -./src/lib/libresolv2/common/cylink/lbn80386.h -./src/lib/libresolv2/common/cylink/lbn8086.h -./src/lib/libresolv2/common/cylink/lbnppc.c -./src/lib/libresolv2/common/cylink/lbnppc.h -./src/lib/libresolv2/common/cylink/ppcasm.h -./src/lib/libresolv2/common/cylink/sizetest.c -./src/lib/libresolv2/common/irs/getaddrinfo.c -./src/lib/libresolv2/common/irs/nis_p.h -./src/lib/libresolv2/common/resolv/res_mkupdate.h -./src/lib/libresolv2/include/err.h +./usr/src/lib/libresolv2/common/cylink/bn68000.c +./usr/src/lib/libresolv2/common/cylink/bn8086.c +./usr/src/lib/libresolv2/common/cylink/lbn68000.c +./usr/src/lib/libresolv2/common/cylink/lbn68000.h +./usr/src/lib/libresolv2/common/cylink/lbn68020.c +./usr/src/lib/libresolv2/common/cylink/lbn68020.h +./usr/src/lib/libresolv2/common/cylink/lbn80386.h +./usr/src/lib/libresolv2/common/cylink/lbn8086.h +./usr/src/lib/libresolv2/common/cylink/lbnppc.c +./usr/src/lib/libresolv2/common/cylink/lbnppc.h +./usr/src/lib/libresolv2/common/cylink/ppcasm.h +./usr/src/lib/libresolv2/common/cylink/sizetest.c +./usr/src/lib/libresolv2/common/irs/getaddrinfo.c +./usr/src/lib/libresolv2/common/irs/nis_p.h +./usr/src/lib/libresolv2/common/resolv/res_mkupdate.h +./usr/src/lib/libresolv2/include/err.h # # Ignore mont_mulf.c. It is used as a starting point for some hand optimized # assembly files. We keep it around for future reference. # -./src/common/bignum/mont_mulf.c +./usr/src/common/bignum/mont_mulf.c # # Ignore the sparc Makefiles for x86-only drivers; # they're used to build warlock only. # -./src/uts/sparc/sata/Makefile -./src/uts/sparc/si3124/Makefile -./src/uts/sparc/nv_sata/Makefile -./src/uts/sparc/ahci/Makefile +./usr/src/uts/sparc/sata/Makefile +./usr/src/uts/sparc/si3124/Makefile +./usr/src/uts/sparc/nv_sata/Makefile +./usr/src/uts/sparc/ahci/Makefile # # Ignore uttrack.c. It is provided as part of the standard # ACPI CA source code but provides optional resource tracking # functionality which is not used. # -./src/uts/intel/io/acpica/utilities/uttrack.c +./usr/src/uts/intel/io/acpica/utilities/uttrack.c # # Ignore any files that get used during a gcc build only. # -./src/cmd/sgs/rtld/common/mapfile-order-gcc +./usr/src/cmd/sgs/rtld/common/mapfile-order-gcc # # Ignore compiler dependent header files for fpscrubber # and sparc prototype files # -./src/cmd/fps/fptest/singdoub* -./src/pkgdefs/SUNWfsu/prototype_gcc_sparc -./src/pkgdefs/SUNWfsu/prototype_cc_sparc +./usr/src/cmd/fps/fptest/singdoub* +./usr/src/pkgdefs/SUNWfsu/prototype_gcc_sparc +./usr/src/pkgdefs/SUNWfsu/prototype_cc_sparc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/tools/findunref/findunref.1 Mon Aug 17 09:14:35 2009 -0400 @@ -0,0 +1,94 @@ +.\" " CDDL HEADER START +.\" " +.\" " The contents of this file are subject to the terms of the +.\" " Common Development and Distribution License (the "License"). +.\" " You may not use this file except in compliance with the License. +.\" " +.\" " You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" " or http://www.opensolaris.org/os/licensing. +.\" " See the License for the specific language governing permissions +.\" " and limitations under the License. +.\" " +.\" " When distributing Covered Code, include this CDDL HEADER in each +.\" " file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" " If applicable, add the following below this CDDL HEADER, with the +.\" " fields enclosed by brackets "[]" replaced with your own identifying +.\" " information: Portions Copyright [yyyy] [name of copyright owner] +.\" " +.\" " CDDL HEADER END +.\" " +.\" "Copyright 2009 Sun Microsystems, Inc. All rights reserved. +.\" "Use is subject to license terms. +.TH findunref 1 "11 Aug 2009" +.I findunref +\- find unused files in a source tree +.SH SYNOPSIS +findunref [\fB-s\fP \fIsubtree\fP] [\fB-t\fP \fItstampfile\fP] +[\fB-S\fP \fBhg\fP|\fBtw\fP] \fIsrcroot\fP \fIexceptfile\fP +.LP +.SH DESCRIPTION +.IX "OS-Net build tools" "findunref" "" "\fBfindunref\fP" +.LP +The findunref utility lists the files in a source tree which have not been +accessed more recently than a particular timestamp file. Although +findunref may be used on its own, it is usually invoked by +\fBnightly\fP(1) to find files that are never referenced during a given +build (see \fB-f\fP in \fBnightly\fP(1)). +.LP +The root of the source tree to examine is specified by \fIsrcroot\fP. To +simplify comparing findunref output from different source trees, findunref +outputs all filenames relative to \fIsrcroot\fP. +.LP +Some files in a source tree may be intentionally unreferenced (e.g., +documentation) or only referenced during specialized types of builds. +Accordingly, \fIexceptfile\fP names a file containing a list of pathname +globs that will be ignored by findunref. Within \fIexceptfile\fP, any +lines consisting solely of whitespace or starting with \fB#\fP will be +ignored. Directory globs may also be specified, which will cause any +matching directories to be skipped entirely. If no exceptions are +desired, \fIexceptfile\fP can be \fB/dev/null\fP. +.LP +Depending on how findunref is invoked, it can either check all files, or +limit its checks to files under control of a specific source code +management (SCM) system. To limit checks to files managed by Mercurial, +the \fBhg\fP(1) utility must be present in \fB/usr/bin\fP and any relevant +repositories must be located at or under \fBsrcroot\fP. Nested Mercurial +repositories are supported. +.SH OPTIONS +.TP 10 +.B -s \fIsubtree\fP +Only look under \fIsubtree\fP for unreferenced files. By default, all +directories under \fIsrcroot\fP are examined. +.TP 10 +.B -t \fItstampfile\fP +Consider files older than \fItstampfile\fP to be unreferenced. +By default, \fIsrcroot\fB/.build.tstamp\fR is used. +.TP 10 +.B -S \fBhg\fP|\fBtw\fP +Only check files that are managed by the specified SCM. To simplify +interaction with \fBwhich_scm\fP(1), the SCM names "mercurial" and +"teamware" may also be specified. By default, all files are checked. +.SH SEE ALSO +.LP +\fBhg\fP(1), +\fBnightly\fP(1), +\fBwhich_scm\fP(1) +.SH NOTES +Since many files are only used when building for a particular ISA (e.g., +Makefiles that are specified to i386 or sparc), builds must be done on all +applicable ISAs and the results merged. For instance, if nightly builds +(with \fB-f\fP) are done on both SPARC and x86, \fBusr/src\fP will be +populated with a corresponding \fBunref-\fIisa\fB.out\fR file, which can +be merged with \fBcomm\fP(1): +.LP +.nf +comm -12 /path/to/unref-i386.out + /path/to/unref-sparc.out > unref.out +.fi +.LP +This merged file can then be compared against the gate's latest +unreferenced file list (e.g. \fB/ws/onnv-gate/usr/src/unrefmaster.out\fP). +.LP +Different gates have different unreferenced file policies. Any changes to +\fIexceptfile\fP that would define new unreferenced file policies for a +given gate must be cleared with the appropriate gatekeepers.
--- a/usr/src/tools/findunref/findunref.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/findunref/findunref.c Mon Aug 17 09:14:35 2009 -0400 @@ -17,13 +17,10 @@ * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" /* * Finds all unreferenced files in a source tree that do not match a list of @@ -55,18 +52,54 @@ unsigned int maxpaths; } pnset_t; +/* + * Data associated with the current Mercurial manifest. + */ +typedef struct hgdata { + pnset_t *manifest; + char hgpath[MAXPATHLEN]; + char root[MAXPATHLEN]; + unsigned int rootlen; + boolean_t rootwarn; +} hgdata_t; + +/* + * Hooks used to check if a given unreferenced file is known to an SCM + * (currently Mercurial and TeamWare). + */ +typedef int checkscm_func_t(const char *, const struct FTW *); +typedef void chdirscm_func_t(const char *); + +typedef struct { + const char *name; + checkscm_func_t *checkfunc; + chdirscm_func_t *chdirfunc; +} scm_t; + +static checkscm_func_t check_tw, check_hg; +static chdirscm_func_t chdir_hg; static int pnset_add(pnset_t *, const char *); static int pnset_check(const pnset_t *, const char *); static void pnset_empty(pnset_t *); +static void pnset_free(pnset_t *); static int checkpath(const char *, const struct stat *, int, struct FTW *); static pnset_t *make_exset(const char *); static void warn(const char *, ...); static void die(const char *, ...); +static const scm_t scms[] = { + { "tw", check_tw, NULL }, + { "teamware", check_tw, NULL }, + { "hg", check_hg, chdir_hg }, + { "mercurial", check_hg, chdir_hg }, + { NULL, NULL, NULL } +}; + +static const scm_t *scm; +static hgdata_t hgdata; static time_t tstamp; /* timestamp to compare files to */ static pnset_t *exsetp; /* pathname globs to ignore */ static const char *progname; -static boolean_t allfiles = B_FALSE; int main(int argc, char *argv[]) @@ -83,10 +116,10 @@ else progname++; - while ((c = getopt(argc, argv, "as:t:")) != EOF) { + while ((c = getopt(argc, argv, "as:t:S:")) != EOF) { switch (c) { case 'a': - allfiles = B_TRUE; + /* for compatibility; now the default */ break; case 's': @@ -97,6 +130,15 @@ tstampfile = optarg; break; + case 'S': + for (scm = scms; scm->name != NULL; scm++) { + if (strcmp(scm->name, optarg) == 0) + break; + } + if (scm->name == NULL) + die("unsupported SCM `%s'\n", optarg); + break; + default: case '?': goto usage; @@ -107,8 +149,9 @@ argv += optind; if (argc != 2) { -usage: (void) fprintf(stderr, "usage: %s [-a] [-s subtree] " - "[-t tstampfile] srcroot exceptfile\n", progname); +usage: (void) fprintf(stderr, "usage: %s [-s <subtree>] " + "[-t <tstampfile>] [-S hg|tw] <srcroot> <exceptfile>\n", + progname); return (EXIT_FAILURE); } @@ -134,7 +177,9 @@ /* * Walk the specified subtree of the tree rooted at argv[0]. */ - (void) chdir(argv[0]); + if (chdir(argv[0]) == -1) + die("cannot change directory to \"%s\"", argv[0]); + if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0) die("cannot walk tree rooted at \"%s\"\n", argv[0]); @@ -143,6 +188,143 @@ } /* + * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'. + */ +static pnset_t * +load_manifest(const char *hgroot) +{ + FILE *fp = NULL; + char *hgcmd = NULL; + char *newline; + pnset_t *pnsetp; + char path[MAXPATHLEN]; + + pnsetp = calloc(sizeof (pnset_t), 1); + if (pnsetp == NULL || + asprintf(&hgcmd, "/usr/bin/hg manifest -R %s", hgroot) == -1) + goto fail; + + fp = popen(hgcmd, "r"); + if (fp == NULL) + goto fail; + + while (fgets(path, sizeof (path), fp) != NULL) { + newline = strrchr(path, '\n'); + if (newline != NULL) + *newline = '\0'; + + if (pnset_add(pnsetp, path) == 0) + goto fail; + } + + (void) pclose(fp); + free(hgcmd); + return (pnsetp); +fail: + warn("cannot load hg manifest at %s", hgroot); + if (fp != NULL) + (void) pclose(fp); + free(hgcmd); + pnset_free(pnsetp); + return (NULL); +} + +/* + * If necessary, change our active manifest to be appropriate for `path'. + */ +static void +chdir_hg(const char *path) +{ + char hgpath[MAXPATHLEN]; + char basepath[MAXPATHLEN]; + char *slash; + + (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path); + + /* + * Change our active manifest if any one of the following is true: + * + * 1. No manifest is loaded. Find the nearest hgroot to load from. + * + * 2. A manifest is loaded, but we've moved into a directory with + * its own hgroot (e.g., usr/closed). Load from its hgroot. + * + * 3. A manifest is loaded, but no longer applies (e.g., the manifest + * under usr/closed is loaded, but we've moved to usr/src). + */ + if (hgdata.manifest == NULL || + strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 || + strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) { + pnset_free(hgdata.manifest); + hgdata.manifest = NULL; + + (void) strlcpy(basepath, path, MAXPATHLEN); + + /* + * Walk up the directory tree looking for .hg subdirectories. + */ + while (access(hgpath, X_OK) == -1) { + slash = strrchr(basepath, '/'); + if (slash == NULL) { + if (!hgdata.rootwarn) { + warn("no hg root for \"%s\"\n", path); + hgdata.rootwarn = B_TRUE; + } + return; + } + *slash = '\0'; + (void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath); + } + + /* + * We found a directory with an .hg subdirectory; record it + * and load its manifest. + */ + (void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN); + (void) strlcpy(hgdata.root, basepath, MAXPATHLEN); + hgdata.manifest = load_manifest(hgdata.root); + + /* + * The logic in check_hg() depends on hgdata.root having a + * single trailing slash, so only add it if it's missing. + */ + if (hgdata.root[strlen(hgdata.root) - 1] != '/') + (void) strlcat(hgdata.root, "/", MAXPATHLEN); + hgdata.rootlen = strlen(hgdata.root); + } +} + +/* + * Check if a file is under Mercurial control by checking against the manifest. + */ +/* ARGSUSED */ +static int +check_hg(const char *path, const struct FTW *ftwp) +{ + /* + * The manifest paths are relative to the manifest root; skip past it. + */ + path += hgdata.rootlen; + + return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path)); +} + +/* + * Check if a file is under TeamWare control by checking for its corresponding + * SCCS "s-dot" file. + */ +static int +check_tw(const char *path, const struct FTW *ftwp) +{ + char sccspath[MAXPATHLEN]; + + (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base, + path, path + ftwp->base); + + return (access(sccspath, F_OK) == 0); +} + +/* * Using `exceptfile' and a built-in list of exceptions, build and return a * pnset_t consisting of all of the pathnames globs which are allowed to be * unreferenced in the source tree. @@ -189,8 +371,7 @@ (void) fclose(fp); return (pnsetp); fail: - pnset_empty(pnsetp); - free(pnsetp); + pnset_free(pnsetp); return (NULL); } @@ -201,8 +382,6 @@ checkpath(const char *path, const struct stat *statp, int type, struct FTW *ftwp) { - char sccspath[MAXPATHLEN]; - switch (type) { case FTW_F: /* @@ -212,26 +391,28 @@ return (0); /* - * If not explicitly checking all files, restrict ourselves - * to unreferenced files under SCCS control. + * If requested, restrict ourselves to unreferenced files + * under SCM control. */ - if (!allfiles) { - (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", - ftwp->base, path, path + ftwp->base); - - if (access(sccspath, F_OK) == -1) - return (0); - } - - (void) puts(path); + if (scm == NULL || scm->checkfunc(path, ftwp)) + (void) puts(path); return (0); case FTW_D: /* * Prune any directories in the exception list. */ - if (pnset_check(exsetp, path)) + if (pnset_check(exsetp, path)) { ftwp->quit = FTW_PRUNE; + return (0); + } + + /* + * If necessary, advise the SCM logic of our new directory. + */ + if (scm != NULL && scm->chdirfunc != NULL) + scm->chdirfunc(path); + return (0); case FTW_DNR: @@ -256,14 +437,15 @@ pnset_add(pnset_t *pnsetp, const char *path) { char **newpaths; + unsigned int maxpaths; if (pnsetp->npath == pnsetp->maxpaths) { - newpaths = realloc(pnsetp->paths, sizeof (const char *) * - (pnsetp->maxpaths + 15)); + maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2; + newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths); if (newpaths == NULL) return (0); pnsetp->paths = newpaths; - pnsetp->maxpaths += 15; + pnsetp->maxpaths = maxpaths; } pnsetp->paths[pnsetp->npath] = strdup(path); @@ -302,6 +484,18 @@ pnsetp->maxpaths = 0; } +/* + * Free the pnset_t pointed to by `pnsetp'. + */ +static void +pnset_free(pnset_t *pnsetp) +{ + if (pnsetp != NULL) { + pnset_empty(pnsetp); + free(pnsetp); + } +} + /* PRINTFLIKE1 */ static void warn(const char *format, ...)
--- a/usr/src/tools/scripts/check_rtime.pl Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/scripts/check_rtime.pl Mon Aug 17 09:14:35 2009 -0400 @@ -71,7 +71,6 @@ # Define any directories we should skip completely. $SkipDirs = qr{ - usr/lib/devfsadm | # 4382889 usr/lib/libc | # optimized libc usr/lib/rcm | # 4426119 usr/perl5 | # alan's taking care of these :-) @@ -229,15 +228,18 @@ # Catch libintl and libw, although ld(1) will bind to these and thus determine # they're needed, their content was moved into libc as of on297 build 7. -# libthread and libpthread were completely moved into libc as of on10 build 53. -# libdl was moved into libc as of on10 build 49. librt and libaio were moved -# into libc as of Nevada build 44. +# libsched was moved into libc as of on10 build 36. libdl was moved into libc +# as of on10 build 49. libthread and libpthread were into libc as of on10 build +# 53. libdoor was moved into libc as of Nevada build 12. librt and libaio were +# moved into libc in Nevada build 44. $OldDeps = qr{ ^(?: libintl\.so\.1 | libw\.so\.1 | + libsched\.so\.1 | + libdl\.so\.1 | libthread\.so\.1 | libpthread\.so\.1 | - libdl\.so\.1 | + libdoor\.so\.1 | librt\.so\.1 | libaio\.so\.1 )$
--- a/usr/src/tools/scripts/checkpaths.sh Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/scripts/checkpaths.sh Mon Aug 17 09:14:35 2009 -0400 @@ -21,11 +21,9 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # Quis custodiet ipsos custodies? @@ -97,10 +95,9 @@ # # The exception_list is generated from whichever input files are appropriate # for this workspace, so checking it obviates the need to check the inputs. -elist="" + if [ -r $SRC/tools/findunref/exception_list ]; then - validate_paths -k ISUSED -r -e '^\*' -b $SRC/.. \ - $SRC/tools/findunref/exception_list + validate_paths -k ISUSED -r -e '^\*' $SRC/tools/findunref/exception_list fi # These are straightforward.
--- a/usr/src/tools/scripts/nightly.sh Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/scripts/nightly.sh Mon Aug 17 09:14:35 2009 -0400 @@ -3159,19 +3159,9 @@ mv $SRC/unref-${MACH}.out $SRC/unref-${MACH}.ref fi - # - # For any SCM other than teamware, we want to disable the - # managed-by-SCCS test in findunref - # - findunref_all="" - if [ "$SCM_TYPE" != teamware ]; then - findunref_all="-a" - fi - - findunref $findunref_all -t $SRC/.build.tstamp $SRC/.. \ + findunref -S $SCM_TYPE -t $SRC/.build.tstamp -s usr $CODEMGR_WS \ ${TOOLS}/findunref/exception_list 2>> $mail_msg_file | \ - sort | sed -e s=^./src/=./= -e s=^./closed/=../closed/= \ - > $SRC/unref-${MACH}.out + sort > $SRC/unref-${MACH}.out if [ ! -f $SRC/unref-${MACH}.ref ]; then cp $SRC/unref-${MACH}.out $SRC/unref-${MACH}.ref
--- a/usr/src/tools/scripts/xref.1 Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/tools/scripts/xref.1 Mon Aug 17 09:14:35 2009 -0400 @@ -1,10 +1,8 @@ -.\" ident "%Z%%M% %I% %E% SMI" .\" " CDDL HEADER START .\" " .\" " The contents of this file are subject to the terms of the -.\" " Common Development and Distribution License, Version 1.0 only -.\" " (the "License"). You may not use this file except in compliance -.\" " with the License. +.\" " Common Development and Distribution License (the "License"). +.\" " You may not use this file except in compliance with the License. .\" " .\" " You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" " or http://www.opensolaris.org/os/licensing. @@ -19,9 +17,9 @@ .\" " .\" " CDDL HEADER END .\" " -.\" "Copyright 2006 Sun Microsystems, Inc. All rights reserved." +.\" "Copyright 2009 Sun Microsystems, Inc. All rights reserved." .\" "Use is subject to license terms." -.TH xref 1 "10 Feb 2006" +.TH xref 1 "10 Aug 2009" .SH NAME .I xref \- build and maintain source cross-references @@ -40,7 +38,7 @@ matching -- by default, files matching the following patterns are eligible: - *.[Ccshlxy] Makefile* *.il* *.cc *.adb llib-* + *.[Ccdshlxy] Makefile* *.il* *.cc llib-* *.xml *.dtd.* *.ndl However, this default behavior can be changed, either through command-line options or by setting the cross-reference customization macros (see @@ -165,5 +163,5 @@ xref will silently not build etags. .LP The ETAGS environment variable can be used to specify an alternate -path to the etags utility. For instance, to use the one packaged with -Sun Freeware, set ETAGS to /opt/sfw/bin/etags prior to invoking xref. +path to the etags utility. For instance, to use the one included with +SUNWgnu-emacs, set ETAGS to /usr/gnu/bin/etags prior to invoking xref.
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_node.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_node.h Mon Aug 17 09:14:35 2009 -0400 @@ -217,12 +217,11 @@ uint32_t n_flag; smbmntinfo_t *n_mount; ino64_t n_ino; - /* Lock for the next 8 is r_lkserlock */ + /* Lock for the next 7 is r_lkserlock */ enum vtype n_ovtype; /* vnode type opened */ int n_dirrefs; struct smbfs_fctx *n_dirseq; /* ff context */ - long n_dirofs; /* last ff offset */ - long n_direof; /* End of dir. offset. */ + int n_dirofs; /* last ff offset */ int n_vcgenid; /* gereration no. (reconnect) */ int n_fidrefs; uint16_t n_fid; /* file handle */
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_smb.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_smb.c Mon Aug 17 09:14:35 2009 -0400 @@ -560,18 +560,6 @@ md_get_mem(mdp, fsa->fsa_tname, nlen, MB_MSYSTEM); } - /* - * If fs_name isn't NTFS they probably require resume keys. - * This is another example of the client trying to fix a server - * bug. This code uses the logic created by PR-3983209. See - * long block comment in smbfs_smb_findnextLM2. - */ - if (strcmp(fsa->fsa_tname, "NTFS")) { - SMB_SS_LOCK(ssp); - ssp->ss_flags |= SMBS_RESUMEKEYS; - SMB_SS_UNLOCK(ssp); - } - smb_t2_done(t2p); return (0); } @@ -2304,14 +2292,13 @@ struct smb_vc *vcp = SSTOVC(ctx->f_ssp); struct mbchain *mbp; struct mdchain *mdp; - uint16_t tw, flags; + uint16_t ecnt, eos, lno, flags; int len, error; if (ctx->f_t2) { smb_t2_done(ctx->f_t2); ctx->f_t2 = NULL; } - ctx->f_flags &= ~SMBFS_RDD_GOTRNAME; flags = FIND2_RETURN_RESUME_KEYS | FIND2_CLOSE_ON_EOS; if (ctx->f_flags & SMBFS_RDD_FINDSINGLE) { flags |= FIND2_CLOSE_AFTER_REQUEST; @@ -2330,7 +2317,6 @@ mb_put_uint16le(mbp, flags); mb_put_uint16le(mbp, ctx->f_infolevel); mb_put_uint32le(mbp, 0); - /* mb_put_uint8(mbp, SMB_DT_ASCII); specs? hah! */ len = ctx->f_wclen; error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, &len, '\\'); @@ -2347,11 +2333,10 @@ mb_put_uint16le(mbp, ctx->f_Sid); mb_put_uint16le(mbp, ctx->f_limit); mb_put_uint16le(mbp, ctx->f_infolevel); - if (ctx->f_ssp->ss_flags & SMBS_RESUMEKEYS) { - mb_put_uint32le(mbp, ctx->f_rkey); - } else - mb_put_uint32le(mbp, 0); + /* Send whatever resume key we received... */ + mb_put_uint32le(mbp, ctx->f_rkey); mb_put_uint16le(mbp, flags); + /* ... and the resume name if we have one. */ if (ctx->f_rname) { /* resume file name */ mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen, @@ -2361,67 +2346,66 @@ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) mb_put_uint8(mbp, 0); /* 1st byte NULL Unicode char */ mb_put_uint8(mbp, 0); -#if 0 - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 200 * 1000 * 1000; /* 200ms */ - if (vcp->vc_flags & SMBC_WIN95) { - /* - * some implementations suggests to sleep here - * for 200ms, due to the bug in the Win95. - * I've didn't notice any problem, but put code - * for it. - */ - msleep(&flags, 0, PVFS, "fix95", &ts); - } -#endif } t2p->t2_maxpcount = 5 * 2; - t2p->t2_maxdcount = vcp->vc_txmax; + t2p->t2_maxdcount = 0xF000; /* 64K less some overhead */ error = smb_t2_request(t2p); if (error) return (error); + + /* + * This is the "resume name" we just sent. + * We want the new one (if any) that may be + * found in the response we just received and + * will now begin parsing. Free the old one + * now so we'll know if we found a new one. + */ + if (ctx->f_rname) { + kmem_free(ctx->f_rname, ctx->f_rnamelen); + ctx->f_rname = NULL; + ctx->f_rnamelen = 0; + } + mdp = &t2p->t2_rparam; if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { if ((error = md_get_uint16le(mdp, &ctx->f_Sid)) != 0) - return (error); + goto nodata; ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } - if ((error = md_get_uint16le(mdp, &tw)) != 0) - return (error); - ctx->f_ecnt = tw; /* search count - # entries returned */ - if ((error = md_get_uint16le(mdp, &tw)) != 0) - return (error); + md_get_uint16le(mdp, &ecnt); /* entry count */ + md_get_uint16le(mdp, &eos); /* end of search */ + md_get_uint16le(mdp, NULL); /* EA err. off. */ + error = md_get_uint16le(mdp, &lno); /* last name off. */ + if (error != 0) + goto nodata; + /* - * tw now is the "end of search" flag. against an XP server tw + * The "end of search" flag from an XP server sometimes * comes back zero when the prior find_next returned exactly * the number of entries requested. in which case we'd try again - * but the search has in fact been closed so an EBADF results. our - * circumvention is to check here for a zero search count. + * but the search has in fact been closed so an EBADF results. + * our circumvention is to check here for a zero entry count. */ - if (tw || ctx->f_ecnt == 0) + ctx->f_ecnt = ecnt; + if (eos || ctx->f_ecnt == 0) ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE; - if ((error = md_get_uint16le(mdp, &tw)) != 0) - return (error); - if ((error = md_get_uint16le(mdp, &tw)) != 0) - return (error); if (ctx->f_ecnt == 0) return (ENOENT); - ctx->f_rnameofs = tw; - mdp = &t2p->t2_rdata; - if (mdp->md_top == NULL) { - SMBVDEBUG("ecnt = %d, but data is NULL\n", ctx->f_ecnt); - return (ENOENT); - } -#ifdef APPLE - if (mdp->md_top->m_len == 0) { - printf("bug: ecnt = %d, but m_len = 0 and m_next = %p " - "(please report)\n", ctx->f_ecnt, mbp->mb_top->m_next); - return (ENOENT); - } -#endif + + /* Last Name Off (LNO) is the entry with the resume name. */ + ctx->f_rnameofs = lno; ctx->f_eofs = 0; return (0); + +nodata: + /* + * Failed parsing the FindFirst or FindNext response. + * Force this directory listing closed, otherwise the + * calling process may hang in an infinite loop. + */ + ctx->f_ecnt = 0; /* Force closed. */ + ctx->f_flags |= SMBFS_RDD_EOF; + return (EIO); } static int @@ -2478,11 +2462,9 @@ uint16_t date, time, wattr; uint32_t size, next, dattr, resumekey = 0; uint64_t llongint; - int error, svtz, cnt, fxsz, nmlen, recsz, otw; + int error, svtz, cnt, fxsz, nmlen, recsz; struct timespec ts; -again: - otw = 0; /* nothing sent Over The Wire (yet) */ if (ctx->f_ecnt == 0) { if (ctx->f_flags & SMBFS_RDD_EOF) return (ENOENT); @@ -2493,7 +2475,6 @@ return (error); ctx->f_attr.fa_reqtime = ts; ctx->f_otws++; - otw = 1; } t2p = ctx->f_t2; mdp = &t2p->t2_rdata; @@ -2515,7 +2496,9 @@ md_get_uint32le(mdp, NULL); /* allocation size */ md_get_uint16le(mdp, &wattr); ctx->f_attr.fa_attr = wattr; - md_get_uint8(mdp, &tb); + error = md_get_uint8(mdp, &tb); + if (error) + goto nodata; size = nmlen = tb; fxsz = 23; recsz = next = 24 + nmlen; /* docs misses zero byte @end */ @@ -2537,7 +2520,9 @@ /* freebsd bug: fa_attr endian bug */ md_get_uint32le(mdp, &dattr); /* extended file attributes */ ctx->f_attr.fa_attr = dattr; - md_get_uint32le(mdp, &size); /* name len */ + error = md_get_uint32le(mdp, &size); /* name len */ + if (error) + goto nodata; fxsz = 64; /* size ofinfo up to filename */ if (ctx->f_infolevel == SMB_FIND_BOTH_DIRECTORY_INFO) { /* @@ -2545,7 +2530,9 @@ * a reserved byte, and ShortName(8.3 means 24 bytes, * as Leach defined it to always be Unicode) */ - md_get_mem(mdp, NULL, 30, MB_MSYSTEM); + error = md_get_mem(mdp, NULL, 30, MB_MSYSTEM); + if (error) + goto nodata; fxsz += 30; } recsz = next ? next : fxsz + size; @@ -2565,15 +2552,16 @@ error = md_get_mem(mdp, cp, nmlen, MB_MSYSTEM); if (error) - return (error); + goto nodata; if (next) { + /* How much data to skip? */ cnt = next - nmlen - fxsz; + if (cnt < 0) { + SMBVDEBUG("out of sync\n"); + goto nodata; + } if (cnt > 0) md_get_mem(mdp, NULL, cnt, MB_MSYSTEM); - else if (cnt < 0) { - SMBVDEBUG("out of sync\n"); - return (EBADRPC); - } } /* Don't count any trailing null in the name. */ if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { @@ -2584,63 +2572,27 @@ nmlen--; } if (nmlen == 0) - return (EBADRPC); + goto nodata; /* - * Ref radar 3983209. On a find-next we expect a server will - * 1) if the continue bit is set, use the server's idea of current loc, - * 2) else if the resume key is non-zero, use that location, - * 3) else if the resume name is set, use that location, - * 4) else use the server's idea of current location. - * - * Current NetApps don't do that. If we send no continue bit, a zero - * resume key, and a resume name, the NetApp ignores the resume name - * and acts on the (zero) resume key, sending back the start of the - * directory again. Panther doesn't expose the netapp bug; Panther used - * the continue bit, but that was changed for 2866172. Win2000 as a - * client also relies upon the resume name, but they request a very - * large number of files, so the bug would be seen only with very - * large directories. - * - * Our fix is to notice if the second OTW op (the first find-next) - * returns, in the first filename, the same filename we got back - * at the start of the first OTW (the find-first). In that case - * we've detected the server bug and set SMBS_RESUMEKEYS, causing us - * to send non-zero resume keys henceforth. + * On a find-next we expect that the server will: + * 1) if the continue bit is set, use the server's offset, + * 2) else if the resume key is non-zero, use that offset, + * 3) else if the resume name is set, use that offset, + * 4) else use the server's idea of current offset. * - * Caveat: if there's a netapp so old it doesn't negotiate NTLM 0.12 - * then we get no resume keys so f_rkey stays zero and this "fix" - * changes nothing. - * - * Note due to a similar problem (4051871) we also set SMBS_RESUMEKEYS - * for FAT volumes, at mount time. + * We always set the resume key flag. If the server returns + * a resume key then we should always send it back to them. */ - if (otw && !(ctx->f_ssp->ss_flags & SMBS_RESUMEKEYS)) { - if (ctx->f_otws == 1) { - ctx->f_firstnmlen = nmlen; - ctx->f_firstnm = kmem_alloc(nmlen, KM_SLEEP); - bcopy(ctx->f_name, ctx->f_firstnm, nmlen); - } else if (ctx->f_otws == 2 && nmlen == ctx->f_firstnmlen && - !(bcmp(ctx->f_name, ctx->f_firstnm, nmlen) == 0)) { - struct smb_share *ssp = ctx->f_ssp; - SMBERROR( - "server resume_name bug seen; using resume keys\n"); - SMB_SS_LOCK(ssp); - ssp->ss_flags |= SMBS_RESUMEKEYS; - SMB_SS_UNLOCK(ssp); - ctx->f_ecnt = 0; - goto again; /* must redo last otw op! */ - } - } ctx->f_rkey = resumekey; next = ctx->f_eofs + recsz; if (ctx->f_rnameofs && - (ctx->f_flags & SMBFS_RDD_GOTRNAME) == 0 && - (ctx->f_rnameofs >= ctx->f_eofs && - ctx->f_rnameofs < (int)next)) { + ctx->f_rnameofs >= ctx->f_eofs && + ctx->f_rnameofs < (int)next) { /* - * Server needs a resume filename. + * This entry is the "resume name". + * Save it for the next request. */ if (ctx->f_rnamelen != nmlen) { if (ctx->f_rname) @@ -2649,7 +2601,6 @@ ctx->f_rnamelen = nmlen; } bcopy(ctx->f_name, ctx->f_rname, nmlen); - ctx->f_flags |= SMBFS_RDD_GOTRNAME; } ctx->f_nmlen = nmlen; ctx->f_eofs = next; @@ -2658,6 +2609,18 @@ smbfs_fname_tolocal(ctx); return (0); + +nodata: + /* + * Something bad has happened and we ran out of data + * before we could parse all f_ecnt entries expected. + * Force this directory listing closed, otherwise the + * calling process may hang in an infinite loop. + */ + SMBVDEBUG("ran out of data\n"); + ctx->f_ecnt = 0; /* Force closed. */ + ctx->f_flags |= SMBFS_RDD_EOF; + return (EIO); } static int @@ -2715,22 +2678,9 @@ /* * Note: "limit" (maxcount) needs to fit in a short! - * - * smb_lookup always uses 1, which is OK (no wildcards). - * Otherwise, this is smbfs_readdir, and we want to force - * limit to be in the range 3 to 1000. The low limit (3) - * is so we can always give the caller one "real" entry - * (something other than "." or "..") The high limit is - * just tuning. WinNT used 512, Win2k 1366. We use 1000. - * - * XXX: Move the [skip . ..] gunk to our caller (readdir). */ - if ((ctx->f_flags & SMBFS_RDD_FINDSINGLE) == 0) { - if (limit < 3) - limit = 3; - if (limit > 1000) - limit = 1000; - } + if (limit > 0xffff) + limit = 0xffff; ctx->f_scred = scrp; for (;;) {
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr.h Mon Aug 17 09:14:35 2009 -0400 @@ -93,7 +93,6 @@ #define SMBFS_RDD_FINDSINGLE 0x04 /* note SMBFS_RDD_USESEARCH 0x08 replaced by smbfs_fctx_type */ #define SMBFS_RDD_NOCLOSE 0x10 -#define SMBFS_RDD_GOTRNAME 0x1000 /* * Search context supplied by server
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c Mon Aug 17 09:14:35 2009 -0400 @@ -392,7 +392,6 @@ np->r_vnode = vp; np->n_mount = mi; np->r_hashq = rhtp; - np->n_direof = -1; np->n_fid = SMB_FID_UNUSED; np->n_uid = UID_NOBODY; np->n_gid = GID_NOBODY;
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vnops.c Mon Aug 17 09:14:35 2009 -0400 @@ -66,6 +66,17 @@ #include <fs/fs_subr.h> /* + * We assign directory offsets like the NFS client, where the + * offset increments by _one_ after each directory entry. + * Further, the entries "." and ".." are always at offsets + * zero and one (respectively) and the "real" entries from + * the server appear at offsets starting with two. This + * macro is used to initialize the n_dirofs field after + * setting n_dirseq with a _findopen call. + */ +#define FIRST_DIROFS 2 + +/* * These characters are illegal in NTFS file names. * ref: http://support.microsoft.com/kb/147438 * @@ -285,9 +296,18 @@ } /* - * Directory open is easy. + * Directory open. See smbfs_readvdir() */ if (vp->v_type == VDIR) { + if (np->n_dirseq == NULL) { + /* first open */ + error = smbfs_smb_findopen(np, "*", 1, + SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, + &scred, &np->n_dirseq); + if (error != 0) + goto out; + } + np->n_dirofs = FIRST_DIROFS; np->n_dirrefs++; goto have_fid; } @@ -307,18 +327,16 @@ np->n_vcgenid == ssp->ss_vcgenid) { int upgrade = 0; - /* BEGIN CSTYLED */ if ((flag & FWRITE) && !(np->n_rights & (SA_RIGHT_FILE_WRITE_DATA | - GENERIC_RIGHT_ALL_ACCESS | - GENERIC_RIGHT_WRITE_ACCESS))) + GENERIC_RIGHT_ALL_ACCESS | + GENERIC_RIGHT_WRITE_ACCESS))) upgrade = 1; if ((flag & FREAD) && !(np->n_rights & (SA_RIGHT_FILE_READ_DATA | - GENERIC_RIGHT_ALL_ACCESS | - GENERIC_RIGHT_READ_ACCESS))) + GENERIC_RIGHT_ALL_ACCESS | + GENERIC_RIGHT_READ_ACCESS))) upgrade = 1; - /* END CSTYLED */ if (!upgrade) { /* * the existing open is good enough @@ -509,6 +527,7 @@ goto out; if ((fctx = np->n_dirseq) != NULL) { np->n_dirseq = NULL; + np->n_dirofs = 0; error = smbfs_smb_findclose(fctx, &scred); } } else { @@ -2573,49 +2592,69 @@ smbfs_readvdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, caller_context_t *ct) { - size_t dbufsiz; - struct dirent64 *dp; + /* + * Note: "limit" tells the SMB-level FindFirst/FindNext + * functions how many directory entries to request in + * each OtW call. It needs to be large enough so that + * we don't make lots of tiny OtW requests, but there's + * no point making it larger than the maximum number of + * OtW entries that would fit in a maximum sized trans2 + * response (64k / 48). Beyond that, it's just tuning. + * WinNT used 512, Win2k used 1366. We use 1000. + */ + static const int limit = 1000; + /* Largest possible dirent size. */ + static const size_t dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN); struct smb_cred scred; vnode_t *newvp; struct smbnode *np = VTOSMB(vp); - int nmlen, reclen, error = 0; - long offset, limit; struct smbfs_fctx *ctx; + struct dirent64 *dp; + ssize_t save_resid; + offset_t save_offset; /* 64 bits */ + int offset; /* yes, 32 bits */ + int nmlen, error; + ushort_t reclen; ASSERT(curproc->p_zone == VTOSMI(vp)->smi_zone); /* Make sure we serialize for n_dirseq use. */ ASSERT(smbfs_rw_lock_held(&np->r_lkserlock, RW_WRITER)); - /* Min size is DIRENT64_RECLEN(256) rounded up. */ - if (uio->uio_resid < 512 || uio->uio_offset < 0) + /* + * Make sure smbfs_open filled in n_dirseq + */ + if (np->n_dirseq == NULL) + return (EBADF); + + /* Check for overflow of (32-bit) directory offset. */ + if (uio->uio_loffset < 0 || uio->uio_loffset > INT32_MAX || + (uio->uio_loffset + uio->uio_resid) > INT32_MAX) return (EINVAL); + /* Require space for at least one dirent. */ + if (uio->uio_resid < dbufsiz) + return (EINVAL); + +#ifdef USE_DNLC /* * This dnlc_purge_vp ensures that name cache for this dir will be * current - it'll only have the items for which the smbfs_nget * MAKEENTRY happened. */ -#ifdef NOT_YET if (smbfs_fastlookup) dnlc_purge_vp(vp); #endif SMBVDEBUG("dirname='%s'\n", np->n_rpath); smb_credinit(&scred, cr); - dbufsiz = DIRENT64_RECLEN(SMB_MAXFNAMELEN); dp = kmem_alloc(dbufsiz, KM_SLEEP); - offset = uio->uio_offset; /* NB: "cookie" */ - limit = uio->uio_resid / DIRENT64_RECLEN(1); - SMBVDEBUG("offset=0x%ld, limit=0x%ld\n", offset, limit); - - if (offset == 0) { - /* Don't know EOF until findclose */ - np->n_direof = -1; - } else if (offset == np->n_direof) { - /* Arrived at end of directory. */ - goto out; - } + save_resid = uio->uio_resid; + save_offset = uio->uio_loffset; + offset = uio->uio_offset; + SMBVDEBUG("in: offset=%d, resid=%d\n", + (int)uio->uio_offset, (int)uio->uio_resid); + error = 0; /* * Generate the "." and ".." entries here so we can @@ -2623,20 +2662,24 @@ * (2) deal with getting their I numbers which the * findnext below does only for normal names. */ - while (limit && offset < 2) { - limit--; + while (offset < FIRST_DIROFS) { + /* + * Tricky bit filling in the first two: + * offset 0 is ".", offset 1 is ".." + * so strlen of these is offset+1. + */ reclen = DIRENT64_RECLEN(offset + 1); + if (uio->uio_resid < reclen) + goto out; bzero(dp, reclen); - /*LINTED*/ dp->d_reclen = reclen; - /* Tricky: offset 0 is ".", offset 1 is ".." */ dp->d_name[0] = '.'; dp->d_name[1] = '.'; dp->d_name[offset + 1] = '\0'; /* * Want the real I-numbers for the "." and ".." * entries. For these two names, we know that - * smbfslookup can do this all locally. + * smbfslookup can get the nodes efficiently. */ error = smbfslookup(vp, dp->d_name, &newvp, cr, 1, ct); if (error) { @@ -2645,21 +2688,29 @@ dp->d_ino = VTOSMB(newvp)->n_ino; VN_RELE(newvp); } - dp->d_off = offset + 1; /* see d_off below */ - error = uiomove(dp, dp->d_reclen, UIO_READ, uio); + /* + * Note: d_off is the offset that a user-level program + * should seek to for reading the NEXT directory entry. + * See libc: readdir, telldir, seekdir + */ + dp->d_off = offset + 1; + error = uiomove(dp, reclen, UIO_READ, uio); if (error) goto out; + /* + * Note: uiomove updates uio->uio_offset, + * but we want it to be our "cookie" value, + * which just counts dirents ignoring size. + */ uio->uio_offset = ++offset; } - if (limit == 0) - goto out; - if (offset != np->n_dirofs || np->n_dirseq == NULL) { - SMBVDEBUG("Reopening search %ld:%ld\n", offset, np->n_dirofs); - if (np->n_dirseq) { - (void) smbfs_smb_findclose(np->n_dirseq, &scred); - np->n_dirseq = NULL; - } - np->n_dirofs = 2; + + /* + * If there was a backward seek, we have to reopen. + */ + if (offset < np->n_dirofs) { + SMBVDEBUG("Reopening search %d:%d\n", + offset, np->n_dirofs); error = smbfs_smb_findopen(np, "*", 1, SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, &scred, &ctx); @@ -2667,72 +2718,97 @@ SMBVDEBUG("can not open search, error = %d", error); goto out; } + /* free the old one */ + (void) smbfs_smb_findclose(np->n_dirseq, &scred); + /* save the new one */ np->n_dirseq = ctx; - } else + np->n_dirofs = FIRST_DIROFS; + } else { ctx = np->n_dirseq; - while (np->n_dirofs < offset) { - if (smbfs_smb_findnext(ctx, offset - np->n_dirofs++, - &scred) != 0) { - (void) smbfs_smb_findclose(np->n_dirseq, &scred); - np->n_dirseq = NULL; - np->n_direof = np->n_dirofs; - np->n_dirofs = 0; - *eofp = 1; - error = 0; - goto out; - } } - error = 0; - for (; limit; limit--) { + + /* + * Skip entries before the requested offset. + */ + while (np->n_dirofs < offset) { error = smbfs_smb_findnext(ctx, limit, &scred); - if (error) { - if (error == EBADRPC) - error = ENOENT; - (void) smbfs_smb_findclose(np->n_dirseq, &scred); - np->n_dirseq = NULL; - np->n_direof = np->n_dirofs; - np->n_dirofs = 0; - *eofp = 1; - error = 0; - break; - } + if (error != 0) + goto out; np->n_dirofs++; + } + + /* + * While there's room in the caller's buffer: + * get a directory entry from SMB, + * convert to a dirent, copyout. + * We stop when there is no longer room for a + * maximum sized dirent because we must decide + * before we know anything about the next entry. + */ + while (uio->uio_resid >= dbufsiz) { + error = smbfs_smb_findnext(ctx, limit, &scred); + if (error != 0) + goto out; + np->n_dirofs++; + /* Sanity check the name length. */ nmlen = ctx->f_nmlen; if (nmlen > SMB_MAXFNAMELEN) { nmlen = SMB_MAXFNAMELEN; SMBVDEBUG("Truncating name: %s\n", ctx->f_name); } - reclen = DIRENT64_RECLEN(nmlen); - if (uio->uio_resid < reclen) - break; - bzero(dp, reclen); - /*LINTED*/ - dp->d_reclen = reclen; - dp->d_ino = ctx->f_attr.fa_ino; - /* - * Note: d_off is the offset that a user-level program - * should seek to for reading the _next_ directory entry. - * See libc: readdir, telldir, seekdir - */ - dp->d_off = offset + 1; - bcopy(ctx->f_name, dp->d_name, nmlen); - dp->d_name[nmlen] = '\0'; #ifdef NOT_YET if (smbfs_fastlookup) { - if (smbfs_nget(vp, ctx->f_name, - ctx->f_nmlen, &ctx->f_attr, &newvp) == 0) + if (smbfs_nget(vp, ctx->f_name, nmlen, + &ctx->f_attr, &newvp) == 0) VN_RELE(newvp); } #endif /* NOT_YET */ - error = uiomove(dp, dp->d_reclen, UIO_READ, uio); + + reclen = DIRENT64_RECLEN(nmlen); + bzero(dp, reclen); + dp->d_reclen = reclen; + bcopy(ctx->f_name, dp->d_name, nmlen); + dp->d_name[nmlen] = '\0'; + dp->d_ino = ctx->f_attr.fa_ino; + dp->d_off = offset + 1; /* See d_off comment above */ + error = uiomove(dp, reclen, UIO_READ, uio); if (error) - break; + goto out; + /* See comment re. uio_offset above. */ uio->uio_offset = ++offset; } - if (error == ENOENT) + +out: + /* + * When we come to the end of a directory, the + * SMB-level functions return ENOENT, but the + * caller is not expecting an error return. + * + * Also note that we must delay the call to + * smbfs_smb_findclose(np->n_dirseq, ...) + * until smbfs_close so that all reads at the + * end of the directory will return no data. + */ + if (error == ENOENT) { error = 0; -out: + if (eofp) + *eofp = 1; + } + /* + * If we encountered an error (i.e. "access denied") + * from the FindFirst call, we will have copied out + * the "." and ".." entries leaving offset == 2. + * In that case, restore the original offset/resid + * so the caller gets no data with the error. + */ + if (error != 0 && offset == FIRST_DIROFS) { + uio->uio_loffset = save_offset; + uio->uio_resid = save_resid; + } + SMBVDEBUG("out: offset=%d, resid=%d\n", + (int)uio->uio_offset, (int)uio->uio_resid); + kmem_free(dp, dbufsiz); smb_credrele(&scred); return (error);
--- a/usr/src/uts/common/fs/zfs/dbuf.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dbuf.c Mon Aug 17 09:14:35 2009 -0400 @@ -109,7 +109,7 @@ dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) { dbuf_hash_table_t *h = &dbuf_hash_table; - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; uint64_t obj = dn->dn_object; uint64_t hv = DBUF_HASH(os, obj, level, blkid); uint64_t idx = hv & h->hash_table_mask; @@ -140,7 +140,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db) { dbuf_hash_table_t *h = &dbuf_hash_table; - objset_impl_t *os = db->db_objset; + objset_t *os = db->db_objset; uint64_t obj = db->db.db_object; int level = db->db_level; uint64_t blkid = db->db_blkid; @@ -894,7 +894,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) { dnode_t *dn = db->db_dnode; - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; dbuf_dirty_record_t **drp, *dr; int drop_struct_lock = FALSE; boolean_t do_free_accounting = B_FALSE; @@ -1488,7 +1488,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, dmu_buf_impl_t *parent, blkptr_t *blkptr) { - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; dmu_buf_impl_t *db, *odb; ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); @@ -2011,7 +2011,7 @@ arc_buf_t **datap = &dr->dt.dl.dr_data; dmu_buf_impl_t *db = dr->dr_dbuf; dnode_t *dn = db->db_dnode; - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; uint64_t txg = tx->tx_txg; ASSERT(dmu_tx_is_syncing(tx)); @@ -2182,7 +2182,7 @@ { dmu_buf_impl_t *db = dr->dr_dbuf; dnode_t *dn = db->db_dnode; - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; dmu_buf_impl_t *parent = db->db_parent; uint64_t txg = tx->tx_txg; zbookmark_t zb; @@ -2279,7 +2279,7 @@ { dmu_buf_impl_t *db = vdb; dnode_t *dn = db->db_dnode; - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; blkptr_t *bp = zio->io_bp; blkptr_t *bp_orig = &zio->io_bp_orig; uint64_t fill = 0;
--- a/usr/src/uts/common/fs/zfs/dmu.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu.c Mon Aug 17 09:14:35 2009 -0400 @@ -99,7 +99,7 @@ dmu_buf_impl_t *db; int err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); blkid = dbuf_whichblock(dn, offset); @@ -150,7 +150,7 @@ dmu_buf_impl_t *db; int error; - error = dnode_hold(os->os, object, FTAG, &dn); + error = dnode_hold(os, object, FTAG, &dn); if (error) return (error); @@ -282,7 +282,7 @@ dnode_t *dn; int err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); @@ -335,7 +335,7 @@ return; if (len == 0) { /* they're interested in the bonus buffer */ - dn = os->os->os_meta_dnode; + dn = os->os_meta_dnode; if (object == 0 || object >= DN_MAX_OBJECT) return; @@ -352,7 +352,7 @@ * already cached, we will do a *synchronous* read in the * dnode_hold() call. The same is true for any indirects. */ - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err != 0) return; @@ -484,7 +484,7 @@ dnode_t *dn; int err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err != 0) return (err); err = dmu_free_long_range_impl(os, dn, offset, length, FALSE); @@ -499,7 +499,7 @@ dmu_tx_t *tx; int err; - err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, FTAG, &dn); if (err != 0) return (err); @@ -527,7 +527,7 @@ uint64_t size, dmu_tx_t *tx) { dnode_t *dn; - int err = dnode_hold(os->os, object, FTAG, &dn); + int err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); ASSERT(offset < UINT64_MAX); @@ -545,7 +545,7 @@ dmu_buf_t **dbp; int numbufs, err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); @@ -852,8 +852,7 @@ dbuf_rele(db, FTAG); } else { dbuf_rele(db, FTAG); - ASSERT(dn->dn_objset->os.os == dn->dn_objset); - dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz, + dmu_write(dn->dn_objset, dn->dn_object, offset, blksz, buf->b_data, tx); dmu_return_arcbuf(buf); } @@ -930,7 +929,7 @@ blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - objset_impl_t *os = db->db_objset; + objset_t *os = db->db_objset; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; tx_state_t *tx = &dp->dp_tx; dbuf_dirty_record_t *dr; @@ -1078,7 +1077,7 @@ dnode_t *dn; int err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); err = dnode_set_blksz(dn, size, ibs, tx); @@ -1093,7 +1092,7 @@ dnode_t *dn; /* XXX assumes dnode_hold will not get an i/o error */ - (void) dnode_hold(os->os, object, FTAG, &dn); + (void) dnode_hold(os, object, FTAG, &dn); ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); dn->dn_checksum = checksum; dnode_setdirty(dn, tx); @@ -1107,7 +1106,7 @@ dnode_t *dn; /* XXX assumes dnode_hold will not get an i/o error */ - (void) dnode_hold(os->os, object, FTAG, &dn); + (void) dnode_hold(os, object, FTAG, &dn); ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); dn->dn_compress = compress; dnode_setdirty(dn, tx); @@ -1120,7 +1119,7 @@ dnode_t *dn; int i, err; - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); /* @@ -1134,7 +1133,7 @@ if (i != TXG_SIZE) { dnode_rele(dn, FTAG); txg_wait_synced(dmu_objset_pool(os), 0); - err = dnode_hold(os->os, object, FTAG, &dn); + err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); } @@ -1176,7 +1175,7 @@ dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) { dnode_t *dn; - int err = dnode_hold(os->os, object, FTAG, &dn); + int err = dnode_hold(os, object, FTAG, &dn); if (err) return (err);
--- a/usr/src/uts/common/fs/zfs/dmu_object.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu_object.c Mon Aug 17 09:14:35 2009 -0400 @@ -32,16 +32,15 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { - objset_impl_t *osi = os->os; uint64_t object; uint64_t L2_dnode_count = DNODES_PER_BLOCK << - (osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT); + (os->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT); dnode_t *dn = NULL; int restarted = B_FALSE; - mutex_enter(&osi->os_obj_lock); + mutex_enter(&os->os_obj_lock); for (;;) { - object = osi->os_obj_next; + object = os->os_obj_next; /* * Each time we polish off an L2 bp worth of dnodes * (2^13 objects), move to another L2 bp that's still @@ -51,14 +50,14 @@ */ if (P2PHASE(object, L2_dnode_count) == 0) { uint64_t offset = restarted ? object << DNODE_SHIFT : 0; - int error = dnode_next_offset(osi->os_meta_dnode, + int error = dnode_next_offset(os->os_meta_dnode, DNODE_FIND_HOLE, &offset, 2, DNODES_PER_BLOCK >> 2, 0); restarted = B_TRUE; if (error == 0) object = offset >> DNODE_SHIFT; } - osi->os_obj_next = ++object; + os->os_obj_next = ++object; /* * XXX We should check for an i/o error here and return @@ -66,19 +65,19 @@ * dmu_tx_assign(), but there is currently no mechanism * to do so. */ - (void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, + (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); if (dn) break; if (dmu_object_next(os, &object, B_TRUE, 0) == 0) - osi->os_obj_next = object - 1; + os->os_obj_next = object - 1; } dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); dnode_rele(dn, FTAG); - mutex_exit(&osi->os_obj_lock); + mutex_exit(&os->os_obj_lock); dmu_tx_add_new_object(tx, os, object); return (object); @@ -94,7 +93,7 @@ if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) return (EBADF); - err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG, &dn); + err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); if (err) return (err); dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); @@ -116,7 +115,7 @@ if (object == DMU_META_DNODE_OBJECT) return (EBADF); - err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, FTAG, &dn); if (err) return (err); @@ -166,7 +165,7 @@ ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); - err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, FTAG, &dn); if (err) return (err); @@ -185,7 +184,7 @@ uint64_t offset = (*objectp + 1) << DNODE_SHIFT; int error; - error = dnode_next_offset(os->os->os_meta_dnode, + error = dnode_next_offset(os->os_meta_dnode, (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg); *objectp = offset >> DNODE_SHIFT;
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c Mon Aug 17 09:14:35 2009 -0400 @@ -45,13 +45,13 @@ spa_t * dmu_objset_spa(objset_t *os) { - return (os->os->os_spa); + return (os->os_spa); } zilog_t * dmu_objset_zil(objset_t *os) { - return (os->os->os_zil); + return (os->os_zil); } dsl_pool_t * @@ -59,82 +59,88 @@ { dsl_dataset_t *ds; - if ((ds = os->os->os_dsl_dataset) != NULL && ds->ds_dir) + if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir) return (ds->ds_dir->dd_pool); else - return (spa_get_dsl(os->os->os_spa)); + return (spa_get_dsl(os->os_spa)); } dsl_dataset_t * dmu_objset_ds(objset_t *os) { - return (os->os->os_dsl_dataset); + return (os->os_dsl_dataset); } dmu_objset_type_t dmu_objset_type(objset_t *os) { - return (os->os->os_phys->os_type); + return (os->os_phys->os_type); } void dmu_objset_name(objset_t *os, char *buf) { - dsl_dataset_name(os->os->os_dsl_dataset, buf); + dsl_dataset_name(os->os_dsl_dataset, buf); } uint64_t dmu_objset_id(objset_t *os) { - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; return (ds ? ds->ds_object : 0); } +uint64_t +dmu_objset_logbias(objset_t *os) +{ + return (os->os_logbias); +} + static void checksum_changed_cb(void *arg, uint64_t newval) { - objset_impl_t *osi = arg; + objset_t *os = arg; /* * Inheritance should have been done by now. */ ASSERT(newval != ZIO_CHECKSUM_INHERIT); - osi->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); + os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE); } static void compression_changed_cb(void *arg, uint64_t newval) { - objset_impl_t *osi = arg; + objset_t *os = arg; /* * Inheritance and range checking should have been done by now. */ ASSERT(newval != ZIO_COMPRESS_INHERIT); - osi->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); + os->os_compress = zio_compress_select(newval, ZIO_COMPRESS_ON_VALUE); } static void copies_changed_cb(void *arg, uint64_t newval) { - objset_impl_t *osi = arg; + objset_t *os = arg; /* * Inheritance and range checking should have been done by now. */ ASSERT(newval > 0); - ASSERT(newval <= spa_max_replication(osi->os_spa)); + ASSERT(newval <= spa_max_replication(os->os_spa)); - osi->os_copies = newval; + os->os_copies = newval; } static void primary_cache_changed_cb(void *arg, uint64_t newval) { - objset_impl_t *osi = arg; + objset_t *os = arg; /* * Inheritance and range checking should have been done by now. @@ -142,13 +148,13 @@ ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || newval == ZFS_CACHE_METADATA); - osi->os_primary_cache = newval; + os->os_primary_cache = newval; } static void secondary_cache_changed_cb(void *arg, uint64_t newval) { - objset_impl_t *osi = arg; + objset_t *os = arg; /* * Inheritance and range checking should have been done by now. @@ -156,7 +162,19 @@ ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE || newval == ZFS_CACHE_METADATA); - osi->os_secondary_cache = newval; + os->os_secondary_cache = newval; +} + +static void +logbias_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + ASSERT(newval == ZFS_LOGBIAS_LATENCY || + newval == ZFS_LOGBIAS_THROUGHPUT); + os->os_logbias = newval; + if (os->os_zil) + zil_set_logbias(os->os_zil, newval); } void @@ -177,39 +195,38 @@ int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, - objset_impl_t **osip) + objset_t **osp) { - objset_impl_t *osi; + objset_t *os; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); - osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); - osi->os.os = osi; - osi->os_dsl_dataset = ds; - osi->os_spa = spa; - osi->os_rootbp = bp; - if (!BP_IS_HOLE(osi->os_rootbp)) { + os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); + os->os_dsl_dataset = ds; + os->os_spa = spa; + os->os_rootbp = bp; + if (!BP_IS_HOLE(os->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; - if (DMU_OS_IS_L2CACHEABLE(osi)) + if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_L2CACHE; - dprintf_bp(osi->os_rootbp, "reading %s", ""); + dprintf_bp(os->os_rootbp, "reading %s", ""); /* * NB: when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ - err = arc_read_nolock(NULL, spa, osi->os_rootbp, - arc_getbuf_func, &osi->os_phys_buf, + err = arc_read_nolock(NULL, spa, os->os_rootbp, + arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { - kmem_free(osi, sizeof (objset_impl_t)); + kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; @@ -218,27 +235,27 @@ /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && - arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) { + arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, - sizeof (objset_phys_t), &osi->os_phys_buf, + sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); - bcopy(osi->os_phys_buf->b_data, buf->b_data, - arc_buf_size(osi->os_phys_buf)); - (void) arc_buf_remove_ref(osi->os_phys_buf, - &osi->os_phys_buf); - osi->os_phys_buf = buf; + bcopy(os->os_phys_buf->b_data, buf->b_data, + arc_buf_size(os->os_phys_buf)); + (void) arc_buf_remove_ref(os->os_phys_buf, + &os->os_phys_buf); + os->os_phys_buf = buf; } - osi->os_phys = osi->os_phys_buf->b_data; - osi->os_flags = osi->os_phys->os_flags; + os->os_phys = os->os_phys_buf->b_data; + os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; - osi->os_phys_buf = arc_buf_alloc(spa, size, - &osi->os_phys_buf, ARC_BUFC_METADATA); - osi->os_phys = osi->os_phys_buf->b_data; - bzero(osi->os_phys, size); + os->os_phys_buf = arc_buf_alloc(spa, size, + &os->os_phys_buf, ARC_BUFC_METADATA); + os->os_phys = os->os_phys_buf->b_data; + bzero(os->os_phys, size); } /* @@ -249,61 +266,64 @@ */ if (ds) { err = dsl_prop_register(ds, "primarycache", - primary_cache_changed_cb, osi); + primary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "secondarycache", - secondary_cache_changed_cb, osi); + secondary_cache_changed_cb, os); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", - checksum_changed_cb, osi); + checksum_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "compression", - compression_changed_cb, osi); + compression_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "copies", - copies_changed_cb, osi); + copies_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "logbias", + logbias_changed_cb, os); } if (err) { - VERIFY(arc_buf_remove_ref(osi->os_phys_buf, - &osi->os_phys_buf) == 1); - kmem_free(osi, sizeof (objset_impl_t)); + VERIFY(arc_buf_remove_ref(os->os_phys_buf, + &os->os_phys_buf) == 1); + kmem_free(os, sizeof (objset_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ - osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; - osi->os_compress = ZIO_COMPRESS_LZJB; - osi->os_copies = spa_max_replication(spa); - osi->os_primary_cache = ZFS_CACHE_ALL; - osi->os_secondary_cache = ZFS_CACHE_ALL; + os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; + os->os_compress = ZIO_COMPRESS_LZJB; + os->os_copies = spa_max_replication(spa); + os->os_primary_cache = ZFS_CACHE_ALL; + os->os_secondary_cache = ZFS_CACHE_ALL; } - osi->os_zil_header = osi->os_phys->os_zil_header; - osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); + os->os_zil_header = os->os_phys->os_zil_header; + os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { - list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), + list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); - list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), + list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } - list_create(&osi->os_dnodes, sizeof (dnode_t), + list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); - list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), + list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); - mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); - osi->os_meta_dnode = dnode_special_open(osi, - &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); - if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) { - osi->os_userused_dnode = dnode_special_open(osi, - &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); - osi->os_groupused_dnode = dnode_special_open(osi, - &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); + os->os_meta_dnode = dnode_special_open(os, + &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); + if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { + os->os_userused_dnode = dnode_special_open(os, + &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); + os->os_groupused_dnode = dnode_special_open(os, + &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); } /* @@ -311,117 +331,95 @@ * have ds_opening_lock */ if (ds) { - VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, - dmu_objset_evict)); + mutex_enter(&ds->ds_lock); + ASSERT(ds->ds_objset == NULL); + ds->ds_objset = os; + mutex_exit(&ds->ds_lock); } - *osip = osi; - return (0); -} - -static int -dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type) -{ - objset_impl_t *osi; - - mutex_enter(&ds->ds_opening_lock); - osi = dsl_dataset_get_user_ptr(ds); - if (osi == NULL) { - int err; - - err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), - ds, &ds->ds_phys->ds_bp, &osi); - if (err) { - mutex_exit(&ds->ds_opening_lock); - return (err); - } - } - mutex_exit(&ds->ds_opening_lock); - - os->os = osi; - os->os_mode = DS_MODE_NOHOLD; - - if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) - return (EINVAL); + *osp = os; return (0); } int -dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp) +dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) { - objset_t *os; + int err = 0; + + mutex_enter(&ds->ds_opening_lock); + *osp = ds->ds_objset; + if (*osp == NULL) { + err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), + ds, &ds->ds_phys->ds_bp, osp); + } + mutex_exit(&ds->ds_opening_lock); + return (err); +} + +/* called from zpl */ +int +dmu_objset_hold(const char *name, void *tag, objset_t **osp) +{ + dsl_dataset_t *ds; int err; - os = kmem_alloc(sizeof (objset_t), KM_SLEEP); - err = dmu_objset_open_ds_os(ds, os, type); + err = dsl_dataset_hold(name, tag, &ds); if (err) - kmem_free(os, sizeof (objset_t)); - else - *osp = os; + return (err); + + err = dmu_objset_from_ds(ds, osp); + if (err) + dsl_dataset_rele(ds, tag); + return (err); } /* called from zpl */ int -dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, - objset_t **osp) +dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp) { - objset_t *os; dsl_dataset_t *ds; int err; - ASSERT(DS_MODE_TYPE(mode) == DS_MODE_USER || - DS_MODE_TYPE(mode) == DS_MODE_OWNER); + err = dsl_dataset_own(name, B_FALSE, tag, &ds); + if (err) + return (err); - os = kmem_alloc(sizeof (objset_t), KM_SLEEP); - if (DS_MODE_TYPE(mode) == DS_MODE_USER) - err = dsl_dataset_hold(name, os, &ds); - else - err = dsl_dataset_own(name, mode, os, &ds); + err = dmu_objset_from_ds(ds, osp); if (err) { - kmem_free(os, sizeof (objset_t)); - return (err); + dsl_dataset_disown(ds, tag); + } else if ((type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) || + (!readonly && dsl_dataset_is_snapshot(ds))) { + dmu_objset_disown(*osp, tag); + return (EINVAL); } - err = dmu_objset_open_ds_os(ds, os, type); - if (err) { - if (DS_MODE_TYPE(mode) == DS_MODE_USER) - dsl_dataset_rele(ds, os); - else - dsl_dataset_disown(ds, os); - kmem_free(os, sizeof (objset_t)); - } else { - os->os_mode = mode; - *osp = os; - } return (err); } void -dmu_objset_close(objset_t *os) +dmu_objset_rele(objset_t *os, void *tag) { - ASSERT(DS_MODE_TYPE(os->os_mode) == DS_MODE_USER || - DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER || - DS_MODE_TYPE(os->os_mode) == DS_MODE_NOHOLD); + dsl_dataset_rele(os->os_dsl_dataset, tag); +} - if (DS_MODE_TYPE(os->os_mode) == DS_MODE_USER) - dsl_dataset_rele(os->os->os_dsl_dataset, os); - else if (DS_MODE_TYPE(os->os_mode) == DS_MODE_OWNER) - dsl_dataset_disown(os->os->os_dsl_dataset, os); - kmem_free(os, sizeof (objset_t)); +void +dmu_objset_disown(objset_t *os, void *tag) +{ + dsl_dataset_disown(os->os_dsl_dataset, tag); } int dmu_objset_evict_dbufs(objset_t *os) { - objset_impl_t *osi = os->os; dnode_t *dn; - mutex_enter(&osi->os_lock); + mutex_enter(&os->os_lock); /* process the mdn last, since the other dnodes have holds on it */ - list_remove(&osi->os_dnodes, osi->os_meta_dnode); - list_insert_tail(&osi->os_dnodes, osi->os_meta_dnode); + list_remove(&os->os_dnodes, os->os_meta_dnode); + list_insert_tail(&os->os_dnodes, os->os_meta_dnode); /* * Find the first dnode with holds. We have to do this dance @@ -429,93 +427,93 @@ * hold. If there are no holds then it has no dbufs so OK to * skip. */ - for (dn = list_head(&osi->os_dnodes); + for (dn = list_head(&os->os_dnodes); dn && !dnode_add_ref(dn, FTAG); - dn = list_next(&osi->os_dnodes, dn)) + dn = list_next(&os->os_dnodes, dn)) continue; while (dn) { dnode_t *next_dn = dn; do { - next_dn = list_next(&osi->os_dnodes, next_dn); + next_dn = list_next(&os->os_dnodes, next_dn); } while (next_dn && !dnode_add_ref(next_dn, FTAG)); - mutex_exit(&osi->os_lock); + mutex_exit(&os->os_lock); dnode_evict_dbufs(dn); dnode_rele(dn, FTAG); - mutex_enter(&osi->os_lock); + mutex_enter(&os->os_lock); dn = next_dn; } - mutex_exit(&osi->os_lock); - return (list_head(&osi->os_dnodes) != osi->os_meta_dnode); + mutex_exit(&os->os_lock); + return (list_head(&os->os_dnodes) != os->os_meta_dnode); } void -dmu_objset_evict(dsl_dataset_t *ds, void *arg) +dmu_objset_evict(objset_t *os) { - objset_impl_t *osi = arg; - objset_t os; + dsl_dataset_t *ds = os->os_dsl_dataset; int i; for (i = 0; i < TXG_SIZE; i++) { - ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); - ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); + ASSERT(list_head(&os->os_dirty_dnodes[i]) == NULL); + ASSERT(list_head(&os->os_free_dnodes[i]) == NULL); } if (ds) { if (!dsl_dataset_is_snapshot(ds)) { VERIFY(0 == dsl_prop_unregister(ds, "checksum", - checksum_changed_cb, osi)); + checksum_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "compression", - compression_changed_cb, osi)); + compression_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "copies", - copies_changed_cb, osi)); + copies_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "logbias", + logbias_changed_cb, os)); } VERIFY(0 == dsl_prop_unregister(ds, "primarycache", - primary_cache_changed_cb, osi)); + primary_cache_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", - secondary_cache_changed_cb, osi)); + secondary_cache_changed_cb, os)); } /* * We should need only a single pass over the dnode list, since * nothing can be added to the list at this point. */ - os.os = osi; - (void) dmu_objset_evict_dbufs(&os); + (void) dmu_objset_evict_dbufs(os); - dnode_special_close(osi->os_meta_dnode); - if (osi->os_userused_dnode) { - dnode_special_close(osi->os_userused_dnode); - dnode_special_close(osi->os_groupused_dnode); + dnode_special_close(os->os_meta_dnode); + if (os->os_userused_dnode) { + dnode_special_close(os->os_userused_dnode); + dnode_special_close(os->os_groupused_dnode); } - zil_free(osi->os_zil); + zil_free(os->os_zil); - ASSERT3P(list_head(&osi->os_dnodes), ==, NULL); + ASSERT3P(list_head(&os->os_dnodes), ==, NULL); - VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); - mutex_destroy(&osi->os_lock); - mutex_destroy(&osi->os_obj_lock); - mutex_destroy(&osi->os_user_ptr_lock); - kmem_free(osi, sizeof (objset_impl_t)); + VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); + mutex_destroy(&os->os_lock); + mutex_destroy(&os->os_obj_lock); + mutex_destroy(&os->os_user_ptr_lock); + kmem_free(os, sizeof (objset_t)); } /* called from dsl for meta-objset */ -objset_impl_t * +objset_t * dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx) { - objset_impl_t *osi; + objset_t *os; dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); if (ds) mutex_enter(&ds->ds_opening_lock); - VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &osi)); + VERIFY(0 == dmu_objset_open_impl(spa, ds, bp, &os)); if (ds) mutex_exit(&ds->ds_opening_lock); - mdn = osi->os_meta_dnode; + mdn = os->os_meta_dnode; dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); @@ -550,15 +548,15 @@ ASSERT(type != DMU_OST_NONE); ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); - osi->os_phys->os_type = type; - if (dmu_objset_userused_enabled(osi)) { - osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; - osi->os_flags = osi->os_phys->os_flags; + os->os_phys->os_type = type; + if (dmu_objset_userused_enabled(os)) { + os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; + os->os_flags = os->os_phys->os_flags; } dsl_dataset_dirty(ds, tx); - return (osi); + return (os); } struct oscarg { @@ -613,18 +611,18 @@ if (oa->clone_origin == NULL) { dsl_dataset_t *ds; blkptr_t *bp; - objset_impl_t *osi; + objset_t *os; VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds)); bp = dsl_dataset_get_blkptr(ds); ASSERT(BP_IS_HOLE(bp)); - osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), + os = dmu_objset_create_impl(dsl_dataset_get_spa(ds), ds, bp, oa->type, tx); if (oa->userfunc) - oa->userfunc(&osi->os, oa->userarg, cr, tx); + oa->userfunc(os, oa->userarg, cr, tx); dsl_dataset_rele(ds, FTAG); } @@ -692,7 +690,7 @@ int dmu_objset_destroy(const char *name, boolean_t defer) { - objset_t *os; + dsl_dataset_t *ds; int error; /* @@ -702,15 +700,13 @@ * structure. Only the ZIL knows how to free them, so we have * to call into it here. */ - error = dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os); + error = dsl_dataset_own(name, B_TRUE, FTAG, &ds); if (error == 0) { - dsl_dataset_t *ds = os->os->os_dsl_dataset; - zil_destroy(dmu_objset_zil(os), B_FALSE); - - error = dsl_dataset_destroy(ds, os, defer); + objset_t *os; + if (dmu_objset_from_ds(ds, &os) == 0) + zil_destroy(dmu_objset_zil(os), B_FALSE); + error = dsl_dataset_destroy(ds, FTAG, defer); /* dsl_dataset_destroy() closes the ds. */ - kmem_free(os, sizeof (objset_t)); } return (error); @@ -732,7 +728,7 @@ /* The props have already been checked by zfs_check_userprops(). */ - return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset, + return (dsl_dataset_snapshot_check(os->os_dsl_dataset, sn->snapname, tx)); } @@ -740,7 +736,7 @@ snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) { objset_t *os = arg1; - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; struct snaparg *sn = arg2; dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx); @@ -767,13 +763,13 @@ (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); - err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); + err = dmu_objset_hold(name, sn, &os); if (err != 0) return (err); /* If the objset is in an inconsistent state, return busy */ - if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { - dmu_objset_close(os); + if (os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { + dmu_objset_rele(os, sn); return (EBUSY); } @@ -787,7 +783,7 @@ dsl_sync_task_create(sn->dstg, snapshot_check, snapshot_sync, os, sn, 3); } else { - dmu_objset_close(os); + dmu_objset_rele(os, sn); } return (err); @@ -827,11 +823,11 @@ for (dst = list_head(&sn.dstg->dstg_tasks); dst; dst = list_next(&sn.dstg->dstg_tasks, dst)) { objset_t *os = dst->dst_arg1; - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; if (dst->dst_err) dsl_dataset_name(ds, sn.failed); zil_resume(dmu_objset_zil(os)); - dmu_objset_close(os); + dmu_objset_rele(os, &sn); } if (err) @@ -874,7 +870,7 @@ { blkptr_t *bp = zio->io_bp; blkptr_t *bp_orig = &zio->io_bp_orig; - objset_impl_t *os = arg; + objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; ASSERT(bp == os->os_rootbp); @@ -903,7 +899,7 @@ /* called from dsl */ void -dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) +dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; @@ -1008,7 +1004,7 @@ } boolean_t -dmu_objset_userused_enabled(objset_impl_t *os) +dmu_objset_userused_enabled(objset_t *os) { return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && used_cbs[os->os_phys->os_type] && @@ -1016,7 +1012,7 @@ } void -dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx) +dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx) { dnode_t *dn; list_t *list = &os->os_synced_dnodes; @@ -1035,10 +1031,10 @@ /* Allocate the user/groupused objects if necessary. */ if (os->os_userused_dnode->dn_type == DMU_OT_NONE) { - VERIFY(0 == zap_create_claim(&os->os, + VERIFY(0 == zap_create_claim(os, DMU_USERUSED_OBJECT, DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); - VERIFY(0 == zap_create_claim(&os->os, + VERIFY(0 == zap_create_claim(os, DMU_GROUPUSED_OBJECT, DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } @@ -1065,7 +1061,7 @@ (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf, DN_MAX_BONUSLEN) == 0 && DN_USED_BYTES(dn->dn_oldphys) == 0)); - used_cbs[os->os_phys->os_type](&os->os, bonustype, + used_cbs[os->os_phys->os_type](os, bonustype, DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys), DN_USED_BYTES(dn->dn_oldphys), DN_USED_BYTES(dn->dn_phys), tx); @@ -1086,7 +1082,7 @@ boolean_t dmu_objset_userspace_present(objset_t *os) { - return (os->os->os_phys->os_flags & + return (os->os_phys->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE); } @@ -1098,7 +1094,7 @@ if (dmu_objset_userspace_present(os)) return (0); - if (!dmu_objset_userused_enabled(os->os)) + if (!dmu_objset_userused_enabled(os)) return (ENOTSUP); if (dmu_objset_is_snapshot(os)) return (EINVAL); @@ -1134,7 +1130,7 @@ dmu_tx_commit(tx); } - os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; + os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; txg_wait_synced(dmu_objset_pool(os), 0); return (0); } @@ -1143,35 +1139,35 @@ dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { - dsl_dataset_space(os->os->os_dsl_dataset, refdbytesp, availbytesp, + dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp, usedobjsp, availobjsp); } uint64_t dmu_objset_fsid_guid(objset_t *os) { - return (dsl_dataset_fsid_guid(os->os->os_dsl_dataset)); + return (dsl_dataset_fsid_guid(os->os_dsl_dataset)); } void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat) { - stat->dds_type = os->os->os_phys->os_type; - if (os->os->os_dsl_dataset) - dsl_dataset_fast_stat(os->os->os_dsl_dataset, stat); + stat->dds_type = os->os_phys->os_type; + if (os->os_dsl_dataset) + dsl_dataset_fast_stat(os->os_dsl_dataset, stat); } void dmu_objset_stats(objset_t *os, nvlist_t *nv) { - ASSERT(os->os->os_dsl_dataset || - os->os->os_phys->os_type == DMU_OST_META); + ASSERT(os->os_dsl_dataset || + os->os_phys->os_type == DMU_OST_META); - if (os->os->os_dsl_dataset != NULL) - dsl_dataset_stats(os->os->os_dsl_dataset, nv); + if (os->os_dsl_dataset != NULL) + dsl_dataset_stats(os->os_dsl_dataset, nv); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE, - os->os->os_phys->os_type); + os->os_phys->os_type); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING, dmu_objset_userspace_present(os)); } @@ -1179,8 +1175,8 @@ int dmu_objset_is_snapshot(objset_t *os) { - if (os->os->os_dsl_dataset != NULL) - return (dsl_dataset_is_snapshot(os->os->os_dsl_dataset)); + if (os->os_dsl_dataset != NULL) + return (dsl_dataset_is_snapshot(os->os_dsl_dataset)); else return (B_FALSE); } @@ -1189,7 +1185,7 @@ dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, boolean_t *conflict) { - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; uint64_t ignored; if (ds->ds_phys->ds_snapnames_zapobj == 0) @@ -1204,7 +1200,7 @@ dmu_snapshot_list_next(objset_t *os, int namelen, char *name, uint64_t *idp, uint64_t *offp, boolean_t *case_conflict) { - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; zap_cursor_t cursor; zap_attribute_t attr; @@ -1241,12 +1237,12 @@ dmu_dir_list_next(objset_t *os, int namelen, char *name, uint64_t *idp, uint64_t *offp) { - dsl_dir_t *dd = os->os->os_dsl_dataset->ds_dir; + dsl_dir_t *dd = os->os_dsl_dataset->ds_dir; zap_cursor_t cursor; zap_attribute_t attr; /* there is no next dir on a snapshot! */ - if (os->os->os_dsl_dataset->ds_object != + if (os->os_dsl_dataset->ds_object != dd->dd_phys->dd_head_dataset_obj) return (ENOENT); @@ -1420,7 +1416,7 @@ if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { mutex_enter(&ds->ds_opening_lock); - if (!dsl_dataset_get_user_ptr(ds)) { + if (ds->ds_objset == NULL) { uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; zbookmark_t zb; @@ -1445,13 +1441,13 @@ void dmu_objset_set_user(objset_t *os, void *user_ptr) { - ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock)); - os->os->os_user_ptr = user_ptr; + ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); + os->os_user_ptr = user_ptr; } void * dmu_objset_get_user(objset_t *os) { - ASSERT(MUTEX_HELD(&os->os->os_user_ptr_lock)); - return (os->os->os_user_ptr); + ASSERT(MUTEX_HELD(&os->os_user_ptr_lock)); + return (os->os_user_ptr); }
--- a/usr/src/uts/common/fs/zfs/dmu_send.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu_send.c Mon Aug 17 09:14:35 2009 -0400 @@ -216,8 +216,8 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, vnode_t *vp, offset_t *off) { - dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; - dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL; + dsl_dataset_t *ds = tosnap->os_dsl_dataset; + dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dmu_replay_record_t *drr; struct backuparg ba; int err; @@ -257,7 +257,7 @@ drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION; drr->drr_u.drr_begin.drr_creation_time = ds->ds_phys->ds_creation_time; - drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; + drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type; if (fromorigin) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE; drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; @@ -362,7 +362,7 @@ dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1, rbsa->origin, flags, cr, tx); VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj, - DS_MODE_INCONSISTENT, dmu_recv_tag, &rbsa->ds)); + B_TRUE, dmu_recv_tag, &rbsa->ds)); if (rbsa->origin == NULL) { (void) dmu_objset_create_impl(dd->dd_pool->dp_spa, @@ -431,8 +431,7 @@ /* create and open the temporary clone */ dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname, ohds->ds_prev, flags, cr, tx); - VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, - DS_MODE_INCONSISTENT, dmu_recv_tag, &cds)); + VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds)); /* * If we actually created a non-clone, we need to create the @@ -477,7 +476,7 @@ rbsa.tofs = tofs; rbsa.tosnap = tosnap; - rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL; + rbsa.origin = origin ? origin->os_dsl_dataset : NULL; rbsa.fromguid = drrb->drr_fromguid; rbsa.type = drrb->drr_type; rbsa.tag = FTAG; @@ -880,7 +879,7 @@ /* * Open the objset we are modifying. */ - VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0); + VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0); ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT); @@ -950,8 +949,6 @@ ASSERT(ra.err != 0); out: - dmu_objset_close(os); - if (ra.err != 0) { /* * destroy what we created, so we don't leave it in the
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c Mon Aug 17 09:14:35 2009 -0400 @@ -58,9 +58,9 @@ dmu_tx_t * dmu_tx_create(objset_t *os) { - dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir); + dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir); tx->tx_objset = os; - tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset); + tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset); return (tx); } @@ -98,7 +98,7 @@ int err; if (object != DMU_NEW_OBJECT) { - err = dnode_hold(os->os, object, tx, &dn); + err = dnode_hold(os, object, tx, &dn); if (err) { tx->tx_err = err; return (NULL); @@ -376,7 +376,7 @@ dmu_tx_count_dnode(dmu_tx_hold_t *txh) { dnode_t *dn = txh->txh_dnode; - dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode; + dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode; uint64_t space = mdn->dn_datablksz + ((mdn->dn_nlevels-1) << mdn->dn_indblkshift); @@ -688,7 +688,7 @@ * access the name in this fat-zap so that we'll check * for i/o errors to the leaf blocks, etc. */ - err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name, + err = zap_lookup(dn->dn_objset, dn->dn_object, name, 8, 0, NULL); if (err == EIO) { tx->tx_err = err; @@ -696,7 +696,7 @@ } } - err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add, + err = zap_count_write(dn->dn_objset, dn->dn_object, name, add, &txh->txh_space_towrite, &txh->txh_space_tooverwrite); /* @@ -771,7 +771,7 @@ dnode_t *dn = db->db_dnode; ASSERT(tx->tx_txg != 0); - ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os); + ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset); ASSERT3U(dn->dn_object, ==, db->db.db_object); if (tx->tx_anyobj) @@ -931,7 +931,7 @@ * assume that we won't be able to free or overwrite anything. */ if (tx->tx_objset && - dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) > + dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) > tx->tx_lastsnap_txg) { towrite += tooverwrite; tooverwrite = tofree = 0;
--- a/usr/src/uts/common/fs/zfs/dnode.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dnode.c Mon Aug 17 09:14:35 2009 -0400 @@ -272,7 +272,7 @@ } static dnode_t * -dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, +dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, uint64_t object) { dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP); @@ -309,7 +309,7 @@ static void dnode_destroy(dnode_t *dn) { - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; #ifdef ZFS_DEBUG int i; @@ -488,7 +488,7 @@ } dnode_t * -dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object) +dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object) { dnode_t *dn = dnode_create(os, dnp, NULL, object); DNODE_VERIFY(dn); @@ -535,7 +535,7 @@ * succeeds even for free dnodes. */ int -dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag, +dnode_hold_impl(objset_t *os, uint64_t object, int flag, void *tag, dnode_t **dnp) { int epb, idx, err; @@ -650,7 +650,7 @@ * Return held dnode if the object is allocated, NULL if not. */ int -dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp) +dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp) { return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); } @@ -689,7 +689,7 @@ void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; uint64_t txg = tx->tx_txg; if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { @@ -1248,7 +1248,7 @@ void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx) { - objset_impl_t *os = dn->dn_objset; + objset_t *os = dn->dn_objset; dsl_dataset_t *ds = os->os_dsl_dataset; if (space > 0)
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c Mon Aug 17 09:14:35 2009 -0400 @@ -244,8 +244,8 @@ unique_remove(ds->ds_fsid_guid); - if (ds->ds_user_ptr != NULL) - ds->ds_user_evict_func(ds, ds->ds_user_ptr); + if (ds->ds_objset != NULL) + dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { dsl_dataset_drop_ref(ds->ds_prev, ds); @@ -545,17 +545,14 @@ } int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner, - dsl_dataset_t **dsp) +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp); - - ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER); - + int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); if (err) return (err); - if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { - dsl_dataset_rele(*dsp, owner); + if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + dsl_dataset_rele(*dsp, tag); *dsp = NULL; return (EBUSY); } @@ -622,18 +619,14 @@ } int -dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp) +dsl_dataset_own(const char *name, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(name, owner, dsp); + int err = dsl_dataset_hold(name, tag, dsp); if (err) return (err); - if ((*dsp)->ds_phys->ds_num_children > 0 && - !DS_MODE_IS_READONLY(flags)) { - dsl_dataset_rele(*dsp, owner); - return (EROFS); - } - if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { - dsl_dataset_rele(*dsp, owner); + if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { + dsl_dataset_rele(*dsp, tag); return (EBUSY); } return (0); @@ -705,9 +698,9 @@ } void -dsl_dataset_disown(dsl_dataset_t *ds, void *owner) +dsl_dataset_disown(dsl_dataset_t *ds, void *tag) { - ASSERT((ds->ds_owner == owner && ds->ds_dbuf) || + ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); mutex_enter(&ds->ds_lock); @@ -718,20 +711,20 @@ } mutex_exit(&ds->ds_lock); if (ds->ds_dbuf) - dsl_dataset_drop_ref(ds, owner); + dsl_dataset_drop_ref(ds, tag); else dsl_dataset_evict(ds->ds_dbuf, ds); } boolean_t -dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner) +dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) { boolean_t gotit = FALSE; mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { - ds->ds_owner = owner; + ds->ds_owner = tag; if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) rw_exit(&ds->ds_rwlock); gotit = TRUE; @@ -865,16 +858,15 @@ char *dsname; dsname = kmem_asprintf("%s@%s", name, da->snapname); - err = dsl_dataset_own(dsname, DS_MODE_READONLY | DS_MODE_INCONSISTENT, - da->dstg, &ds); + err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); strfree(dsname); if (err == 0) { struct dsl_ds_destroyarg *dsda; dsl_dataset_make_exclusive(ds, da->dstg); - if (ds->ds_user_ptr) { - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; + if (ds->ds_objset != NULL) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; } dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); dsda->ds = ds; @@ -958,13 +950,12 @@ int error; objset_t *os; - error = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); + error = dmu_objset_from_ds(ds, &os); if (error) return (error); if (dmu_objset_type(os) == DMU_OST_ZVOL) error = zvol_remove_minor(name); - dmu_objset_close(os); return (error); } @@ -1003,9 +994,7 @@ return (error); } #endif - error = dsl_dataset_own(name, - DS_MODE_READONLY | DS_MODE_INCONSISTENT, - tag, &origin); + error = dsl_dataset_own(name, B_TRUE, tag, &origin); kmem_free(name, namelen); if (error) return (error); @@ -1036,9 +1025,9 @@ /* Destroying a snapshot is simpler */ dsl_dataset_make_exclusive(ds, tag); - if (ds->ds_user_ptr) { - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; + if (ds->ds_objset != NULL) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; } /* NOTE: defer is always B_FALSE for non-snapshots */ dsda.defer = defer; @@ -1060,7 +1049,7 @@ if (err) goto out; - err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); + err = dmu_objset_from_ds(ds, &os); if (err) goto out; @@ -1089,7 +1078,7 @@ * context, the user space accounting should be zero. */ if (ds->ds_phys->ds_bp.blk_fill == 0 && - dmu_objset_userused_enabled(os->os)) { + dmu_objset_userused_enabled(os)) { uint64_t count; ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || @@ -1098,7 +1087,6 @@ count == 0); } - dmu_objset_close(os); if (err != ESRCH) goto out; @@ -1109,7 +1097,7 @@ if (err) goto out; - if (ds->ds_user_ptr) { + if (ds->ds_objset) { /* * We need to sync out all in-flight IO before we try * to evict (the dataset evict func is trying to clear @@ -1122,9 +1110,9 @@ * Blow away the dsl_dir + head dataset. */ dsl_dataset_make_exclusive(ds, tag); - if (ds->ds_user_ptr) { - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; } /* @@ -1173,28 +1161,6 @@ return (err); } -void * -dsl_dataset_set_user_ptr(dsl_dataset_t *ds, - void *p, dsl_dataset_evict_func_t func) -{ - void *old; - - mutex_enter(&ds->ds_lock); - old = ds->ds_user_ptr; - if (old == NULL) { - ds->ds_user_ptr = p; - ds->ds_user_evict_func = func; - } - mutex_exit(&ds->ds_lock); - return (old); -} - -void * -dsl_dataset_get_user_ptr(dsl_dataset_t *ds) -{ - return (ds->ds_user_ptr); -} - blkptr_t * dsl_dataset_get_blkptr(dsl_dataset_t *ds) { @@ -1228,7 +1194,7 @@ if (ds == NULL) /* this is the meta-objset */ return; - ASSERT(ds->ds_user_ptr != NULL); + ASSERT(ds->ds_objset != NULL); if (ds->ds_phys->ds_next_snap_obj != 0) panic("dirtying snapshot!"); @@ -1820,12 +1786,12 @@ struct dsl_ds_destroyarg ndsda = {0}; ASSERT3P(origin, ==, dsda->rm_origin); - if (origin->ds_user_ptr) { - origin->ds_user_evict_func(origin, origin->ds_user_ptr); - origin->ds_user_ptr = NULL; + if (origin->ds_objset) { + dmu_objset_evict(origin->ds_objset); + origin->ds_objset = NULL; } - dsl_dataset_rele(origin, ds); + dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = NULL; ndsda.ds = origin; @@ -2010,7 +1976,7 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); - ASSERT(ds->ds_user_ptr != NULL); + ASSERT(ds->ds_objset != NULL); ASSERT(ds->ds_phys->ds_next_snap_obj == 0); /* @@ -2021,7 +1987,7 @@ ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; dsl_dir_dirty(ds->ds_dir, tx); - dmu_objset_sync(ds->ds_user_ptr, zio, tx); + dmu_objset_sync(ds->ds_objset, zio, tx); } void @@ -2577,9 +2543,9 @@ dsl_dataset_t *ds = snap->ds; /* unregister props as dsl_dir is changing */ - if (ds->ds_user_ptr) { - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; } /* move snap name entry */ VERIFY(0 == dsl_dataset_get_snapname(ds)); @@ -2879,15 +2845,14 @@ dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); - if (csa->cds->ds_user_ptr != NULL) { - csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); - csa->cds->ds_user_ptr = NULL; + if (csa->cds->ds_objset != NULL) { + dmu_objset_evict(csa->cds->ds_objset); + csa->cds->ds_objset = NULL; } - if (csa->ohds->ds_user_ptr != NULL) { - csa->ohds->ds_user_evict_func(csa->ohds, - csa->ohds->ds_user_ptr); - csa->ohds->ds_user_ptr = NULL; + if (csa->ohds->ds_objset != NULL) { + dmu_objset_evict(csa->ohds->ds_objset); + csa->ohds->ds_objset = NULL; } /* @@ -3463,9 +3428,9 @@ */ if (!ra->own) return (EBUSY); - if (ds->ds_user_ptr) { - ds->ds_user_evict_func(ds, ds->ds_user_ptr); - ds->ds_user_ptr = NULL; + if (ds->ds_objset) { + dmu_objset_evict(ds->ds_objset); + ds->ds_objset = NULL; } } dsda.ds = ds; @@ -3557,8 +3522,7 @@ return (error); } #endif - if (!dsl_dataset_tryown(ds, - DS_MODE_READONLY | DS_MODE_INCONSISTENT, dtag)) { + if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { dsl_dataset_rele(ds, dtag); return (EBUSY); } else { @@ -3666,3 +3630,24 @@ dsl_dataset_rele(ds, FTAG); return (0); } + +/* + * Note, this fuction is used as the callback for dmu_objset_find(). We + * always return 0 so that we will continue to find and process + * inconsistent datasets, even if we encounter an error trying to + * process one of them. + */ +/* ARGSUSED */ +int +dsl_destroy_inconsistent(char *dsname, void *arg) +{ + dsl_dataset_t *ds; + + if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { + if (DS_IS_INCONSISTENT(ds)) + (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); + else + dsl_dataset_disown(ds, FTAG); + } + return (0); +}
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c Mon Aug 17 09:14:35 2009 -0400 @@ -739,5 +739,5 @@ boolean_t dsl_delegation_on(objset_t *os) { - return (os->os->os_spa->spa_delegation); + return (os->os_spa->spa_delegation); }
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c Mon Aug 17 09:14:35 2009 -0400 @@ -703,7 +703,7 @@ */ if (first && tx->tx_objset) { int error; - dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset; + dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; error = dsl_dataset_check_quota(ds, checkrefquota, asize, est_inflight, &used_on_disk, &ref_rsrv);
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c Mon Aug 17 09:14:35 2009 -0400 @@ -103,13 +103,12 @@ dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dsl_dir_t *dd; dsl_dataset_t *ds; - objset_impl_t *osi; rw_enter(&dp->dp_config_rwlock, RW_WRITER); - err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi); + err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, + &dp->dp_meta_objset); if (err) goto out; - dp->dp_meta_objset = &osi->os; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, @@ -219,7 +218,7 @@ /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ if (dp->dp_meta_objset) - dmu_objset_evict(NULL, dp->dp_meta_objset->os); + dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_dirty_dirs); @@ -242,13 +241,13 @@ int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); - objset_impl_t *osip; + objset_t *os; dsl_dataset_t *ds; uint64_t dsobj; /* create and open the MOS (meta-objset) */ - dp->dp_meta_objset = &dmu_objset_create_impl(spa, - NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os; + dp->dp_meta_objset = dmu_objset_create_impl(spa, + NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -273,10 +272,10 @@ /* create the root objset */ VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - osip = dmu_objset_create_impl(dp->dp_spa, ds, + os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL - zfs_create_fs(&osip->os, kcred, zplprops, tx); + zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); @@ -293,7 +292,7 @@ dsl_dir_t *dd; dsl_dataset_t *ds; dsl_sync_task_group_t *dstg; - objset_impl_t *mosi = dp->dp_meta_objset->os; + objset_t *mos = dp->dp_meta_objset; hrtime_t start, write_time; uint64_t data_written; int err; @@ -323,7 +322,7 @@ for (ds = list_head(&dp->dp_synced_datasets); ds; ds = list_next(&dp->dp_synced_datasets, ds)) - dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx); + dmu_objset_do_userquota_callbacks(ds->ds_objset, tx); /* * Sync the datasets again to push out the changes due to @@ -358,10 +357,10 @@ dsl_pool_scrub_sync(dp, tx); start = gethrtime(); - if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL || - list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) { + if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || + list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); - dmu_objset_sync(mosi, zio, tx); + dmu_objset_sync(mos, zio, tx); err = zio_wait(zio); ASSERT(err == 0); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); @@ -421,8 +420,8 @@ while (ds = list_head(&dp->dp_synced_datasets)) { list_remove(&dp->dp_synced_datasets, ds); - ASSERT(ds->ds_user_ptr != NULL); - zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil); + ASSERT(ds->ds_objset != NULL); + zil_clean(ds->ds_objset->os_zil); dmu_buf_rele(ds->ds_dbuf, ds); } }
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c Mon Aug 17 09:14:35 2009 -0400 @@ -552,7 +552,7 @@ int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local) { - dsl_dataset_t *ds = os->os->os_dsl_dataset; + dsl_dataset_t *ds = os->os_dsl_dataset; dsl_dir_t *dd = ds->ds_dir; boolean_t snapshot = dsl_dataset_is_snapshot(ds); int err = 0;
--- a/usr/src/uts/common/fs/zfs/spa.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/spa.c Mon Aug 17 09:14:35 2009 -0400 @@ -371,12 +371,14 @@ break; } - if (error = dmu_objset_open(strval, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &os)) + if (error = dmu_objset_hold(strval, FTAG, &os)) break; - /* We don't support gzip bootable datasets */ - if ((error = dsl_prop_get_integer(strval, + /* Must be ZPL and not gzip compressed. */ + + if (dmu_objset_type(os) != DMU_OST_ZFS) { + error = ENOTSUP; + } else if ((error = dsl_prop_get_integer(strval, zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL)) == 0 && !BOOTFS_COMPRESS_VALID(compress)) { @@ -384,7 +386,7 @@ } else { objnum = dmu_objset_id(os); } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); } break; @@ -1598,6 +1600,12 @@ */ if (vdev_resilver_needed(rvd, NULL, NULL)) spa_async_request(spa, SPA_ASYNC_RESILVER); + + /* + * Delete any inconsistent datasets. + */ + (void) dmu_objset_find(spa_name(spa), + dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); } error = 0;
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h Mon Aug 17 09:14:35 2009 -0400 @@ -75,7 +75,6 @@ DB_EVICTING } dbuf_states_t; -struct objset_impl; struct dnode; struct dmu_tx; @@ -148,7 +147,7 @@ dmu_buf_t db; /* the objset we belong to */ - struct objset_impl *db_objset; + struct objset *db_objset; /* * the dnode we belong to (NULL when evicted)
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h Mon Aug 17 09:14:35 2009 -0400 @@ -59,7 +59,6 @@ struct zbookmark; struct spa; struct nvlist; -struct objset_impl; struct arc_buf; typedef struct objset objset_t; @@ -140,16 +139,6 @@ void zfs_acl_byteswap(void *buf, size_t size); void zfs_znode_byteswap(void *buf, size_t size); -#define DS_MODE_NOHOLD 0 /* internal use only */ -#define DS_MODE_USER 1 /* simple access, no special needs */ -#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */ -#define DS_MODE_TYPE_MASK 0x3 -#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK) -#define DS_MODE_READONLY 0x8 -#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY) -#define DS_MODE_INCONSISTENT 0x10 -#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT) - #define DS_FIND_SNAPSHOTS (1<<0) #define DS_FIND_CHILDREN (1<<1) @@ -166,11 +155,13 @@ /* * Public routines to create, destroy, open, and close objsets. */ -int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, - objset_t **osp); -int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type, - objset_t **osp); -void dmu_objset_close(objset_t *os); +int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp); +void dmu_objset_rele(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, void *tag); +int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); + int dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); @@ -309,7 +300,7 @@ * Decide how many copies of a given block we should make. Can be from * 1 to SPA_DVAS_PER_BP. */ -int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb, +int dmu_get_replication_level(objset_t *os, struct zbookmark *zb, dmu_object_type_t ot); /* * The bonus data is accessed more or less like a regular buffer. @@ -575,6 +566,7 @@ extern void dmu_objset_name(objset_t *os, char *buf); extern dmu_objset_type_t dmu_objset_type(objset_t *os); extern uint64_t dmu_objset_id(objset_t *os); +extern uint64_t dmu_objset_logbias(objset_t *os); extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, uint64_t *id, uint64_t *offp, boolean_t *case_conflict); extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h Mon Aug 17 09:14:35 2009 -0400 @@ -210,8 +210,7 @@ * * ds_lock * protects: - * ds_user_ptr - * ds_user_evict_func + * ds_objset * ds_open_refcount * ds_snapname * ds_phys accounting
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h Mon Aug 17 09:14:35 2009 -0400 @@ -40,7 +40,6 @@ struct dsl_dataset; struct dmu_tx; -struct objset_impl; #define OBJSET_PHYS_SIZE 2048 #define OBJSET_OLD_PHYS_SIZE 1024 @@ -59,11 +58,6 @@ } objset_phys_t; struct objset { - struct objset_impl *os; - int os_mode; -}; - -typedef struct objset_impl { /* Immutable: */ struct dsl_dataset *os_dsl_dataset; spa_t *os_spa; @@ -73,12 +67,12 @@ dnode_t *os_userused_dnode; dnode_t *os_groupused_dnode; zilog_t *os_zil; - objset_t os; uint8_t os_checksum; /* can change, under dsl_dir's locks */ uint8_t os_compress; /* can change, under dsl_dir's locks */ uint8_t os_copies; /* can change, under dsl_dir's locks */ uint8_t os_primary_cache; /* can change, under dsl_dir's locks */ uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */ + uint8_t os_logbias; /* can change, under dsl_dir's locks */ /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ @@ -101,7 +95,7 @@ /* stuff we store for the user */ kmutex_t os_user_ptr_lock; void *os_user_ptr; -} objset_impl_t; +}; #define DMU_META_DNODE_OBJECT 0 #define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0) @@ -111,9 +105,13 @@ (os)->os_secondary_cache == ZFS_CACHE_METADATA) /* called from zpl */ -int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode, - objset_t **osp); -void dmu_objset_close(objset_t *os); +int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, void *tag, objset_t **osp); +void dmu_objset_rele(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, void *tag); +int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); + int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, @@ -135,14 +133,14 @@ int dmu_objset_evict_dbufs(objset_t *os); /* called from dsl */ -void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx); -objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, +void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); +objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, - objset_impl_t **osip); -void dmu_objset_evict(struct dsl_dataset *ds, void *arg); -void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx); -boolean_t dmu_objset_userused_enabled(objset_impl_t *os); + objset_t **osp); +void dmu_objset_evict(objset_t *os); +void dmu_objset_do_userquota_callbacks(objset_t *os, dmu_tx_t *tx); +boolean_t dmu_objset_userused_enabled(objset_t *os); int dmu_objset_userspace_upgrade(objset_t *os); boolean_t dmu_objset_userspace_present(objset_t *os);
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h Mon Aug 17 09:14:35 2009 -0400 @@ -88,7 +88,7 @@ #define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) struct dmu_buf_impl; -struct objset_impl; +struct objset; struct zio; enum dnode_dirtycontext { @@ -136,7 +136,7 @@ list_node_t dn_link; /* immutable: */ - struct objset_impl *dn_objset; + struct objset *dn_objset; uint64_t dn_object; struct dmu_buf_impl *dn_dbuf; dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */ @@ -202,14 +202,14 @@ uint64_t fr_nblks; } free_range_t; -dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp, +dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp, uint64_t object); void dnode_special_close(dnode_t *dn); void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx); -int dnode_hold(struct objset_impl *dd, uint64_t object, +int dnode_hold(struct objset *dd, uint64_t object, void *ref, dnode_t **dnp); -int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag, +int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, void *ref, dnode_t **dnp); boolean_t dnode_add_ref(dnode_t *dn, void *ref); void dnode_rele(dnode_t *dn, void *ref);
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h Mon Aug 17 09:14:35 2009 -0400 @@ -42,8 +42,6 @@ struct dsl_dir; struct dsl_pool; -typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *); - #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) @@ -132,8 +130,7 @@ * Protected by ds_lock: */ kmutex_t ds_lock; - void *ds_user_ptr; - dsl_dataset_evict_func_t *ds_user_evict_func; + objset_t *ds_objset; uint64_t ds_userrefs; /* @@ -174,17 +171,17 @@ int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); -int dsl_dataset_own(const char *name, int flags, void *owner, - dsl_dataset_t **dsp); +int dsl_dataset_own(const char *name, boolean_t inconsistentok, + void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - int flags, void *owner, dsl_dataset_t **); + boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); -void dsl_dataset_disown(dsl_dataset_t *ds, void *owner); +void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, - void *owner); -void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner); + void *tag); +void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, @@ -205,10 +202,6 @@ boolean_t recursive); int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); -void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds, - void *p, dsl_dataset_evict_func_t func); -void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds); - blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); @@ -245,6 +238,8 @@ int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation, dmu_tx_t *tx); +int dsl_destroy_inconsistent(char *dsname, void *arg); + #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
--- a/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/zfs_vfsops.h Mon Aug 17 09:14:35 2009 -0400 @@ -132,8 +132,8 @@ extern uint_t zfs_fsyncer_key; -extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode); -extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode); +extern int zfs_suspend_fs(zfsvfs_t *zfsvfs); +extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname); extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t *valuep); extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, @@ -143,7 +143,7 @@ extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid); extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers); -extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp); +extern int zfsvfs_create(const char *name, zfsvfs_t **zvp); extern void zfsvfs_free(zfsvfs_t *zfsvfs); #ifdef __cplusplus
--- a/usr/src/uts/common/fs/zfs/sys/zil.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/zil.h Mon Aug 17 09:14:35 2009 -0400 @@ -397,6 +397,8 @@ extern void zil_add_block(zilog_t *zilog, blkptr_t *bp); +extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); + extern int zil_disable; #ifdef __cplusplus
--- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h Mon Aug 17 09:14:35 2009 -0400 @@ -83,6 +83,7 @@ uint8_t zl_stop_sync; /* for debugging */ uint8_t zl_writer; /* boolean: write setup in progress */ uint8_t zl_log_error; /* boolean: log write error */ + uint8_t zl_logbias; /* latency or throughput */ list_t zl_itx_list; /* in-memory itx list */ uint64_t zl_itx_list_sz; /* total size of records on list */ uint64_t zl_cur_used; /* current commit log size used */
--- a/usr/src/uts/common/fs/zfs/sys/zio.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/sys/zio.h Mon Aug 17 09:14:35 2009 -0400 @@ -383,7 +383,7 @@ boolean_t labels); extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, - blkptr_t *old_bp, uint64_t txg); + blkptr_t *old_bp, uint64_t txg, boolean_t bypass_slog); extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg); extern void zio_flush(zio_t *zio, vdev_t *vd);
--- a/usr/src/uts/common/fs/zfs/zfs_acl.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_acl.c Mon Aug 17 09:14:35 2009 -0400 @@ -2011,8 +2011,6 @@ if (mask & VSA_ACE) { size_t aclsz; - zfs_acl_node_t *aclnode = list_head(&aclp->z_acl); - aclsz = count * sizeof (ace_t) + sizeof (ace_object_t) * largeace; @@ -2023,8 +2021,17 @@ zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr, vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); else { - bcopy(aclnode->z_acldata, vsecp->vsa_aclentp, - count * sizeof (ace_t)); + zfs_acl_node_t *aclnode; + void *start = vsecp->vsa_aclentp; + + for (aclnode = list_head(&aclp->z_acl); aclnode; + aclnode = list_next(&aclp->z_acl, aclnode)) { + bcopy(aclnode->z_acldata, start, + aclnode->z_size); + start = (caddr_t)start + aclnode->z_size; + } + ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == + aclp->z_acl_bytes); } } if (mask & VSA_ACE_ACLFLAGS) {
--- a/usr/src/uts/common/fs/zfs/zfs_ctldir.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_ctldir.c Mon Aug 17 09:14:35 2009 -0400 @@ -853,8 +853,7 @@ */ return (err == EILSEQ ? ENOENT : err); } - if (dmu_objset_open(snapname, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &snap) != 0) { + if (dmu_objset_hold(snapname, FTAG, &snap) != 0) { mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); return (ENOENT); @@ -866,7 +865,7 @@ *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); avl_insert(&sdp->sd_snaps, sep, where); - dmu_objset_close(snap); + dmu_objset_rele(snap, FTAG); domount: mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) + strlen("/.zfs/snapshot/") + strlen(nm) + 1;
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Mon Aug 17 09:14:35 2009 -0400 @@ -175,22 +175,15 @@ static boolean_t zfs_is_bootfs(const char *name) { - spa_t *spa; - boolean_t ret = B_FALSE; - - if (spa_open(name, &spa, FTAG) == 0) { - if (spa->spa_bootfs) { - objset_t *os; - - if (dmu_objset_open(name, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { - ret = (dmu_objset_id(os) == spa->spa_bootfs); - dmu_objset_close(os); - } - } - spa_close(spa, FTAG); + objset_t *os; + + if (dmu_objset_hold(name, FTAG, &os) == 0) { + boolean_t ret; + ret = (dmu_objset_id(os) == dmu_objset_spa(os)->spa_bootfs); + dmu_objset_rele(os, FTAG); + return (ret); } - return (ret); + return (B_FALSE); } /* @@ -224,13 +217,17 @@ objset_t *os; boolean_t rc = B_TRUE; - if (dmu_objset_open(name, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { + if (dmu_objset_hold(name, FTAG, &os) == 0) { uint64_t zplversion; + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (B_TRUE); + } + /* XXX reading from non-owned objset */ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) rc = zplversion < version; - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); } return (rc); } @@ -548,20 +545,19 @@ if (error) return (error); - error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &clone); + error = dmu_objset_hold(zc->zc_name, FTAG, &clone); if (error == 0) { dsl_dataset_t *pclone = NULL; dsl_dir_t *dd; - dd = clone->os->os_dsl_dataset->ds_dir; + dd = clone->os_dsl_dataset->ds_dir; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &pclone); rw_exit(&dd->dd_pool->dp_config_rwlock); if (error) { - dmu_objset_close(clone); + dmu_objset_rele(clone, FTAG); return (error); } @@ -569,7 +565,7 @@ ZFS_DELEG_PERM_MOUNT, cr); dsl_dataset_name(pclone, parentname); - dmu_objset_close(clone); + dmu_objset_rele(clone, FTAG); dsl_dataset_rele(pclone, FTAG); if (error == 0) error = zfs_secpolicy_write_perms(parentname, @@ -840,20 +836,23 @@ objset_t *os; int error; - error = dmu_objset_open(dsname, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &os); + error = dmu_objset_hold(dsname, FTAG, &os); if (error) return (error); - - mutex_enter(&os->os->os_user_ptr_lock); + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (EINVAL); + } + + mutex_enter(&os->os_user_ptr_lock); *zvp = dmu_objset_get_user(os); if (*zvp) { VFS_HOLD((*zvp)->z_vfs); } else { error = ESRCH; } - mutex_exit(&os->os->os_user_ptr_lock); - dmu_objset_close(os); + mutex_exit(&os->os_user_ptr_lock); + dmu_objset_rele(os, FTAG); return (error); } @@ -862,13 +861,12 @@ * case its z_vfs will be NULL, and it will be opened as the owner. */ static int -zfsvfs_hold(const char *name, boolean_t readonly, void *tag, zfsvfs_t **zvp) +zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zvp) { int error = 0; - int mode = DS_MODE_OWNER | (readonly ? DS_MODE_READONLY : 0); if (getzfsvfs(name, zvp) != 0) - error = zfsvfs_create(name, mode, zvp); + error = zfsvfs_create(name, zvp); if (error == 0) { rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag); if ((*zvp)->z_unmounted) { @@ -892,7 +890,7 @@ if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); } else { - dmu_objset_close(zfsvfs->z_os); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } } @@ -1186,18 +1184,30 @@ return (0); } +/* + * inputs: + * zc_name name of filesystem + * zc_obj object to find + * + * outputs: + * zc_value name of object + */ static int zfs_ioc_obj_to_path(zfs_cmd_t *zc) { - objset_t *osp; + objset_t *os; int error; - if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS, - DS_MODE_USER | DS_MODE_READONLY, &osp)) != 0) + /* XXX reading from objset not owned */ + if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); - error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value, + if (dmu_objset_type(os) != DMU_OST_ZFS) { + dmu_objset_rele(os, FTAG); + return (EINVAL); + } + error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); - dmu_objset_close(osp); + dmu_objset_rele(os, FTAG); return (error); } @@ -1380,8 +1390,7 @@ int error; nvlist_t *nv; - if (error = dmu_objset_open(zc->zc_name, - DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) return (error); dmu_objset_fast_stat(os, &zc->zc_objset_stats); @@ -1394,6 +1403,7 @@ * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... + * XXX reading with out owning */ if (!zc->zc_objset_stats.dds_inconsistent) { if (dmu_objset_type(os) == DMU_OST_ZVOL) @@ -1403,7 +1413,7 @@ nvlist_free(nv); } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (error); } @@ -1438,8 +1448,8 @@ objset_t *os; int err; - if (err = dmu_objset_open(zc->zc_name, - DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) + /* XXX reading without owning */ + if (err = dmu_objset_hold(zc->zc_name, FTAG, &os)) return (err); dmu_objset_fast_stat(os, &zc->zc_objset_stats); @@ -1464,7 +1474,7 @@ } else { err = ENOENT; } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (err); } @@ -1505,8 +1515,7 @@ int error; char *p; - if (error = dmu_objset_open(zc->zc_name, - DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os)) { + if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) { if (error == ENOENT) error = ESRCH; return (error); @@ -1536,7 +1545,7 @@ if (error == ENOENT) error = ESRCH; } while (error == 0 && dataset_name_hidden(zc->zc_name)); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); if (error == 0) error = zfs_ioc_objset_stats(zc); /* fill in the stats */ @@ -1562,8 +1571,7 @@ objset_t *os; int error; - error = dmu_objset_open(zc->zc_name, - DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os); + error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error == ENOENT ? ESRCH : error); @@ -1576,14 +1584,14 @@ * so exit immediately. */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (ESRCH); } error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); if (error == 0) error = zfs_ioc_objset_stats(zc); /* fill in the stats */ else if (error == ENOENT) @@ -1726,8 +1734,7 @@ domain = propname + strlen(zfs_userquota_prop_prefixes[type]); - error = zfsvfs_hold(name, B_FALSE, FTAG, - &zfsvfs); + error = zfsvfs_hold(name, FTAG, &zfsvfs); if (error == 0) { error = zfs_set_userquota(zfsvfs, type, domain, rid, quota); @@ -1794,8 +1801,7 @@ if ((error = nvpair_value_uint64(elem, &intval)) != 0) goto out; - if ((error = zfsvfs_hold(name, B_FALSE, FTAG, - &zfsvfs)) != 0) + if ((error = zfsvfs_hold(name, FTAG, &zfsvfs)) != 0) goto out; error = zfs_set_version(zfsvfs, intval); zfsvfs_rele(zfsvfs, FTAG); @@ -1911,13 +1917,12 @@ nvlist_t *origprops; objset_t *os; - if (dmu_objset_open(zc->zc_name, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { + if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { if (dsl_prop_get_all(os, &origprops, TRUE) == 0) { clear_props(zc->zc_name, origprops, nvl); nvlist_free(origprops); } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); } } @@ -2316,13 +2321,12 @@ /* * Open parent object set so we can inherit zplprop values. */ - if ((error = dmu_objset_open(parentname, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) + if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) return (error); error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops, zplprops, is_ci); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (error); } @@ -2399,15 +2403,14 @@ return (EINVAL); } - error = dmu_objset_open(zc->zc_value, type, - DS_MODE_USER | DS_MODE_READONLY, &clone); + error = dmu_objset_hold(zc->zc_value, FTAG, &clone); if (error) { nvlist_free(nvprops); return (error); } error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_close(clone); + dmu_objset_rele(clone, FTAG); if (error) { nvlist_free(nvprops); return (error); @@ -2644,7 +2647,7 @@ if (error) goto out; - error = dsl_dataset_own(clone_name, DS_MODE_INCONSISTENT, FTAG, &clone); + error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); if (error) goto out; @@ -2652,9 +2655,7 @@ * Do clone swap. */ if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { - int mode; - - error = zfs_suspend_fs(zfsvfs, NULL, &mode); + error = zfs_suspend_fs(zfsvfs); if (error == 0) { int resume_err; @@ -2666,7 +2667,7 @@ } else { error = EBUSY; } - resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode); + resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); error = error ? error : resume_err; } VFS_RELE(zfsvfs->z_vfs); @@ -2796,20 +2797,18 @@ return (EBADF); } - if (props && dmu_objset_open(tofs, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { + if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { /* * If new properties are supplied, they are to completely * replace the existing ones, so stash away the existing ones. */ - (void) dsl_prop_get_all(os, &origprops, TRUE); - - dmu_objset_close(os); + (void) dsl_prop_get_all(os, &origprops, B_TRUE); + + dmu_objset_rele(os, FTAG); } if (zc->zc_string[0]) { - error = dmu_objset_open(zc->zc_string, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &origin); + error = dmu_objset_hold(zc->zc_string, FTAG, &origin); if (error) goto out; } @@ -2817,7 +2816,7 @@ error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record, force, origin, &drc); if (origin) - dmu_objset_close(origin); + dmu_objset_rele(origin, FTAG); if (error) goto out; @@ -2842,11 +2841,8 @@ if (getzfsvfs(tofs, &zfsvfs) == 0) { /* online recv */ int end_err; - char *osname; - int mode; - - osname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - error = zfs_suspend_fs(zfsvfs, osname, &mode); + + error = zfs_suspend_fs(zfsvfs); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. @@ -2854,12 +2850,11 @@ end_err = dmu_recv_end(&drc); if (error == 0) { int resume_err = - zfs_resume_fs(zfsvfs, osname, mode); + zfs_resume_fs(zfsvfs, tofs); error = error ? error : resume_err; } error = error ? error : end_err; VFS_RELE(zfsvfs->z_vfs); - kmem_free(osname, MAXNAMELEN); } else { error = dmu_recv_end(&drc); } @@ -2901,8 +2896,7 @@ int error; offset_t off; - error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &tosnap); + error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); if (error) return (error); @@ -2916,20 +2910,19 @@ if (cp) *(cp+1) = 0; (void) strncat(buf, zc->zc_value, MAXPATHLEN); - error = dmu_objset_open(buf, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &fromsnap); + error = dmu_objset_hold(buf, FTAG, &fromsnap); kmem_free(buf, MAXPATHLEN); if (error) { - dmu_objset_close(tosnap); + dmu_objset_rele(tosnap, FTAG); return (error); } } fp = getf(zc->zc_cookie); if (fp == NULL) { - dmu_objset_close(tosnap); + dmu_objset_rele(tosnap, FTAG); if (fromsnap) - dmu_objset_close(fromsnap); + dmu_objset_rele(fromsnap, FTAG); return (EBADF); } @@ -2940,8 +2933,8 @@ fp->f_offset = off; releasef(zc->zc_cookie); if (fromsnap) - dmu_objset_close(fromsnap); - dmu_objset_close(tosnap); + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); return (error); } @@ -3099,7 +3092,7 @@ if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); - error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs); + error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs); if (error) return (error); @@ -3127,7 +3120,7 @@ zfsvfs_t *zfsvfs; int error; - error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs); + error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs); if (error) return (error); @@ -3163,30 +3156,27 @@ zfsvfs_t *zfsvfs; if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { - if (!dmu_objset_userused_enabled(zfsvfs->z_os->os)) { + if (!dmu_objset_userused_enabled(zfsvfs->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. */ - int mode; - error = zfs_suspend_fs(zfsvfs, NULL, &mode); - if (error == 0) { - error = zfs_resume_fs(zfsvfs, - zc->zc_name, mode); - } + error = zfs_suspend_fs(zfsvfs); + if (error == 0) + error = zfs_resume_fs(zfsvfs, zc->zc_name); } if (error == 0) error = dmu_objset_userspace_upgrade(zfsvfs->z_os); VFS_RELE(zfsvfs->z_vfs); } else { - error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, - DS_MODE_USER, &os); + /* XXX kind of reading contents without owning */ + error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_objset_userspace_upgrade(os); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); } return (error);
--- a/usr/src/uts/common/fs/zfs/zfs_log.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_log.c Mon Aug 17 09:14:35 2009 -0400 @@ -474,14 +474,19 @@ itx_wr_state_t write_state; boolean_t slogging; uintptr_t fsync_cnt; + ssize_t immediate_write_sz; if (zilog == NULL || zp->z_unlinked) return; ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ - slogging = spa_has_slogs(zilog->zl_spa); - if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz) + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 0 : zfs_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz) write_state = WR_INDIRECT; else if (ioflag & (FSYNC | FDSYNC)) write_state = WR_COPIED;
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c Mon Aug 17 09:14:35 2009 -0400 @@ -835,32 +835,30 @@ } int -zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp) +zfsvfs_create(const char *osname, zfsvfs_t **zvp) { objset_t *os; zfsvfs_t *zfsvfs; uint64_t zval; int i, error; - if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL)) - return (error); - if (zval) - mode |= DS_MODE_READONLY; + zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); - error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); - if (error == EROFS) { - mode |= DS_MODE_READONLY; - error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os); + /* + * We claim to always be readonly so we can open snapshots; + * other ZPL code will prevent us from writing to snapshots. + */ + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + if (error) { + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); } - if (error) - return (error); /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ - zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; @@ -948,7 +946,7 @@ return (0); out: - dmu_objset_close(os); + dmu_objset_disown(os, zfsvfs); *zvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -966,9 +964,9 @@ /* * Set the objset user_ptr to track its zfsvfs. */ - mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); if (zil_disable) { @@ -1084,7 +1082,7 @@ ASSERT(vfsp); ASSERT(osname); - error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs); + error = zfsvfs_create(osname, &zfsvfs); if (error) return (error); zfsvfs->z_vfs = vfsp; @@ -1146,9 +1144,9 @@ xattr_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; - mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); + mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); } else { error = zfsvfs_setup(zfsvfs, B_TRUE); } @@ -1157,7 +1155,7 @@ zfsctl_create(zfsvfs); out: if (error) { - dmu_objset_close(zfsvfs->z_os); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_add_32(&zfs_active_fs_count, 1); @@ -1725,14 +1723,14 @@ /* * Unset the objset user_ptr. */ - mutex_enter(&os->os->os_user_ptr_lock); + mutex_enter(&os->os_user_ptr_lock); dmu_objset_set_user(os, NULL); - mutex_exit(&os->os->os_user_ptr_lock); + mutex_exit(&os->os_user_ptr_lock); /* * Finally release the objset */ - dmu_objset_close(os); + dmu_objset_disown(os, zfsvfs); } /* @@ -1835,17 +1833,13 @@ * 'z_teardown_inactive_lock' write held. */ int -zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep) +zfs_suspend_fs(zfsvfs_t *zfsvfs) { int error; if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) return (error); - - *modep = zfsvfs->z_os->os_mode; - if (name) - dmu_objset_name(zfsvfs->z_os, name); - dmu_objset_close(zfsvfs->z_os); + dmu_objset_disown(zfsvfs->z_os, zfsvfs); return (0); } @@ -1854,14 +1848,15 @@ * Reopen zfsvfs_t::z_os and release VOPs. */ int -zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) +zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) { int err; ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); - err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); + err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, + &zfsvfs->z_os); if (err) { zfsvfs->z_os = NULL; } else {
--- a/usr/src/uts/common/fs/zfs/zil.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zil.c Mon Aug 17 09:14:35 2009 -0400 @@ -367,7 +367,7 @@ } error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk, - NULL, txg); + NULL, txg, zilog->zl_logbias != ZFS_LOGBIAS_LATENCY); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -500,7 +500,7 @@ objset_t *os; int error; - error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); + error = dmu_objset_hold(osname, FTAG, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); @@ -549,7 +549,7 @@ } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (0); } @@ -571,7 +571,7 @@ zil_trailer_t *ztp; int error; - error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); + error = dmu_objset_hold(osname, FTAG, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); @@ -581,7 +581,7 @@ zh = zil_header_in_syncing_context(zilog); blk = zh->zh_log; if (BP_IS_HOLE(&blk)) { - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (0); /* no chain */ } @@ -594,7 +594,7 @@ blk = ztp->zit_next_blk; VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); } - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); if (error == ECKSUM) return (0); /* normal end of chain */ return (error); @@ -791,7 +791,8 @@ BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg); + error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg, + zilog->zl_logbias != ZFS_LOGBIAS_LATENCY); if (error) { dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg); @@ -1280,6 +1281,12 @@ kmem_cache_destroy(zil_lwb_cache); } +void +zil_set_logbias(zilog_t *zilog, uint64_t logbias) +{ + zilog->zl_logbias = logbias; +} + zilog_t * zil_alloc(objset_t *os, zil_header_t *zh_phys) { @@ -1292,6 +1299,7 @@ zilog->zl_spa = dmu_objset_spa(os); zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; + zilog->zl_logbias = dmu_objset_logbias(os); mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1658,7 +1666,7 @@ zilog_t *zilog; int error; - error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); + error = dmu_objset_hold(osname, FTAG, &os); if (error) return (error); @@ -1667,6 +1675,6 @@ error = EEXIST; else zil_resume(zilog); - dmu_objset_close(os); + dmu_objset_rele(os, FTAG); return (error); }
--- a/usr/src/uts/common/fs/zfs/zio.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zio.c Mon Aug 17 09:14:35 2009 -0400 @@ -1716,12 +1716,13 @@ */ int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t txg) + uint64_t txg, boolean_t bypass_slog) { - int error; + int error = 1; - error = metaslab_alloc(spa, spa->spa_log_class, size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + if (!bypass_slog) + error = metaslab_alloc(spa, spa->spa_log_class, size, + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); if (error) error = metaslab_alloc(spa, spa->spa_normal_class, size,
--- a/usr/src/uts/common/fs/zfs/zvol.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/fs/zfs/zvol.c Mon Aug 17 09:14:35 2009 -0400 @@ -80,6 +80,7 @@ #include "zfs_namecheck.h" static void *zvol_state; +static char *zvol_tag = "zvol_tag"; #define ZVOL_DUMPSIZE "dumpsize" @@ -109,7 +110,7 @@ uint8_t zv_min_bs; /* minimum addressable block shift */ uint8_t zv_flags; /* readonly, dumpified, etc. */ objset_t *zv_objset; /* objset handle */ - uint32_t zv_mode; /* DS_MODE_* flags at open time */ + boolean_t zv_issnap; /* is a snapshot (read-only) */ uint32_t zv_open_count[OTYPCNT]; /* open counts */ uint32_t zv_total_opens; /* total open count */ zilog_t *zv_zilog; /* ZIL handle */ @@ -432,7 +433,6 @@ uint64_t volsize; minor_t minor = 0; struct pathname linkpath; - int ds_mode = DS_MODE_OWNER; vnode_t *vp = NULL; char *devpath; char chrbuf[30], blkbuf[30]; @@ -445,10 +445,8 @@ return (EEXIST); } - if (strchr(name, '@') != 0) - ds_mode |= DS_MODE_READONLY; - - error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); + /* lie and say we're read-only */ + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) { mutex_exit(&zvol_state_lock); @@ -458,7 +456,7 @@ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { - dmu_objset_close(os); + dmu_objset_disown(os, zvol_tag); mutex_exit(&zvol_state_lock); return (error); } @@ -500,13 +498,13 @@ minor = zvol_minor_alloc(); if (minor == 0) { - dmu_objset_close(os); + dmu_objset_disown(os, zvol_tag); mutex_exit(&zvol_state_lock); return (ENXIO); } if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) { - dmu_objset_close(os); + dmu_objset_disown(os, zvol_tag); mutex_exit(&zvol_state_lock); return (EAGAIN); } @@ -519,7 +517,7 @@ if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_soft_state_free(zvol_state, minor); - dmu_objset_close(os); + dmu_objset_disown(os, zvol_tag); mutex_exit(&zvol_state_lock); return (EAGAIN); } @@ -530,7 +528,7 @@ minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); ddi_soft_state_free(zvol_state, minor); - dmu_objset_close(os); + dmu_objset_disown(os, zvol_tag); mutex_exit(&zvol_state_lock); return (EAGAIN); } @@ -542,7 +540,7 @@ zv->zv_minor = minor; zv->zv_volsize = volsize; zv->zv_objset = os; - zv->zv_mode = ds_mode; + zv->zv_issnap = dmu_objset_is_snapshot(os); zv->zv_zilog = zil_open(os, zvol_get_data); mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, @@ -600,7 +598,7 @@ zil_close(zv->zv_zilog); zv->zv_zilog = NULL; - dmu_objset_close(zv->zv_objset); + dmu_objset_disown(zv->zv_objset, zvol_tag); zv->zv_objset = NULL; avl_destroy(&zv->zv_znode.z_range_avl); mutex_destroy(&zv->zv_znode.z_range_lock); @@ -704,7 +702,7 @@ * If we are doing a "zfs clone -o volsize=", then the * minor node won't exist yet. */ - error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER, + error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, FTAG, &state.zv_objset); if (error != 0) goto out; @@ -717,7 +715,7 @@ doi.doi_data_block_size)) != 0) goto out; - if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { + if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) { error = EROFS; goto out; } @@ -760,7 +758,7 @@ out: if (state.zv_objset) - dmu_objset_close(state.zv_objset); + dmu_objset_disown(state.zv_objset, FTAG); mutex_exit(&zvol_state_lock); @@ -788,7 +786,7 @@ mutex_exit(&zvol_state_lock); return (ENXIO); } - if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) { + if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) { if (needlock) mutex_exit(&zvol_state_lock); return (EROFS); @@ -836,7 +834,7 @@ ASSERT(zv->zv_objset != NULL); if ((flag & FWRITE) && - (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY))) { + (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) { mutex_exit(&zvol_state_lock); return (EROFS); } @@ -992,6 +990,7 @@ uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog = zv->zv_zilog; boolean_t slogging; + ssize_t immediate_write_sz; if (zil_disable) return; @@ -1003,7 +1002,11 @@ return; } - slogging = spa_has_slogs(zilog->zl_spa); + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 0 : zvol_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); while (resid) { itx_t *itx; @@ -1015,7 +1018,7 @@ * Unlike zfs_log_write() we can be called with * upto DMU_MAX_ACCESS/2 (5MB) writes. */ - if (blocksize > zvol_immediate_write_sz && !slogging && + if (blocksize > immediate_write_sz && !slogging && resid >= blocksize && off % blocksize == 0) { write_state = WR_INDIRECT; /* uses dmu_sync */ len = blocksize; @@ -1160,8 +1163,7 @@ } if (!(bp->b_flags & B_READ) && - (zv->zv_flags & ZVOL_RDONLY || - zv->zv_mode & DS_MODE_READONLY)) { + (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap)) { bioerror(bp, EROFS); biodone(bp); return (0); @@ -1721,7 +1723,7 @@ dmu_tx_t *tx; objset_t *os = zv->zv_objset; - if (zv->zv_flags & ZVOL_RDONLY || (zv->zv_mode & DS_MODE_READONLY)) + if (zv->zv_flags & ZVOL_RDONLY || zv->zv_issnap) return (EROFS); /*
--- a/usr/src/uts/common/inet/tcp.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/inet/tcp.h Mon Aug 17 09:14:35 2009 -0400 @@ -536,7 +536,6 @@ */ struct tcp_s *tcp_loopback_peer; /* peer tcp for loopback */ mblk_t *tcp_fused_sigurg_mp; /* M_PCSIG mblk for SIGURG */ - size_t tcp_fuse_rcv_hiwater; /* fusion receive queue size */ uint32_t tcp_fused : 1, /* loopback tcp in fusion mode */
--- a/usr/src/uts/common/inet/tcp/tcp.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/inet/tcp/tcp.c Mon Aug 17 09:14:35 2009 -0400 @@ -789,7 +789,6 @@ static void tcp_process_options(tcp_t *, tcph_t *); static void tcp_rput_common(tcp_t *tcp, mblk_t *mp); static void tcp_rsrv(queue_t *q); -static int tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd); static int tcp_snmp_state(tcp_t *tcp); static void tcp_timer(void *arg); static void tcp_timer_callback(void *); @@ -1373,6 +1372,28 @@ iph, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, \ ip6_t *, ip6h, int, 0); +static void +tcp_set_recv_threshold(tcp_t *tcp, uint32_t new_rcvthresh) +{ + uint32_t default_threshold = SOCKET_RECVHIWATER >> 3; + + if (IPCL_IS_NONSTR(tcp->tcp_connp)) { + conn_t *connp = tcp->tcp_connp; + struct sock_proto_props sopp; + + /* + * only increase rcvthresh upto default_threshold + */ + if (new_rcvthresh > default_threshold) + new_rcvthresh = default_threshold; + + sopp.sopp_flags = SOCKOPT_RCVTHRESH; + sopp.sopp_rcvthresh = new_rcvthresh; + + (*connp->conn_upcalls->su_set_proto_props) + (connp->conn_upper_handle, &sopp); + } +} /* * Figure out the value of window scale opton. Note that the rwnd is * ASSUMED to be rounded up to the nearest MSS before the calculation. @@ -5536,10 +5557,10 @@ } /* - * listener->tcp_rq->q_hiwat should be the default window size or a - * window size changed via SO_RCVBUF option. First round up the - * eager's tcp_rwnd to the nearest MSS. Then find out the window - * scale option value if needed. Call tcp_rwnd_set() to finish the + * listeners tcp_recv_hiwater should be the default window size or a + * window size changed via SO_RCVBUF option. First round up the + * eager's tcp_rwnd to the nearest MSS. Then find out the window + * scale option value if needed. Call tcp_rwnd_set() to finish the * setting. * * Note if there is a rpipe metric associated with the remote host, @@ -7568,8 +7589,6 @@ tcp->tcp_ip_src_v6 = tcp->tcp_bound_source_v6; ASSERT(tcp->tcp_ptpbhn != NULL); - if (!IPCL_IS_NONSTR(tcp->tcp_connp)) - tcp->tcp_rq->q_hiwat = tcps->tcps_recv_hiwat; tcp->tcp_recv_hiwater = tcps->tcps_recv_hiwat; tcp->tcp_recv_lowater = tcp_rinfo.mi_lowat; tcp->tcp_rwnd = tcps->tcps_recv_hiwat; @@ -7857,7 +7876,7 @@ tcp->tcp_unfusable = B_FALSE; tcp->tcp_fused_sigurg = B_FALSE; tcp->tcp_loopback_peer = NULL; - tcp->tcp_fuse_rcv_hiwater = 0; + tcp->tcp_recv_hiwater = 0; tcp->tcp_lso = B_FALSE; @@ -7961,7 +7980,7 @@ tcp->tcp_unfusable = B_FALSE; tcp->tcp_fused_sigurg = B_FALSE; tcp->tcp_loopback_peer = NULL; - tcp->tcp_fuse_rcv_hiwater = 0; + tcp->tcp_recv_hiwater = 0; /* Initialize the header template */ if (tcp->tcp_ipversion == IPV4_VERSION) { @@ -8902,7 +8921,7 @@ if (TCP_IS_DETACHED(tcp)) return (mss); if (tcp->tcp_fused) { - maxpsz = tcp_fuse_maxpsz_set(tcp); + maxpsz = tcp_fuse_maxpsz(tcp); mss = INFPSZ; } else if (tcp->tcp_mdt || tcp->tcp_lso || tcp->tcp_maxpsz == 0) { /* @@ -9321,8 +9340,6 @@ return (NULL); } q = connp->conn_rq; - } else { - RD(q)->q_hiwat = tcps->tcps_recv_hiwat; } SOCK_CONNID_INIT(tcp->tcp_connid); @@ -15650,7 +15667,7 @@ if (canputnext(q)) { /* Not flow-controlled, open rwnd */ - tcp->tcp_rwnd = q->q_hiwat; + tcp->tcp_rwnd = tcp->tcp_recv_hiwater; /* * Send back a window update immediately if TCP is above @@ -15725,7 +15742,7 @@ * XXX - Should allow a lower rwnd than tcp_recv_hiwat_minmss * mss if the * user requests so. */ -static int +int tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd) { uint32_t mss = tcp->tcp_mss; @@ -15739,25 +15756,11 @@ tcp_t *peer_tcp = tcp->tcp_loopback_peer; ASSERT(peer_tcp != NULL); - /* - * Record the stream head's high water mark for - * this endpoint; this is used for flow-control - * purposes in tcp_fuse_output(). - */ sth_hiwat = tcp_fuse_set_rcv_hiwat(tcp, rwnd); if (!tcp_detached) { (void) proto_set_rx_hiwat(tcp->tcp_rq, tcp->tcp_connp, sth_hiwat); - if (IPCL_IS_NONSTR(tcp->tcp_connp)) { - conn_t *connp = tcp->tcp_connp; - struct sock_proto_props sopp; - - sopp.sopp_flags = SOCKOPT_RCVTHRESH; - sopp.sopp_rcvthresh = sth_hiwat >> 3; - - (*connp->conn_upcalls->su_set_proto_props) - (connp->conn_upper_handle, &sopp); - } + tcp_set_recv_threshold(tcp, sth_hiwat >> 3); } /* @@ -15767,7 +15770,7 @@ * have changed we need to update the peer's maxpsz. */ (void) tcp_maxpsz_set(peer_tcp, B_TRUE); - return (rwnd); + return (sth_hiwat); } if (tcp_detached) { @@ -15840,14 +15843,11 @@ if (tcp_detached) return (rwnd); - /* - * We set the maximum receive window into rq->q_hiwat if it is - * a STREAMS socket. - * This is not actually used for flow control. - */ - if (!IPCL_IS_NONSTR(tcp->tcp_connp)) - tcp->tcp_rq->q_hiwat = rwnd; + + tcp_set_recv_threshold(tcp, rwnd >> 3); + tcp->tcp_recv_hiwater = rwnd; + /* * Set the STREAM head high water mark. This doesn't have to be * here, since we are simply using default values, but we would @@ -17258,19 +17258,13 @@ } /* - * Set the max window size (tcp_rq->q_hiwat) of the acceptor - * properly. This is the first time we know of the acceptor' - * queue. So we do it here. - * - * XXX + * Set max window size (tcp_recv_hiwater) of the acceptor. */ if (tcp->tcp_rcv_list == NULL) { /* * Recv queue is empty, tcp_rwnd should not have changed. * That means it should be equal to the listener's tcp_rwnd. */ - if (!IPCL_IS_NONSTR(connp)) - tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd; tcp->tcp_recv_hiwater = tcp->tcp_rwnd; } else { #ifdef DEBUG @@ -17286,8 +17280,6 @@ ASSERT(cnt != 0 && tcp->tcp_rcv_cnt == cnt); #endif /* There is some data, add them back to get the max. */ - if (!IPCL_IS_NONSTR(connp)) - tcp->tcp_rq->q_hiwat = tcp->tcp_rwnd + tcp->tcp_rcv_cnt; tcp->tcp_recv_hiwater = tcp->tcp_rwnd + tcp->tcp_rcv_cnt; } /* @@ -17298,10 +17290,6 @@ sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF; sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE); - /* - * Record the stream head's high water mark for this endpoint; - * this is used for flow-control purposes. - */ sopp_rxhiwat = tcp->tcp_fused ? tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) : MAX(tcp->tcp_recv_hiwater, tcps->tcps_sth_rcv_hiwat); @@ -17455,7 +17443,7 @@ /* We drain directly in case of fused tcp loopback */ if (!tcp->tcp_fused && canputnext(q)) { - tcp->tcp_rwnd = q->q_hiwat; + tcp->tcp_rwnd = tcp->tcp_recv_hiwater; if (tcp->tcp_state >= TCPS_ESTABLISHED && tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { tcp_xmit_ctl(NULL, @@ -25639,7 +25627,6 @@ cred_t *ecr; ts_label_t *tsl; uint32_t mss; - queue_t *q = tcp->tcp_rq; conn_t *connp = tcp->tcp_connp; tcp_stack_t *tcps = tcp->tcp_tcps; @@ -25748,8 +25735,6 @@ */ tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss), tcps->tcps_recv_hiwat_minmss * mss); - if (!IPCL_IS_NONSTR(connp)) - q->q_hiwat = tcp->tcp_rwnd; tcp->tcp_recv_hiwater = tcp->tcp_rwnd; tcp_set_ws_value(tcp); U32_TO_ABE16((tcp->tcp_rwnd >> tcp->tcp_rcv_ws), @@ -26799,9 +26784,7 @@ tcp->tcp_tcps->tcps_wroff_xtra); if (tcp->tcp_snd_sack_ok) stropt->so_wroff += TCPOPT_MAX_SACK_LEN; - stropt->so_hiwat = tcp->tcp_fused ? - tcp_fuse_set_rcv_hiwat(tcp, tcp->tcp_recv_hiwater) : - MAX(tcp->tcp_recv_hiwater, tcp->tcp_tcps->tcps_sth_rcv_hiwat); + stropt->so_hiwat = tcp->tcp_recv_hiwater; stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE); putnext(RD(q), stropt_mp);
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c Mon Aug 17 09:14:35 2009 -0400 @@ -136,14 +136,14 @@ ASSERT(tcp->tcp_loopback); ASSERT(tcp->tcp_loopback_peer == NULL); /* - * We need to inherit q_hiwat of the listener tcp, but we can't - * really use tcp_listener since we get here after sending up - * T_CONN_IND and tcp_wput_accept() may be called independently, - * at which point tcp_listener is cleared; this is why we use - * tcp_saved_listener. The listener itself is guaranteed to be - * around until tcp_accept_finish() is called on this eager -- - * this won't happen until we're done since we're inside the - * eager's perimeter now. + * We need to inherit tcp_recv_hiwater of the listener tcp, + * but we can't really use tcp_listener since we get here after + * sending up T_CONN_IND and tcp_wput_accept() may be called + * independently, at which point tcp_listener is cleared; + * this is why we use tcp_saved_listener. The listener itself + * is guaranteed to be around until tcp_accept_finish() is called + * on this eager -- this won't happen until we're done since we're + * inside the eager's perimeter now. * * We can also get called in the case were a connection needs * to be re-fused. In this case tcp_saved_listener will be @@ -272,29 +272,19 @@ tcp_timers_stop(tcp); tcp_timers_stop(peer_tcp); - /* - * At this point we are a detached eager tcp and therefore - * don't have a queue assigned to us until accept happens. - * In the mean time the peer endpoint may immediately send - * us data as soon as fusion is finished, and we need to be - * able to flow control it in case it sends down huge amount - * of data while we're still detached. To prevent that we - * inherit the listener's recv_hiwater value; this is temporary - * since we'll repeat the process in tcp_accept_finish(). - */ if (!tcp->tcp_refuse) { - (void) tcp_fuse_set_rcv_hiwat(tcp, - tcp->tcp_saved_listener->tcp_recv_hiwater); + /* + * Set receive buffer and max packet size for the + * active open tcp. + * eager's values will be set in tcp_accept_finish. + */ + + (void) tcp_rwnd_set(peer_tcp, + peer_tcp->tcp_recv_hiwater); /* - * Set the stream head's write offset value to zero - * since we won't be needing any room for TCP/IP - * headers; tell it to not break up the writes (this - * would reduce the amount of work done by kmem); and - * configure our receive buffer. Note that we can only - * do this for the active connect tcp since our eager is - * still detached; it will be dealt with later in - * tcp_accept_finish(). + * Set the write offset value to zero since we won't + * be needing any room for TCP/IP headers. */ if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) { struct stroptions *stropt; @@ -303,19 +293,9 @@ mp->b_wptr += sizeof (*stropt); stropt = (struct stroptions *)mp->b_rptr; - stropt->so_flags = SO_MAXBLK|SO_WROFF|SO_HIWAT; - stropt->so_maxblk = tcp_maxpsz_set(peer_tcp, - B_FALSE); + stropt->so_flags = SO_WROFF; stropt->so_wroff = 0; - /* - * Record the stream head's high water mark for - * peer endpoint; this is used for flow-control - * purposes in tcp_fuse_output(). - */ - stropt->so_hiwat = tcp_fuse_set_rcv_hiwat( - peer_tcp, peer_rq->q_hiwat); - /* Send the options up */ putnext(peer_rq, mp); } else { @@ -324,16 +304,8 @@ /* The peer is a non-STREAMS end point */ ASSERT(IPCL_IS_TCP(peer_connp)); - (void) tcp_fuse_set_rcv_hiwat(tcp, - tcp->tcp_saved_listener->tcp_recv_hiwater); - - sopp.sopp_flags = SOCKOPT_MAXBLK | - SOCKOPT_WROFF | SOCKOPT_RCVHIWAT; - sopp.sopp_maxblk = tcp_maxpsz_set(peer_tcp, - B_FALSE); + sopp.sopp_flags = SOCKOPT_WROFF; sopp.sopp_wroff = 0; - sopp.sopp_rxhiwat = tcp_fuse_set_rcv_hiwat( - peer_tcp, peer_tcp->tcp_recv_hiwater); (*peer_connp->conn_upcalls->su_set_proto_props) (peer_connp->conn_upper_handle, &sopp); } @@ -789,7 +761,7 @@ mutex_enter(&tcp->tcp_non_sq_lock); flow_stopped = tcp->tcp_flow_stopped; if ((TCP_IS_DETACHED(peer_tcp) && - (peer_tcp->tcp_rcv_cnt >= peer_tcp->tcp_fuse_rcv_hiwater)) || + (peer_tcp->tcp_rcv_cnt >= peer_tcp->tcp_recv_hiwater)) || (!TCP_IS_DETACHED(peer_tcp) && !IPCL_IS_NONSTR(peer_tcp->tcp_connp) && !canputnext(peer_tcp->tcp_rq))) { @@ -989,7 +961,12 @@ * after SO_SNDBUF; the latter is also similarly rounded up. */ rwnd = P2ROUNDUP_TYPED(rwnd, PAGESIZE, size_t); - tcp->tcp_fuse_rcv_hiwater = rwnd; + + /* + * Record high water mark, this is used for flow-control + * purposes in tcp_fuse_output(). + */ + tcp->tcp_recv_hiwater = rwnd; return (rwnd); } @@ -997,7 +974,7 @@ * Calculate the maximum outstanding unread data block for a fused tcp endpoint. */ int -tcp_fuse_maxpsz_set(tcp_t *tcp) +tcp_fuse_maxpsz(tcp_t *tcp) { tcp_t *peer_tcp = tcp->tcp_loopback_peer; uint_t sndbuf = tcp->tcp_xmit_hiwater; @@ -1005,7 +982,7 @@ ASSERT(tcp->tcp_fused); ASSERT(peer_tcp != NULL); - ASSERT(peer_tcp->tcp_fuse_rcv_hiwater != 0); + ASSERT(peer_tcp->tcp_recv_hiwater != 0); /* * In the fused loopback case, we want the stream head to split * up larger writes into smaller chunks for a more accurate flow- @@ -1014,8 +991,8 @@ * We round up the buffer to system page size due to the lack of * TCP MSS concept in Fusion. */ - if (maxpsz > peer_tcp->tcp_fuse_rcv_hiwater) - maxpsz = peer_tcp->tcp_fuse_rcv_hiwater; + if (maxpsz > peer_tcp->tcp_recv_hiwater) + maxpsz = peer_tcp->tcp_recv_hiwater; maxpsz = P2ROUNDUP_TYPED(maxpsz, PAGESIZE, uint_t) >> 1; return (maxpsz);
--- a/usr/src/uts/common/inet/tcp_impl.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/inet/tcp_impl.h Mon Aug 17 09:14:35 2009 -0400 @@ -222,8 +222,9 @@ extern void tcp_fuse_output_urg(tcp_t *, mblk_t *); extern boolean_t tcp_fuse_rcv_drain(queue_t *, tcp_t *, mblk_t **); extern size_t tcp_fuse_set_rcv_hiwat(tcp_t *, size_t); -extern int tcp_fuse_maxpsz_set(tcp_t *); -extern void tcp_fuse_backenable(tcp_t *tcp); +extern int tcp_fuse_maxpsz(tcp_t *); +extern void tcp_fuse_backenable(tcp_t *); +extern int tcp_rwnd_set(tcp_t *, uint32_t); /* * Object to represent database of options to search passed to
--- a/usr/src/uts/common/io/aggr/aggr_grp.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/aggr/aggr_grp.c Mon Aug 17 09:14:35 2009 -0400 @@ -623,7 +623,8 @@ /* * Get the list the the underlying HW rings. */ - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh, + MAC_RING_TYPE_RX); if (port->lp_hwgh != NULL) { /* @@ -689,7 +690,8 @@ goto done; ASSERT(rx_grp->arg_gh != NULL); - hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh); + hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh, + MAC_RING_TYPE_RX); /* * If hw_rh_cnt is 0, it means that the underlying port does not
--- a/usr/src/uts/common/io/audio/drv/audiovia823x/audiovia823x.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/audio/drv/audiovia823x/audiovia823x.c Mon Aug 17 09:14:35 2009 -0400 @@ -203,9 +203,9 @@ { auvia_devc_t *devc = (void *)argp; auvia_portc_t *portc; - uint32_t gstat; uint8_t status; unsigned intrs = 0; + boolean_t claimed = B_FALSE; _NOTE(ARGUNUSED(nocare)); @@ -215,12 +215,6 @@ return (DDI_INTR_UNCLAIMED); } - gstat = INL(devc, devc->base + REG_GSTAT); - if (gstat == 0) { - mutex_exit(&devc->mutex); - return (DDI_INTR_UNCLAIMED); - } - for (int i = 0; i < AUVIA_NUM_PORTC; i++) { portc = devc->portc[i]; @@ -242,13 +236,18 @@ if (portc->started) { intrs |= (1U << i); } - /* XXX: do we really need to do this? */ + /* let the chip know we are acking the interrupt */ OUTB(devc, portc->base + OFF_STATUS, status); + + claimed = B_TRUE; } - OUTL(devc, devc->base + REG_GSTAT, gstat); + mutex_exit(&devc->mutex); - mutex_exit(&devc->mutex); + if (!claimed) { + return (DDI_INTR_UNCLAIMED); + } + if (intrs & (1U << AUVIA_PLAY_SGD_NUM)) { audio_engine_consume(devc->portc[AUVIA_PLAY_SGD_NUM]->engine); }
--- a/usr/src/uts/common/io/cmlb.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/cmlb.c Mon Aug 17 09:14:35 2009 -0400 @@ -313,7 +313,8 @@ #endif #if defined(_SUNOS_VTOC_16) -static void cmlb_convert_geometry(diskaddr_t capacity, struct dk_geom *cl_g); +static void cmlb_convert_geometry(struct cmlb_lun *cl, diskaddr_t capacity, + struct dk_geom *cl_g, void *tg_cookie); #endif static int cmlb_dkio_get_geometry(struct cmlb_lun *cl, caddr_t arg, int flag, @@ -349,7 +350,8 @@ static int cmlb_is_linux_swap(struct cmlb_lun *cl, uint32_t part_start, void *tg_cookie); static int cmlb_dkio_get_virtgeom(struct cmlb_lun *cl, caddr_t arg, int flag); -static int cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t arg, int flag); +static int cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t arg, int flag, + void *tg_cookie); static int cmlb_dkio_partinfo(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag); static int cmlb_dkio_extpartinfo(struct cmlb_lun *cl, dev_t dev, caddr_t arg, @@ -1286,7 +1288,7 @@ case DKIOCG_PHYGEOM: cmlb_dbg(CMLB_TRACE, cl, "DKIOCG_PHYGEOM\n"); #if defined(__i386) || defined(__amd64) - err = cmlb_dkio_get_phygeom(cl, (caddr_t)arg, flag); + err = cmlb_dkio_get_phygeom(cl, (caddr_t)arg, flag, tg_cookie); #else err = ENOTTY; #endif @@ -1803,9 +1805,13 @@ * Context: Kernel thread only */ static void -cmlb_convert_geometry(diskaddr_t capacity, struct dk_geom *cl_g) +cmlb_convert_geometry(struct cmlb_lun *cl, diskaddr_t capacity, + struct dk_geom *cl_g, void *tg_cookie) { + ASSERT(cl != NULL); + ASSERT(mutex_owned(CMLB_MUTEX(cl))); + /* Unlabeled SCSI floppy device */ if (capacity <= 0x1000) { cl_g->dkg_nhead = 2; @@ -1839,6 +1845,15 @@ * 4211279100 (1.96TB) 255 252 * 5264098875 (2.45TB) 255 315 * ... + * + * For Solid State Drive(SSD), it uses 4K page size inside and may be + * double with every new generation. If the I/O is not aligned with + * page size on SSDs, SSDs perform a lot slower. + * By default, Solaris partition starts from cylinder 1. It will be + * misaligned even with 4K if using heads(255) and SPT(63). To + * workaround the problem, if the device is SSD, we use heads(224) and + * SPT multiple of 56. Thus the default Solaris partition starts from + * a position that aligns with 128K on a 512 bytes sector size SSD. */ if (capacity <= 0x200000) { @@ -1848,15 +1863,36 @@ cl_g->dkg_nhead = 128; cl_g->dkg_nsect = 32; } else { - cl_g->dkg_nhead = 255; - - /* make nsect be smallest multiple of 63 */ + tg_attribute_t tgattribute; + int is_solid_state; + unsigned short nhead; + unsigned short nsect; + + bzero(&tgattribute, sizeof (tg_attribute_t)); + + mutex_exit(CMLB_MUTEX(cl)); + is_solid_state = + (DK_TG_GETATTRIBUTE(cl, &tgattribute, tg_cookie) == 0) ? + tgattribute.media_is_solid_state : FALSE; + mutex_enter(CMLB_MUTEX(cl)); + + if (is_solid_state) { + nhead = 224; + nsect = 56; + } else { + nhead = 255; + nsect = 63; + } + + cl_g->dkg_nhead = nhead; + + /* make nsect be smallest multiple of nhead */ cl_g->dkg_nsect = ((capacity + - (UINT16_MAX * 255 * 63) - 1) / - (UINT16_MAX * 255 * 63)) * 63; + (UINT16_MAX * nhead * nsect) - 1) / + (UINT16_MAX * nhead * nsect)) * nsect; if (cl_g->dkg_nsect == 0) - cl_g->dkg_nsect = (UINT16_MAX / 63) * 63; + cl_g->dkg_nsect = (UINT16_MAX / nsect) * nsect; } } @@ -3327,7 +3363,7 @@ capacity = cl->cl_blockcount; - cmlb_convert_geometry(capacity, &cl_g); + cmlb_convert_geometry(cl, capacity, &cl_g, tg_cookie); bcopy(&cl_g, &cl->cl_g, sizeof (cl->cl_g)); phys_spc = cl->cl_g.dkg_nhead * cl->cl_g.dkg_nsect; } @@ -5381,7 +5417,8 @@ #if defined(__i386) || defined(__amd64) static int -cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t arg, int flag) +cmlb_dkio_get_phygeom(struct cmlb_lun *cl, caddr_t arg, int flag, + void *tg_cookie) { int err = 0; diskaddr_t capacity; @@ -5443,7 +5480,7 @@ else capacity = cl->cl_blockcount; - cmlb_convert_geometry(capacity, dkgp); + cmlb_convert_geometry(cl, capacity, dkgp, tg_cookie); dkgp->dkg_acyl = 0; dkgp->dkg_ncyl = capacity / (dkgp->dkg_nhead * dkgp->dkg_nsect);
--- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_pgr.c Mon Aug 17 09:14:35 2009 -0400 @@ -1647,17 +1647,10 @@ pgr->pgr_PRgeneration++; if (pr_out->action == PR_OUT_PREEMPT_ABORT) { - /* - * XXX iscsi port provider doesn't like this idea - * Need to implement abort differently - * - * task->task_mgmt_function = TM_ABORT_TASK_SET; - * stmf_scsilib_handle_task_mgmt(task); - */ - stmf_scsilib_send_status(task, STATUS_GOOD, 0); - } else { - stmf_scsilib_send_status(task, STATUS_GOOD, 0); + stmf_abort(STMF_QUEUE_ABORT_LU, task, STMF_ABORTED, + (void *)slu->sl_lu); } + stmf_scsilib_send_status(task, STATUS_GOOD, 0); } static void
--- a/usr/src/uts/common/io/ib/clients/ibd/ibd.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ib/clients/ibd/ibd.c Mon Aug 17 09:14:35 2009 -0400 @@ -256,6 +256,14 @@ #define IBD_DRV_STARTED 0x80000 /* + * Start/stop in-progress flags; note that restart must always remain + * the OR of start and stop flag values. + */ +#define IBD_DRV_START_IN_PROGRESS 0x10000000 +#define IBD_DRV_STOP_IN_PROGRESS 0x20000000 +#define IBD_DRV_RESTART_IN_PROGRESS 0x30000000 + +/* * Miscellaneous constants */ #define IBD_SEND 0 @@ -433,7 +441,10 @@ static int ibd_get_port_details(ibd_state_t *); static int ibd_alloc_cqs(ibd_state_t *); static int ibd_setup_ud_channel(ibd_state_t *); -static int ibd_undo_m_start(ibd_state_t *); +static int ibd_start(ibd_state_t *); +static int ibd_undo_start(ibd_state_t *, link_state_t); +static void ibd_set_mac_progress(ibd_state_t *, uint_t); +static void ibd_clr_mac_progress(ibd_state_t *, uint_t); /* @@ -2006,19 +2017,14 @@ link_state_t new_link_state = LINK_STATE_UP; uint8_t itreply; uint16_t pkix; - - /* - * Do not send a request to the async daemon if it has not - * yet been created or is being destroyed. If the async - * daemon has not yet been created, we still need to track - * last known state of the link. If this code races with the - * detach path, then we are assured that the detach path has - * not yet done the ibt_close_hca (which waits for all async - * events to complete). If the code races with the attach path, - * we need to validate the pkey/gid (in the link_up case) if - * the initialization path has already set these up and created - * IBTF resources based on the values. - */ + int ret; + + /* + * Let's not race with a plumb or an unplumb; if we detect a + * pkey relocation event later on here, we may have to restart. + */ + ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS); + mutex_enter(&state->id_link_mutex); /* @@ -2027,7 +2033,7 @@ */ if (state->id_link_state == LINK_STATE_UNKNOWN) { mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } /* @@ -2085,12 +2091,21 @@ ibt_free_portinfo(port_infop, port_infosz); mutex_exit(&state->id_link_mutex); - ibd_m_stop(state); - if ((ibt_status = ibd_m_start(state)) != IBT_SUCCESS) { - DPRINT(10, "link_mod: cannot " - "restart, ret=%d", ibt_status); + /* + * Currently a restart is required if our pkey has moved + * in the pkey table. If we get the ibt_recycle_ud() to + * work as documented (expected), we may be able to + * avoid a complete restart. Note that we've already + * marked both the start and stop 'in-progress' flags, + * so it is ok to go ahead and do this restart. + */ + ibd_undo_start(state, LINK_STATE_DOWN); + if ((ret = ibd_start(state)) != 0) { + DPRINT(10, "ibd_restart: cannot restart, " + "ret=%d", ret); } - return; + + goto link_mod_return; } else { new_link_state = LINK_STATE_DOWN; } @@ -2106,7 +2121,7 @@ */ if (state->id_link_state == new_link_state) { mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } /* @@ -2116,7 +2131,7 @@ if (!ibd_async_safe(state)) { state->id_link_state = new_link_state; mutex_exit(&state->id_link_mutex); - return; + goto link_mod_return; } mutex_exit(&state->id_link_mutex); @@ -2144,6 +2159,9 @@ req = kmem_cache_alloc(state->id_req_kmc, KM_SLEEP); req->rq_ptr = (void *)opcode; ibd_queue_work_slot(state, req, IBD_ASYNC_LINK); + +link_mod_return: + ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS); } /* @@ -2619,6 +2637,9 @@ state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL); + return (DDI_SUCCESS); } @@ -2628,6 +2649,9 @@ static void ibd_state_fini(ibd_state_t *state) { + cv_destroy(&state->id_macst_cv); + mutex_destroy(&state->id_macst_lock); + kmem_cache_destroy(state->id_req_kmc); mutex_destroy(&state->id_rxpost_lock); @@ -4050,7 +4074,7 @@ mutex_exit(&state->id_link_mutex); DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() " "failed, ret=%d", ret); - return (DDI_FAILURE); + return (ENETDOWN); } /* @@ -4061,7 +4085,7 @@ mutex_exit(&state->id_link_mutex); ibt_free_portinfo(port_infop, port_infosz); DPRINT(10, "ibd_get_port_details: port is not active"); - return (DDI_FAILURE); + return (ENETDOWN); } /* @@ -4073,7 +4097,7 @@ ibt_free_portinfo(port_infop, port_infosz); DPRINT(10, "ibd_get_port_details: ibt_pkey2index " "failed, ret=%d", ret); - return (DDI_FAILURE); + return (ENONET); } state->id_mtu = (128 << port_infop->p_mtu); @@ -4088,7 +4112,7 @@ */ state->id_link_speed = ibd_get_portspeed(state); - return (DDI_SUCCESS); + return (0); } static int @@ -4263,7 +4287,7 @@ } static int -ibd_undo_m_start(ibd_state_t *state) +ibd_undo_start(ibd_state_t *state, link_state_t cur_link_state) { uint32_t progress = state->id_mac_state; uint_t attempts; @@ -4273,17 +4297,23 @@ uint8_t jstate; /* - * Before we try to stop/undo whatever we did in ibd_m_start(), - * we need to mark the link state as unknown to prevent nw - * layer from using this instance for any new transfers. - */ - if (progress & IBD_DRV_PORT_DETAILS_OBTAINED) { + * Before we try to stop/undo whatever we did in ibd_start(), + * we need to mark the link state appropriately to prevent the + * ip layer from using this instance for any new transfers. Note + * that if the original state of the link was "up" when we're + * here, we'll set the final link state to "unknown", to behave + * in the same fashion as other ethernet drivers. + */ + mutex_enter(&state->id_link_mutex); + if (cur_link_state == LINK_STATE_DOWN) { + state->id_link_state = cur_link_state; + } else { state->id_link_state = LINK_STATE_UNKNOWN; - mac_link_update(state->id_mh, state->id_link_state); - - state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED); - } - + } + mutex_exit(&state->id_link_mutex); + mac_link_update(state->id_mh, state->id_link_state); + + state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED); if (progress & IBD_DRV_STARTED) { state->id_mac_state &= (~IBD_DRV_STARTED); } @@ -4309,7 +4339,7 @@ * we turned off the notification and * return failure. */ - DPRINT(2, "ibd_undo_m_start: " + DPRINT(2, "ibd_undo_start: " "reclaiming failed"); ibd_poll_compq(state, state->id_rcq_hdl); ibt_set_cq_handler(state->id_rcq_hdl, @@ -4342,7 +4372,7 @@ */ if ((ret = ibt_flush_channel(state->id_chnl_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: flush_channel " + DPRINT(10, "ibd_undo_start: flush_channel " "failed, ret=%d", ret); } @@ -4394,7 +4424,7 @@ * has also been suppressed at this point. Thus, no locks * are required while traversing the mc full list. */ - DPRINT(2, "ibd_undo_m_start: clear full cache entries"); + DPRINT(2, "ibd_undo_start: clear full cache entries"); mce = list_head(&state->id_mc_full); while (mce != NULL) { mgid = mce->mc_info.mc_adds_vect.av_dgid; @@ -4418,7 +4448,7 @@ if (progress & IBD_DRV_UD_CHANNEL_SETUP) { if ((ret = ibt_free_channel(state->id_chnl_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_channel " + DPRINT(10, "ibd_undo_start: free_channel " "failed, ret=%d", ret); } @@ -4431,7 +4461,7 @@ sizeof (ibt_wc_t) * state->id_txwcs_size); if ((ret = ibt_free_cq(state->id_scq_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_cq(scq) " + DPRINT(10, "ibd_undo_start: free_cq(scq) " "failed, ret=%d", ret); } } @@ -4439,7 +4469,7 @@ kmem_free(state->id_rxwcs, sizeof (ibt_wc_t) * state->id_rxwcs_size); if ((ret = ibt_free_cq(state->id_rcq_hdl)) != IBT_SUCCESS) { - DPRINT(10, "undo_m_start: free_cq(rcq) failed, " + DPRINT(10, "ibd_undo_start: free_cq(rcq) failed, " "ret=%d", ret); } @@ -4478,6 +4508,34 @@ } /* + * These pair of routines are used to set/clear the condition that + * the caller is likely to do something to change the id_mac_state. + * If there's already someone doing either a start or a stop (possibly + * due to the async handler detecting a pkey relocation event, a plumb + * or dlpi_open, or an unplumb or dlpi_close coming in), we wait until + * that's done. + */ +static void +ibd_set_mac_progress(ibd_state_t *state, uint_t flag) +{ + mutex_enter(&state->id_macst_lock); + while (state->id_mac_state & IBD_DRV_RESTART_IN_PROGRESS) + cv_wait(&state->id_macst_cv, &state->id_macst_lock); + + state->id_mac_state |= flag; + mutex_exit(&state->id_macst_lock); +} + +static void +ibd_clr_mac_progress(ibd_state_t *state, uint_t flag) +{ + mutex_enter(&state->id_macst_lock); + state->id_mac_state &= (~flag); + cv_signal(&state->id_macst_cv); + mutex_exit(&state->id_macst_lock); +} + +/* * GLDv3 entry point to start hardware. */ /*ARGSUSED*/ @@ -4485,6 +4543,20 @@ ibd_m_start(void *arg) { ibd_state_t *state = arg; + int ret; + + ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS); + + ret = ibd_start(state); + + ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS); + + return (ret); +} + +static int +ibd_start(ibd_state_t *state) +{ kthread_t *kht; int err; ibt_status_t ret; @@ -4494,11 +4566,11 @@ /* * Get port details; if we fail here, very likely the port - * state is inactive or the pkey can't be verified - */ - if (ibd_get_port_details(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_get_port_details() failed"); - return (EAGAIN); + * state is inactive or the pkey can't be verified. + */ + if ((err = ibd_get_port_details(state)) != 0) { + DPRINT(10, "ibd_start: ibd_get_port_details() failed"); + goto start_fail; } state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED; @@ -4506,9 +4578,9 @@ * Find the IPoIB broadcast group */ if (ibd_find_bgroup(state) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_find_bgroup() failed"); - err = ENOENT; - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_find_bgroup() failed"); + err = ENOTACTIVE; + goto start_fail; } state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND; @@ -4517,9 +4589,9 @@ * it is most likely due to a lack of resources */ if (ibd_acache_init(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_acache_init() failed"); + DPRINT(10, "ibd_start: ibd_acache_init() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_ACACHE_INITIALIZED; @@ -4527,9 +4599,9 @@ * Allocate send and receive completion queues */ if (ibd_alloc_cqs(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_alloc_cqs() failed"); + DPRINT(10, "ibd_start: ibd_alloc_cqs() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_CQS_ALLOCD; @@ -4538,8 +4610,8 @@ */ if (ibd_setup_ud_channel(state) != DDI_SUCCESS) { err = ENOMEM; - DPRINT(10, "ibd_m_start: ibd_setup_ud_channel() failed"); - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_setup_ud_channel() failed"); + goto start_fail; } state->id_mac_state |= IBD_DRV_UD_CHANNEL_SETUP; @@ -4547,9 +4619,9 @@ * Allocate and initialize the tx buffer list */ if (ibd_init_txlist(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_init_txlist() failed"); + DPRINT(10, "ibd_start: ibd_init_txlist() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_TXLIST_ALLOCD; @@ -4560,10 +4632,10 @@ ibt_set_cq_handler(state->id_scq_hdl, ibd_scq_handler, state); if ((ret = ibt_enable_cq_notify(state->id_scq_hdl, IBT_NEXT_COMPLETION)) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(scq) " + DPRINT(10, "ibd_start: ibt_enable_cq_notify(scq) " "failed, ret=%d", ret); err = EINVAL; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_SCQ_NOTIFY_ENABLED; } @@ -4572,9 +4644,9 @@ * Allocate and initialize the rx buffer list */ if (ibd_init_rxlist(state) != DDI_SUCCESS) { - DPRINT(10, "ibd_m_start: ibd_init_rxlist() failed"); + DPRINT(10, "ibd_start: ibd_init_rxlist() failed"); err = ENOMEM; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_RXLIST_ALLOCD; @@ -4582,9 +4654,9 @@ * Join IPoIB broadcast group */ if (ibd_join_group(state, state->id_mgid, IB_MC_JSTATE_FULL) == NULL) { - DPRINT(10, "ibd_m_start: ibd_join_group() failed"); - err = EINVAL; - goto m_start_fail; + DPRINT(10, "ibd_start: ibd_join_group() failed"); + err = ENOTACTIVE; + goto start_fail; } state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED; @@ -4617,10 +4689,10 @@ ibt_set_cq_handler(state->id_rcq_hdl, ibd_rcq_handler, state); if ((ret = ibt_enable_cq_notify(state->id_rcq_hdl, IBT_NEXT_COMPLETION)) != IBT_SUCCESS) { - DPRINT(10, "ibd_m_start: ibt_enable_cq_notify(rcq) " + DPRINT(10, "ibd_start: ibt_enable_cq_notify(rcq) " "failed, ret=%d", ret); err = EINVAL; - goto m_start_fail; + goto start_fail; } state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED; @@ -4656,14 +4728,14 @@ return (DDI_SUCCESS); -m_start_fail: - /* - * If we ran into a problem during ibd_m_start() and ran into +start_fail: + /* + * If we ran into a problem during ibd_start() and ran into * some other problem during undoing our partial work, we can't * do anything about it. Ignore any errors we might get from - * ibd_undo_m_start() and just return the original error we got. - */ - (void) ibd_undo_m_start(state); + * ibd_undo_start() and just return the original error we got. + */ + (void) ibd_undo_start(state, LINK_STATE_DOWN); return (err); } @@ -4674,15 +4746,13 @@ static void ibd_m_stop(void *arg) { - ibd_state_t *state = arg; - - /* - * Since ibd_m_stop() doesn't expect any return, we cannot - * fail even if we run into some problem with ibd_undo_m_start(). - * The best we can do is to leave it in a good state, so - * perhaps a future unplumb will succeed. - */ - (void) ibd_undo_m_start(state); + ibd_state_t *state = (ibd_state_t *)arg; + + ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS); + + (void) ibd_undo_start(state, state->id_link_state); + + ibd_clr_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS); } /*
--- a/usr/src/uts/common/io/igb/igb_82575.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_82575.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,13 +26,14 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.94 v2008-10-7 */ +/* IntelVersion: 1.123 v2-9-8_2009-6-12 */ /* * 82575EB Gigabit Network Connection * 82575EB Gigabit Backplane Connection * 82575GB Gigabit Network Connection * 82576 Gigabit Network Connection + * 82576 Quad Port Gigabit Mezzanine Adapter */ #include "igb_api.h" @@ -71,11 +72,8 @@ static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); -static void e1000_init_rx_addrs_82575(struct e1000_hw *hw, u16 rar_count); -static void e1000_update_mc_addr_list_82575(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count, - u32 rar_used_count, u32 rar_count); void e1000_shutdown_fiber_serdes_link_82575(struct e1000_hw *hw); +static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); /* * e1000_init_phy_params_82575 - Init PHY func ptrs. @@ -279,13 +277,15 @@ /* read mac address */ mac->ops.read_mac_addr = e1000_read_mac_addr_82575; /* multicast address update */ - mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_82575; + mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; /* writing VFTA */ mac->ops.write_vfta = e1000_write_vfta_generic; /* clearing VFTA */ mac->ops.clear_vfta = e1000_clear_vfta_generic; /* setting MTA */ mac->ops.mta_set = e1000_mta_set_generic; + /* ID LED init */ + mac->ops.id_led_init = e1000_id_led_init_generic; /* blink LED */ mac->ops.blink_led = e1000_blink_led_generic; /* setup LED */ @@ -328,11 +328,12 @@ static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) { - u16 mask; + u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_acquire_phy_82575"); - mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; return (e1000_acquire_swfw_sync_82575(hw, mask)); } @@ -346,11 +347,13 @@ static void e1000_release_phy_82575(struct e1000_hw *hw) { - u16 mask; + u16 mask = E1000_SWFW_PHY0_SM; DEBUGFUNC("e1000_release_phy_82575"); - mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; + e1000_release_swfw_sync_82575(hw, mask); } @@ -801,7 +804,7 @@ DEBUGFUNC("e1000_get_cfg_done_82575"); - if (hw->bus.func == 1) + if (hw->bus.func == E1000_FUNC_1) mask = E1000_NVM_CFG_DONE_PORT_1; while (timeout) { @@ -868,11 +871,18 @@ /* SGMII link check is done through the PCS register. */ if ((hw->phy.media_type != e1000_media_type_copper) || - (e1000_sgmii_active_82575(hw))) + (e1000_sgmii_active_82575(hw))) { ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, &duplex); - else + /* + * Use this flag to determine if link needs to be checked or + * not. If we have link clear the flag so that we do not + * continue to check for link. + */ + hw->mac.get_link_status = !hw->mac.serdes_has_link; + } else { ret_val = e1000_check_for_copper_link_generic(hw); + } return (ret_val); } @@ -936,103 +946,6 @@ } /* - * e1000_init_rx_addrs_82575 - Initialize receive address's - * @hw: pointer to the HW structure - * @rar_count: receive address registers - * - * Setups the receive address registers by setting the base receive address - * register to the devices MAC address and clearing all the other receive - * address registers to 0. - */ -static void -e1000_init_rx_addrs_82575(struct e1000_hw *hw, u16 rar_count) -{ - u32 i; - u8 addr[6] = {0, 0, 0, 0, 0, 0}; - /* - * This function is essentially the same as that of - * e1000_init_rx_addrs_generic. However it also takes care - * of the special case where the register offset of the - * second set of RARs begins elsewhere. This is implicitly taken care by - * function e1000_rar_set_generic. - */ - - DEBUGFUNC("e1000_init_rx_addrs_82575"); - - /* Setup the receive address */ - DEBUGOUT("Programming MAC Address into RAR[0]\n"); - hw->mac.ops.rar_set(hw, hw->mac.addr, 0); - - /* Zero out the other (rar_entry_count - 1) receive addresses */ - DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count - 1); - for (i = 1; i < rar_count; i++) { - hw->mac.ops.rar_set(hw, addr, i); - } -} - -/* - * e1000_update_mc_addr_list_82575 - Update Multicast addresses - * @hw: pointer to the HW structure - * @mc_addr_list: array of multicast addresses to program - * @mc_addr_count: number of multicast addresses to program - * @rar_used_count: the first RAR register free to program - * @rar_count: total number of supported Receive Address Registers - * - * Updates the Receive Address Registers and Multicast Table Array. - * The caller must have a packed mc_addr_list of multicast addresses. - * The parameter rar_count will usually be hw->mac.rar_entry_count - * unless there are workarounds that change this. - */ -static void -e1000_update_mc_addr_list_82575(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count, - u32 rar_used_count, u32 rar_count) -{ - u32 hash_value; - u32 i; - u8 addr[6] = {0, 0, 0, 0, 0, 0}; - /* - * This function is essentially the same as that of - * e1000_update_mc_addr_list_generic. However it also takes care - * of the special case where the register offset of the - * second set of RARs begins elsewhere. This is implicitly taken care by - * function e1000_rar_set_generic. - */ - - DEBUGFUNC("e1000_update_mc_addr_list_82575"); - - /* - * Load the first set of multicast addresses into the exact - * filters (RAR). If there are not enough to fill the RAR - * array, clear the filters. - */ - for (i = rar_used_count; i < rar_count; i++) { - if (mc_addr_count) { - e1000_rar_set_generic(hw, mc_addr_list, i); - mc_addr_count--; - mc_addr_list += ETH_ADDR_LEN; - } else { - e1000_rar_set_generic(hw, addr, i); - } - } - - /* Clear the old settings from the MTA */ - DEBUGOUT("Clearing MTA\n"); - for (i = 0; i < hw->mac.mta_reg_count; i++) { - E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); - E1000_WRITE_FLUSH(hw); - } - - /* Load any remaining multicast addresses into the hash table. */ - for (; mc_addr_count > 0; mc_addr_count--) { - hash_value = e1000_hash_mc_addr(hw, mc_addr_list); - DEBUGOUT1("Hash value = 0x%03X\n", hash_value); - hw->mac.ops.mta_set(hw, hash_value); - mc_addr_list += ETH_ADDR_LEN; - } -} - -/* * e1000_shutdown_fiber_serdes_link_82575 - Remove link during power down * @hw: pointer to the HW structure * @@ -1045,13 +958,13 @@ u32 reg; u16 eeprom_data = 0; - if (hw->mac.type != e1000_82576 || - (hw->phy.media_type != e1000_media_type_fiber && - hw->phy.media_type != e1000_media_type_internal_serdes)) + if (hw->phy.media_type != e1000_media_type_internal_serdes) return; - if (hw->bus.func == 0) + if (hw->bus.func == E1000_FUNC_0) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); + else if (hw->bus.func == E1000_FUNC_1) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); /* * If APM is not enabled in the EEPROM and management interface is @@ -1076,6 +989,48 @@ } /* + * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback + * @hw: pointer to the HW structure + * @enable: state to enter, either enabled or disabled + * + * enables/disables L2 switch loopback functionality + */ +void +e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) +{ + u32 reg; + + reg = E1000_READ_REG(hw, E1000_DTXSWC); + if (enable) + reg |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + reg &= ~(E1000_DTXSWC_VMDQ_LOOPBACK_EN); + + E1000_WRITE_REG(hw, E1000_DTXSWC, reg); +} + +/* + * e1000_vmdq_set_replication_pf - enable or disable vmdq replication + * @hw: pointer to the HW structure + * @enable: state to enter, either enabled or disabled + * + * enables/disables replication of packets across multiple pools + */ +void +e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) +{ + u32 reg; + + reg = E1000_READ_REG(hw, E1000_VT_CTL); + if (enable) + reg |= E1000_VT_CTL_VM_REPL_EN; + else + reg &= ~(E1000_VT_CTL_VM_REPL_EN); + + E1000_WRITE_REG(hw, E1000_VT_CTL, reg); +} + +/* * e1000_reset_hw_82575 - Reset hardware * @hw: pointer to the HW structure * @@ -1098,6 +1053,12 @@ DEBUGOUT("PCI-E Master disable polling has failed.\n"); } + /* set the completion timeout for interface */ + ret_val = e1000_set_pcie_completion_timeout(hw); + if (ret_val) { + DEBUGOUT("PCI-E Set completion timeout has failed.\n"); + } + DEBUGOUT("Masking off all interrupts\n"); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); @@ -1130,7 +1091,8 @@ E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); (void) E1000_READ_REG(hw, E1000_ICR); - (void) e1000_check_alt_mac_addr_generic(hw); + /* Install any alternate MAC address into RAR0 */ + ret_val = e1000_check_alt_mac_addr_generic(hw); return (ret_val); } @@ -1151,7 +1113,7 @@ DEBUGFUNC("e1000_init_hw_82575"); /* Initialize identification LED */ - ret_val = e1000_id_led_init_generic(hw); + ret_val = mac->ops.id_led_init(hw); if (ret_val) { DEBUGOUT("Error initializing identification LED\n"); /* This is not fatal and we should not stop init due to this */ @@ -1162,7 +1124,7 @@ mac->ops.clear_vfta(hw); /* Setup the receive address */ - e1000_init_rx_addrs_82575(hw, rar_count); + e1000_init_rx_addrs_generic(hw, rar_count); /* Zero out the Multicast HASH table */ DEBUGOUT("Zeroing the MTA\n"); for (i = 0; i < mac->mta_reg_count; i++) @@ -1193,7 +1155,7 @@ static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) { - u32 ctrl, led_ctrl; + u32 ctrl; s32 ret_val; bool link; @@ -1210,11 +1172,6 @@ break; case e1000_phy_igp_3: ret_val = e1000_copper_link_setup_igp(hw); - /* Setup activity LED */ - led_ctrl = E1000_READ_REG(hw, E1000_LEDCTL); - led_ctrl &= IGP_ACTIVITY_LED_MASK; - led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE); - E1000_WRITE_REG(hw, E1000_LEDCTL, led_ctrl); break; default: ret_val = -E1000_ERR_PHY; @@ -1294,13 +1251,13 @@ */ E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); - /* Force link up, set 1gb, set both sw defined pins */ + /* Force link up, set 1gb */ reg = E1000_READ_REG(hw, E1000_CTRL); - reg |= E1000_CTRL_SLU | - E1000_CTRL_SPD_1000 | - E1000_CTRL_FRCSPD | - E1000_CTRL_SWDPIN0 | - E1000_CTRL_SWDPIN1; + reg |= E1000_CTRL_SLU | E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD; + if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) { + /* set both sw defined pins */ + reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; + } E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Power on phy for 82576 fiber adapters */ @@ -1376,7 +1333,6 @@ if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { switch (hw->phy.media_type) { - case e1000_media_type_fiber: case e1000_media_type_internal_serdes: *data = ID_LED_DEFAULT_82575_SERDES; break; @@ -1469,12 +1425,6 @@ e1000_sgmii_active_82575(struct e1000_hw *hw) { struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; - - DEBUGFUNC("e1000_sgmii_active_82575"); - - if (hw->mac.type != e1000_82575 && hw->mac.type != e1000_82576) - return (false); - return (dev_spec->sgmii_active); } @@ -1540,9 +1490,19 @@ s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_read_mac_addr_82575"); - if (e1000_check_alt_mac_addr_generic(hw)) - ret_val = e1000_read_mac_addr_generic(hw); + /* + * If there's an alternate MAC address place it in RAR0 + * so that it will override the Si installed default perm + * address. + */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_mac_addr_generic(hw); + +out: return (ret_val); } @@ -1708,3 +1668,55 @@ (void) E1000_READ_REG(hw, E1000_RNBC); (void) E1000_READ_REG(hw, E1000_MPC); } + +/* + * e1000_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. To address this we need to + * increase the value to either 10ms to 200ms for capability version 1 config, + * or 16ms to 55ms for version 2. + */ +static s32 +e1000_set_pcie_completion_timeout(struct e1000_hw *hw) +{ + u32 gcr = E1000_READ_REG(hw, E1000_GCR); + s32 ret_val = E1000_SUCCESS; + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & E1000_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 200ms through the GCR register + */ + if (!(gcr & E1000_GCR_CAP_VER2)) { + gcr |= E1000_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); + if (ret_val) + goto out; + + pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; + + ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; + + E1000_WRITE_REG(hw, E1000_GCR, gcr); + return (ret_val); +}
--- a/usr/src/uts/common/io/igb/igb_82575.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_82575.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.57 v2008-10-7 */ +/* IntelVersion: 1.77 v2-9-8_2009-6-12 */ #ifndef _IGB_82575_H #define _IGB_82575_H @@ -129,6 +129,7 @@ #define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000 #define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 +#define E1000_SRRCTL_DROP_EN 0x80000000 #define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F #define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00 @@ -138,6 +139,7 @@ #define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 #define E1000_MRQC_ENABLE_VMDQ 0x00000003 +#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x80000000 #define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 @@ -214,7 +216,7 @@ } wb; /* writeback */ }; -#define E1000_RXDADV_RSSTYPE_MASK 0x0000F000 +#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F #define E1000_RXDADV_RSSTYPE_SHIFT 12 #define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 #define E1000_RXDADV_HDRBUFLEN_SHIFT 5 @@ -316,6 +318,7 @@ #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ #define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ /* IPSec Encrypt Enable for ESP */ #define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 @@ -358,11 +361,10 @@ #define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ #define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ -/* Additional DCA related definitions, note change in position of CPUID */ #define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ #define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ -#define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */ -#define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */ +#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */ +#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */ /* Additional interrupt register bit definitions */ #define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */ @@ -387,10 +389,14 @@ #define E1000_NVM_APME_82575 0x0400 #define MAX_NUM_VFS 8 -#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof control */ -#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof control */ +/* Per VF MAC spoof control */ +#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF +/* Per VF VLAN spoof control */ +#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 #define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ -#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ +#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 +#define E1000_DTXSWC_LLE_SHIFT 16 +#define E1000_DTXSWC_VMDQ_LOOPBACK_EN ((u32)1 << 31) /* global VF LB enable */ /* Easy defines for setting default pool, would normally be left a zero */ #define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 @@ -402,84 +408,35 @@ #define E1000_VT_CTL_VM_REPL_EN (1 << 30) /* Per VM Offload register setup */ +#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ #define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ +#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ #define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ +#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ +#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ #define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ #define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ #define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ - -#define E1000_V2PMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */ -#define E1000_V2PMAILBOX_ACK 0x00000002 /* Ack PF message received */ -#define E1000_V2PMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define E1000_V2PMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define E1000_V2PMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */ -#define E1000_V2PMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */ -#define E1000_V2PMAILBOX_RSTI 0x00000040 /* PF has reset indication */ - -#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ -#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ -#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ -#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ - -#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ +#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ -/* - * If it's a E1000_VF_* msg then it originates in the VF and is sent to the - * PF. The reverse is true if it is E1000_PF_*. - * Message ACK's are the value or'd with 0xF0000000 - */ -/* Messages below or'd with this are the ACK */ -#define E1000_VT_MSGTYPE_ACK 0xF0000000 -/* Messages below or'd with this are the NACK */ -#define E1000_VT_MSGTYPE_NACK 0xFF000000 -#define E1000_VT_MSGINFO_SHIFT 16 -/* bits 23:16 are used for exra info for certain messages */ -#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) - -#define E1000_VF_MSGTYPE_REQ_MAC 1 /* VF needs to know its MAC */ -#define E1000_VF_MSGTYPE_VFLR 2 /* VF notifies VFLR to PF */ -#define E1000_VF_SET_MULTICAST 3 /* VF requests PF to set MC addr */ -#define E1000_VF_SET_VLAN 4 /* VF requests PF to set VLAN */ +#define E1000_VLVF_ARRAY_SIZE 32 +#define E1000_VLVF_VLANID_MASK 0x00000FFF +#define E1000_VLVF_POOLSEL_SHIFT 12 +#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) +#define E1000_VLVF_LVLAN 0x00100000 +#define E1000_VLVF_VLANID_ENABLE 0x80000000 -/* - * Add 100h to all PF msgs, leaves room for up to 255 discrete message types - * from VF to PF - way more than we'll ever need - */ -/* PF notifies global reset imminent to VF */ -#define E1000_PF_MSGTYPE_RESET (1 + 0x100) -/* PF notifies VF of LSC... VF will see extra msg info for status */ -#define E1000_PF_MSGTYPE_LSC (2 + 0x100) +#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ -#define E1000_PF_MSG_LSCDOWN (1 << E1000_VT_MSGINFO_SHIFT) -#define E1000_PF_MSG_LSCUP (2 << E1000_VT_MSGINFO_SHIFT) - -#define ALL_QUEUES 0xFFFF +#define E1000_IOVCTL 0x05BBC +#define E1000_IOVCTL_REUSE_VFQ 0x00000001 -s32 e1000_send_mail_to_pf_vf(struct e1000_hw *hw, u32 *msg, - s16 size); -s32 e1000_receive_mail_from_pf_vf(struct e1000_hw *hw, - u32 *msg, s16 size); -s32 e1000_send_mail_to_vf(struct e1000_hw *hw, u32 *msg, - u32 vf_number, s16 size); -s32 e1000_receive_mail_from_vf(struct e1000_hw *hw, u32 *msg, - u32 vf_number, s16 size); -void e1000_vmdq_loopback_enable_vf(struct e1000_hw *hw); -void e1000_vmdq_loopback_disable_vf(struct e1000_hw *hw); -void e1000_vmdq_replication_enable_vf(struct e1000_hw *hw, u32 enables); -void e1000_vmdq_replication_disable_vf(struct e1000_hw *hw); -void e1000_vmdq_enable_replication_mode_vf(struct e1000_hw *hw); -void e1000_vmdq_broadcast_replication_enable_vf(struct e1000_hw *hw, - u32 enables); -void e1000_vmdq_multicast_replication_enable_vf(struct e1000_hw *hw, - u32 enables); -void e1000_vmdq_broadcast_replication_disable_vf(struct e1000_hw *hw, - u32 disables); -void e1000_vmdq_multicast_replication_disable_vf(struct e1000_hw *hw, - u32 disables); -bool e1000_check_for_pf_ack_vf(struct e1000_hw *hw); -bool e1000_check_for_pf_mail_vf(struct e1000_hw *hw, u32*); +#define E1000_RPLOLR_STRVLAN 0x40000000 +#define ALL_QUEUES 0xFFFF + +void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable); +void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable); #ifdef __cplusplus }
--- a/usr/src/uts/common/io/igb/igb_api.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_api.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.107 v2008-10-7 */ +/* IntelVersion: 1.122 v2-9-8_2009-6-12 */ #include "igb_api.h" @@ -138,6 +138,8 @@ case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_NS: + case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; default: @@ -278,26 +280,17 @@ * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program - * @rar_used_count: the first RAR register free to program - * @rar_count: total number of supported Receive Address Registers * - * Updates the Receive Address Registers and Multicast Table Array. + * Updates the Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. - * The parameter rar_count will usually be hw->mac.rar_entry_count - * unless there are workarounds that change this. Currently no func pointer - * exists and all implementations are handled in the generic version of this - * function. */ void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, - u32 mc_addr_count, u32 rar_used_count, u32 rar_count) + u32 mc_addr_count) { if (hw->mac.ops.update_mc_addr_list) hw->mac.ops.update_mc_addr_list(hw, - mc_addr_list, - mc_addr_count, - rar_used_count, - rar_count); + mc_addr_list, mc_addr_count); } /* @@ -480,6 +473,22 @@ } /* + * e1000_id_led_init - store LED configurations in SW + * @hw: pointer to the HW structure + * + * Initializes the LED config in SW. This is a function pointer entry point + * called by drivers. + */ +s32 +e1000_id_led_init(struct e1000_hw *hw) +{ + if (hw->mac.ops.id_led_init) + return (hw->mac.ops.id_led_init(hw)); + + return (E1000_SUCCESS); +} + +/* * e1000_led_on - Turn on SW controllable LED * @hw: pointer to the HW structure *
--- a/usr/src/uts/common/io/igb/igb_api.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_api.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.50 v2008-10-7 */ +/* IntelVersion: 1.52 v2-9-8_2009-6-12 */ #ifndef _IGB_API_H #define _IGB_API_H @@ -62,14 +62,14 @@ void e1000_mta_set(struct e1000_hw *hw, u32 hash_value); u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); void e1000_update_mc_addr_list(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count, - u32 rar_used_count, u32 rar_count); + u8 *mc_addr_list, u32 mc_addr_count); s32 e1000_setup_led(struct e1000_hw *hw); s32 e1000_cleanup_led(struct e1000_hw *hw); s32 e1000_check_reset_block(struct e1000_hw *hw); s32 e1000_blink_led(struct e1000_hw *hw); s32 e1000_led_on(struct e1000_hw *hw); s32 e1000_led_off(struct e1000_hw *hw); +s32 e1000_id_led_init(struct e1000_hw *hw); void e1000_reset_adaptive(struct e1000_hw *hw); void e1000_update_adaptive(struct e1000_hw *hw); s32 e1000_get_cable_length(struct e1000_hw *hw);
--- a/usr/src/uts/common/io/igb/igb_defines.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_defines.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.79 v2008-10-7 */ +/* IntelVersion: 1.111 v2-9-8_2009-6-12 */ #ifndef _IGB_DEFINES_H #define _IGB_DEFINES_H @@ -131,6 +131,8 @@ #define E1000_CTRL_EXT_PFRSTD 0x00004000 #define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ #define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ +/* DMA Dynamic Clock Gating */ +#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 #define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 #define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 #define E1000_CTRL_EXT_LINK_MODE_TBI 0x00C00000 @@ -382,6 +384,7 @@ #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ #define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ #define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ #define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ @@ -458,6 +461,7 @@ #define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by NVM */ #define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ /* Change in Dock/Undock state. Clear on write '0'. */ +#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_DOCK_CI 0x00000800 #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */ #define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ @@ -667,14 +671,19 @@ /* PBA constants */ #define E1000_PBA_6K 0x0006 /* 6KB */ #define E1000_PBA_8K 0x0008 /* 8KB */ +#define E1000_PBA_10K 0x000A /* 10KB */ #define E1000_PBA_12K 0x000C /* 12KB */ +#define E1000_PBA_14K 0x000E /* 14KB */ #define E1000_PBA_16K 0x0010 /* 16KB */ +#define E1000_PBA_18K 0x0012 #define E1000_PBA_20K 0x0014 #define E1000_PBA_22K 0x0016 #define E1000_PBA_24K 0x0018 +#define E1000_PBA_26K 0x001A #define E1000_PBA_30K 0x001E #define E1000_PBA_32K 0x0020 #define E1000_PBA_34K 0x0022 +#define E1000_PBA_35K 0x0023 #define E1000_PBA_38K 0x0026 #define E1000_PBA_40K 0x0028 #define E1000_PBA_48K 0x0030 /* 48KB */ @@ -695,6 +704,9 @@ #define E1000_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ #define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ +/* Secondary driver semaphore bit */ +#define E1000_SWSM2_LOCK 0x00000002 + /* Interrupt Cause Read */ #define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ #define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ @@ -871,6 +883,8 @@ #define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ #define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ +#define E1000_EITR_ITR_INT_MASK 0x0000FFFF + /* Transmit Descriptor Control */ #define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ #define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ @@ -901,6 +915,10 @@ */ #define E1000_RAR_ENTRIES 15 #define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define E1000_RAL_MAC_ADDR_LEN 4 +#define E1000_RAH_MAC_ADDR_LEN 2 +#define E1000_RAH_POOL_MASK 0x03FC0000 +#define E1000_RAH_POOL_1 0x00040000 /* Error Codes */ #define E1000_SUCCESS 0 @@ -916,6 +934,7 @@ #define E1000_BLK_PHY_RESET 12 #define E1000_ERR_SWFW_SYNC 13 #define E1000_NOT_IMPLEMENTED 14 +#define E1000_ERR_MBX 15 /* Loop limit on how long we wait for auto-negotiation to complete */ #define FIBER_LINK_UP_LIMIT 50 @@ -965,6 +984,10 @@ #define E1000_GCR_TXD_NO_SNOOP 0x00000008 #define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 #define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 +#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 +#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define E1000_GCR_CAP_VER2 0x00040000 #define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ E1000_GCR_RXDSCW_NO_SNOOP | \ @@ -1080,6 +1103,8 @@ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ #define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ +#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ + /* NVM Control */ #define E1000_EECD_SK 0x00000001 /* NVM Clock */ #define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ @@ -1110,6 +1135,7 @@ #define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ #define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ #define E1000_EECD_SECVAL_SHIFT 22 +#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) #define E1000_NVM_SWDPIN0 0x0001 /* SWDPIN 0 NVM Value */ #define E1000_NVM_LED_LOGIC 0x0020 /* Led Logic Word */ @@ -1213,22 +1239,14 @@ #define IGP_LED3_MODE 0x07000000 /* PCI/PCI-X/PCI-EX Config space */ -#define PCIX_COMMAND_REGISTER 0xE6 -#define PCIX_STATUS_REGISTER_LO 0xE8 -#define PCIX_STATUS_REGISTER_HI 0xEA #define PCI_HEADER_TYPE_REGISTER 0x0E #define PCIE_LINK_STATUS 0x12 +#define PCIE_DEVICE_CONTROL2 0x28 -#define PCIX_COMMAND_MMRBC_MASK 0x000C -#define PCIX_COMMAND_MMRBC_SHIFT 0x2 -#define PCIX_STATUS_HI_MMRBC_MASK 0x0060 -#define PCIX_STATUS_HI_MMRBC_SHIFT 0x5 -#define PCIX_STATUS_HI_MMRBC_4K 0x3 -#define PCIX_STATUS_HI_MMRBC_2K 0x2 -#define PCIX_STATUS_LO_FUNC_MASK 0x7 #define PCI_HEADER_TYPE_MULTIFUNC 0x80 #define PCIE_LINK_WIDTH_MASK 0x3F0 #define PCIE_LINK_WIDTH_SHIFT 4 +#define PCIE_DEVICE_CONTROL2_16ms 0x0005 #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6
--- a/usr/src/uts/common/io/igb/igb_hw.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_hw.h Mon Aug 17 09:14:35 2009 -0400 @@ -45,6 +45,8 @@ #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D #define E1000_DEV_ID_82575EB_COPPER 0x10A7 #define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 #define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 @@ -57,6 +59,8 @@ #define E1000_FUNC_0 0 #define E1000_FUNC_1 1 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 enum e1000_mac_type { e1000_undefined = 0, @@ -166,6 +170,13 @@ e1000_smart_speed_off }; +enum e1000_serdes_link_state { + e1000_serdes_link_down = 0, + e1000_serdes_link_autoneg_progress, + e1000_serdes_link_autoneg_complete, + e1000_serdes_link_forced_up +}; + /* Receive Descriptor */ struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ @@ -436,6 +447,7 @@ struct e1000_mac_operations { /* Function pointers for the MAC. */ s32 (*init_params)(struct e1000_hw *); + s32 (*id_led_init)(struct e1000_hw *); s32 (*blink_led)(struct e1000_hw *); s32 (*check_for_link)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *hw); @@ -443,10 +455,11 @@ void (*clear_hw_cntrs)(struct e1000_hw *); void (*clear_vfta)(struct e1000_hw *); s32 (*get_bus_info)(struct e1000_hw *); + void (*set_lan_id)(struct e1000_hw *); s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); s32 (*led_on)(struct e1000_hw *); s32 (*led_off)(struct e1000_hw *); - void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32, u32, u32); + void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); s32 (*reset_hw)(struct e1000_hw *); s32 (*init_hw)(struct e1000_hw *); void (*shutdown_serdes)(struct e1000_hw *); @@ -519,6 +532,10 @@ u16 ifs_ratio; u16 ifs_step_size; u16 mta_reg_count; + + /* Maximum size of the MTA register table in all supported adapters */ +#define MAX_MTA_REG 128 + u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; @@ -530,6 +547,7 @@ bool autoneg_failed; bool get_link_status; bool in_ifs_mode; + enum e1000_serdes_link_state serdes_link_state; bool serdes_has_link; bool tx_pkt_filtering; }; @@ -604,10 +622,12 @@ struct e1000_dev_spec_82575 { bool sgmii_active; + bool global_device_reset; }; struct e1000_dev_spec_vf { u32 vf_number; + u32 v2p_mailbox; }; struct e1000_hw { @@ -641,8 +661,7 @@ /* These functions must be implemented by drivers */ s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); -void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); -void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); +s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); #ifdef __cplusplus }
--- a/usr/src/uts/common/io/igb/igb_mac.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_mac.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.92 v2008-10-7 */ +/* IntelVersion: 1.104 v2-9-8_2009-6-12 */ #include "igb_api.h" @@ -34,6 +34,7 @@ static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw); static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw); static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); +static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); /* * e1000_init_mac_ops_generic - Initialize MAC function pointers @@ -53,6 +54,7 @@ mac->ops.reset_hw = e1000_null_ops_generic; mac->ops.setup_physical_interface = e1000_null_ops_generic; mac->ops.get_bus_info = e1000_null_ops_generic; + mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; mac->ops.read_mac_addr = e1000_read_mac_addr_generic; mac->ops.config_collision_dist = e1000_config_collision_dist_generic; mac->ops.clear_hw_cntrs = e1000_null_mac_generic; @@ -133,10 +135,10 @@ * @hw: pointer to the HW structure */ void -e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a, u32 b, u32 c) +e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a) { DEBUGFUNC("e1000_null_update_mc"); - UNREFERENCED_5PARAMETER(hw, h, a, b, c); + UNREFERENCED_3PARAMETER(hw, h, a); } /* @@ -173,67 +175,6 @@ } /* - * e1000_get_bus_info_pci_generic - Get PCI(x) bus information - * @hw: pointer to the HW structure - * - * Determines and stores the system bus information for a particular - * network interface. The following bus information is determined and stored: - * bus speed, bus width, type (PCI/PCIx), and PCI(-x) function. - */ -s32 -e1000_get_bus_info_pci_generic(struct e1000_hw *hw) -{ - struct e1000_bus_info *bus = &hw->bus; - u32 status = E1000_READ_REG(hw, E1000_STATUS); - s32 ret_val = E1000_SUCCESS; - u16 pci_header_type; - - DEBUGFUNC("e1000_get_bus_info_pci_generic"); - - /* PCI or PCI-X? */ - bus->type = (status & E1000_STATUS_PCIX_MODE) - ? e1000_bus_type_pcix - : e1000_bus_type_pci; - - /* Bus speed */ - if (bus->type == e1000_bus_type_pci) { - bus->speed = (status & E1000_STATUS_PCI66) - ? e1000_bus_speed_66 - : e1000_bus_speed_33; - } else { - switch (status & E1000_STATUS_PCIX_SPEED) { - case E1000_STATUS_PCIX_SPEED_66: - bus->speed = e1000_bus_speed_66; - break; - case E1000_STATUS_PCIX_SPEED_100: - bus->speed = e1000_bus_speed_100; - break; - case E1000_STATUS_PCIX_SPEED_133: - bus->speed = e1000_bus_speed_133; - break; - default: - bus->speed = e1000_bus_speed_reserved; - break; - } - } - - /* Bus width */ - bus->width = (status & E1000_STATUS_BUS64) - ? e1000_bus_width_64 - : e1000_bus_width_32; - - /* Which PCI(-X) function? */ - e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); - if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) - bus->func = (status & E1000_STATUS_FUNC_MASK) - >> E1000_STATUS_FUNC_SHIFT; - else - bus->func = 0; - - return (ret_val); -} - -/* * e1000_get_bus_info_pcie_generic - Get PCIe bus information * @hw: pointer to the HW structure * @@ -244,10 +185,10 @@ s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) { + struct e1000_mac_info *mac = &hw->mac; struct e1000_bus_info *bus = &hw->bus; s32 ret_val; - u32 status; - u16 pcie_link_status, pci_header_type; + u16 pcie_link_status; DEBUGFUNC("e1000_get_bus_info_pcie_generic"); @@ -262,19 +203,48 @@ bus->width = (enum e1000_bus_width)((pcie_link_status & PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); - e1000_read_pci_cfg(hw, PCI_HEADER_TYPE_REGISTER, &pci_header_type); - if (pci_header_type & PCI_HEADER_TYPE_MULTIFUNC) { - status = E1000_READ_REG(hw, E1000_STATUS); - bus->func = (status & E1000_STATUS_FUNC_MASK) - >> E1000_STATUS_FUNC_SHIFT; - } else { - bus->func = 0; - } + mac->ops.set_lan_id(hw); return (E1000_SUCCESS); } /* + * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices + * + * @hw: pointer to the HW structure + * + * Determines the LAN function id by reading memory-mapped registers + * and swaps the port value if requested. + */ +static void +e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + u32 reg; + + /* + * The status register reports the correct function number + * for the device regardless of function swap state. + */ + reg = E1000_READ_REG(hw, E1000_STATUS); + bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; +} + +/* + * e1000_set_lan_id_single_port - Set LAN id for a single port device + * @hw: pointer to the HW structure + * + * Sets the LAN function id to zero for a single port device. + */ +void +e1000_set_lan_id_single_port(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + + bus->func = 0; +} + +/* * e1000_clear_vfta_generic - Clear VLAN filter table * @hw: pointer to the HW structure * @@ -325,6 +295,7 @@ e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) { u32 i; + u8 mac_addr[ETH_ADDR_LEN] = {0}; DEBUGFUNC("e1000_init_rx_addrs_generic"); @@ -335,12 +306,8 @@ /* Zero out the other (rar_entry_count - 1) receive addresses */ DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); - for (i = 1; i < rar_count; i++) { - E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1), 0); - E1000_WRITE_FLUSH(hw); - E1000_WRITE_REG_ARRAY(hw, E1000_RA, ((i << 1) + 1), 0); - E1000_WRITE_FLUSH(hw); - } + for (i = 1; i < rar_count; i++) + hw->mac.ops.rar_set(hw, mac_addr, i); } /* @@ -350,9 +317,10 @@ * Checks the nvm for an alternate MAC address. An alternate MAC address * can be setup by pre-boot software and must be treated like a permanent * address and must override the actual permanent MAC address. If an - * alternate MAC address is found it is saved in the hw struct and - * programmed into RAR0 and the function returns success, otherwise the - * function returns an error. + * alternate MAC address is found it is programmed into RAR0, replacing + * the permanent address that was installed into RAR0 by the Si on reset. + * This function will return SUCCESS unless it encounters an error while + * reading the EEPROM. */ s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) @@ -372,13 +340,12 @@ } if (nvm_alt_mac_addr_offset == 0xFFFF) { - ret_val = -(E1000_NOT_IMPLEMENTED); + /* There is no Alternate MAC Address */ goto out; } if (hw->bus.func == E1000_FUNC_1) - nvm_alt_mac_addr_offset += ETH_ADDR_LEN / sizeof (u16); - + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; for (i = 0; i < ETH_ADDR_LEN; i += 2) { offset = nvm_alt_mac_addr_offset + (i >> 1); ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); @@ -393,14 +360,16 @@ /* if multicast bit is set, the alternate address will not be used */ if (alt_mac_addr[0] & 0x01) { - ret_val = -(E1000_NOT_IMPLEMENTED); + DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); goto out; } - for (i = 0; i < ETH_ADDR_LEN; i++) - hw->mac.addr[i] = hw->mac.perm_addr[i] = alt_mac_addr[i]; - - hw->mac.ops.rar_set(hw, hw->mac.perm_addr, 0); + /* + * We have a valid alternate MAC address, and we want to treat it the + * same as the normal permanent MAC address stored by the HW into the + * RAR. Do this by mapping this address into RAR0. + */ + hw->mac.ops.rar_set(hw, alt_mac_addr, 0); out: return (ret_val); @@ -436,8 +405,15 @@ if (rar_low || rar_high) rar_high |= E1000_RAH_AV; + /* + * Some bridges will combine consecutive 32-bit writes into + * a single burst write, which will malfunction on some parts. + * The flushes avoid this. + */ E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); + E1000_WRITE_FLUSH(hw); E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); + E1000_WRITE_FLUSH(hw); } /* @@ -482,56 +458,37 @@ * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program - * @rar_used_count: the first RAR register free to program - * @rar_count: total number of supported Receive Address Registers * - * Updates the Receive Address Registers and Multicast Table Array. + * Updates the Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. - * The parameter rar_count will usually be hw->mac.rar_entry_count - * unless there are workarounds that change this. */ void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count, - u32 rar_used_count, u32 rar_count) + u8 *mc_addr_list, u32 mc_addr_count) { - u32 hash_value; - u32 i; + u32 hash_value, hash_bit, hash_reg; + int i; DEBUGFUNC("e1000_update_mc_addr_list_generic"); - /* - * Load the first set of multicast addresses into the exact - * filters (RAR). If there are not enough to fill the RAR - * array, clear the filters. - */ - for (i = rar_used_count; i < rar_count; i++) { - if (mc_addr_count) { - hw->mac.ops.rar_set(hw, mc_addr_list, i); - mc_addr_count--; - mc_addr_list += ETH_ADDR_LEN; - } else { - E1000_WRITE_REG_ARRAY(hw, E1000_RA, i << 1, 0); - E1000_WRITE_FLUSH(hw); - E1000_WRITE_REG_ARRAY(hw, E1000_RA, (i << 1) + 1, 0); - E1000_WRITE_FLUSH(hw); - } + /* clear mta_shadow */ + memset(&hw->mac.mta_shadow, 0, sizeof (hw->mac.mta_shadow)); + + /* update mta_shadow from mc_addr_list */ + for (i = 0; (u32) i < mc_addr_count; i++) { + hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); + + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); + mc_addr_list += (ETH_ADDR_LEN); } - /* Clear the old settings from the MTA */ - DEBUGOUT("Clearing MTA\n"); - for (i = 0; i < hw->mac.mta_reg_count; i++) { - E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); - E1000_WRITE_FLUSH(hw); - } - - /* Load any remaining multicast addresses into the hash table. */ - for (; mc_addr_count > 0; mc_addr_count--) { - hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); - DEBUGOUT1("Hash value = 0x%03X\n", hash_value); - hw->mac.ops.mta_set(hw, hash_value); - mc_addr_list += ETH_ADDR_LEN; - } + /* replace the entire MTA table */ + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); + E1000_WRITE_FLUSH(hw); } /* @@ -609,44 +566,6 @@ } /* - * e1000_pcix_mmrbc_workaround_generic - Fix incorrect MMRBC value - * @hw: pointer to the HW structure - * - * In certain situations, a system BIOS may report that the PCIx maximum - * memory read byte count (MMRBC) value is higher than than the actual - * value. We check the PCIx command register with the current PCIx status - * register. - */ -void -e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw) -{ - u16 cmd_mmrbc; - u16 pcix_cmd; - u16 pcix_stat_hi_word; - u16 stat_mmrbc; - - DEBUGFUNC("e1000_pcix_mmrbc_workaround_generic"); - - /* Workaround for PCI-X issue when BIOS sets MMRBC incorrectly */ - if (hw->bus.type != e1000_bus_type_pcix) - return; - - e1000_read_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); - e1000_read_pci_cfg(hw, PCIX_STATUS_REGISTER_HI, &pcix_stat_hi_word); - cmd_mmrbc = (pcix_cmd & PCIX_COMMAND_MMRBC_MASK) >> - PCIX_COMMAND_MMRBC_SHIFT; - stat_mmrbc = (pcix_stat_hi_word & PCIX_STATUS_HI_MMRBC_MASK) >> - PCIX_STATUS_HI_MMRBC_SHIFT; - if (stat_mmrbc == PCIX_STATUS_HI_MMRBC_4K) - stat_mmrbc = PCIX_STATUS_HI_MMRBC_2K; - if (cmd_mmrbc > stat_mmrbc) { - pcix_cmd &= ~PCIX_COMMAND_MMRBC_MASK; - pcix_cmd |= stat_mmrbc << PCIX_COMMAND_MMRBC_SHIFT; - e1000_write_pci_cfg(hw, PCIX_COMMAND_REGISTER, &pcix_cmd); - } -} - -/* * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters * @hw: pointer to the HW structure *
--- a/usr/src/uts/common/io/igb/igb_mac.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_mac.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.29 v2008-10-7 */ +/* IntelVersion: 1.32 v2-9-8_2009-6-12 */ #ifndef _IGB_MAC_H #define _IGB_MAC_H @@ -44,7 +44,7 @@ s32 e1000_null_ops_generic(struct e1000_hw *hw); s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); bool e1000_null_mng_mode(struct e1000_hw *hw); -void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a, u32 b, u32 c); +void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); void e1000_null_mta_set(struct e1000_hw *hw, u32 a); void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); @@ -57,8 +57,8 @@ s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); -s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); +void e1000_set_lan_id_single_port(struct e1000_hw *hw); s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); @@ -68,8 +68,7 @@ s32 e1000_led_on_generic(struct e1000_hw *hw); s32 e1000_led_off_generic(struct e1000_hw *hw); void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count, - u32 rar_used_count, u32 rar_count); + u8 *mc_addr_list, u32 mc_addr_count); s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); s32 e1000_setup_led_generic(struct e1000_hw *hw);
--- a/usr/src/uts/common/io/igb/igb_main.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_main.c Mon Aug 17 09:14:35 2009 -0400 @@ -29,7 +29,7 @@ #include "igb_sw.h" static char ident[] = "Intel 1Gb Ethernet"; -static char igb_version[] = "igb 1.1.7"; +static char igb_version[] = "igb 1.1.8"; /* * Local function protoypes @@ -2584,8 +2584,7 @@ /* * Update the multicase addresses to the MTA registers */ - e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count, - igb->unicst_total, hw->mac.rar_entry_count); + e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count); } /* @@ -2680,7 +2679,7 @@ igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE, MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE); - igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 1); + igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0); igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM, MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM); /*
--- a/usr/src/uts/common/io/igb/igb_manage.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_manage.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.27 v2008-10-7 */ +/* IntelVersion: 1.27 v2-9-8_2009-6-12 */ #include "igb_api.h"
--- a/usr/src/uts/common/io/igb/igb_manage.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_manage.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.18 v2008-10-7 */ +/* IntelVersion: 1.18 v2-9-8_2009-6-12 */ #ifndef _IGB_MANAGE_H #define _IGB_MANAGE_H
--- a/usr/src/uts/common/io/igb/igb_nvm.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_nvm.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.46 v2008-10-7 */ +/* IntelVersion: 1.49 v2-9-8_2009-6-12 */ #include "igb_api.h" @@ -771,31 +771,23 @@ s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) { - s32 ret_val = E1000_SUCCESS; - u16 offset, nvm_data, i; - - DEBUGFUNC("e1000_read_mac_addr"); + u32 rar_high; + u32 rar_low; + u16 i; - for (i = 0; i < ETH_ADDR_LEN; i += 2) { - offset = i >> 1; - ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); - if (ret_val) { - DEBUGOUT("NVM Read Error\n"); - goto out; - } - hw->mac.perm_addr[i] = (u8)(nvm_data & 0xFF); - hw->mac.perm_addr[i+1] = (u8)(nvm_data >> 8); - } + rar_high = E1000_READ_REG(hw, E1000_RAH(0)); + rar_low = E1000_READ_REG(hw, E1000_RAL(0)); - /* Flip last bit of mac address if we're on second port */ - if (hw->bus.func == E1000_FUNC_1) - hw->mac.perm_addr[5] ^= 1; + for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); + + for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); for (i = 0; i < ETH_ADDR_LEN; i++) hw->mac.addr[i] = hw->mac.perm_addr[i]; -out: - return (ret_val); + return (E1000_SUCCESS); } /*
--- a/usr/src/uts/common/io/igb/igb_nvm.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_nvm.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.18 v2008-10-7 */ +/* IntelVersion: 1.18 v2-9-8_2009-6-12 */ #ifndef _IGB_NVM_H #define _IGB_NVM_H
--- a/usr/src/uts/common/io/igb/igb_osdep.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_osdep.c Mon Aug 17 09:14:35 2009 -0400 @@ -31,22 +31,6 @@ void -e1000_pci_set_mwi(struct e1000_hw *hw) -{ - uint16_t val = hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE; - - e1000_write_pci_cfg(hw, PCI_COMMAND_REGISTER, &val); -} - -void -e1000_pci_clear_mwi(struct e1000_hw *hw) -{ - uint16_t val = hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE; - - e1000_write_pci_cfg(hw, PCI_COMMAND_REGISTER, &val); -} - -void e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) { pci_config_put16(OS_DEP(hw)->cfg_handle, reg, *value); @@ -61,25 +45,50 @@ /* * Return the 16-bit value from pci-e config space at offset reg into the pci-e - * capability block. + * capability block. Note that this refers to the pci-e capability block in + * standard pci config space, not the block in pci-e extended config space. */ int32_t e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) { - uint32_t pcie_cap = PCI_EX_CONF_CAP; /* default */ + uint8_t pcie_id = PCI_CAP_ID_PCI_E; + uint16_t pcie_cap; + int32_t status; - switch (hw->mac.type) { - case e1000_82575: - pcie_cap = 0xa0; - break; - case e1000_82576: - pcie_cap = 0xa0; - break; + /* locate the pci-e capability block */ + status = pci_lcap_locate((OS_DEP(hw))->cfg_handle, pcie_id, &pcie_cap); + if (status == DDI_SUCCESS) { + + /* read at given offset into block */ + *value = pci_config_get16(OS_DEP(hw)->cfg_handle, + (pcie_cap + reg)); } - *value = pci_config_get16(OS_DEP(hw)->cfg_handle, (pcie_cap + reg)); + return (status); +} - return (0); +/* + * Write the given 16-bit value to pci-e config space at offset reg into the + * pci-e capability block. Note that this refers to the pci-e capability block + * in standard pci config space, not the block in pci-e extended config space. + */ +int32_t +e1000_write_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value) +{ + uint8_t pcie_id = PCI_CAP_ID_PCI_E; + uint16_t pcie_cap; + int32_t status; + + /* locate the pci-e capability block */ + status = pci_lcap_locate(OS_DEP(hw)->cfg_handle, pcie_id, &pcie_cap); + if (status == DDI_SUCCESS) { + + /* write at given offset into block */ + pci_config_put16(OS_DEP(hw)->cfg_handle, + (off_t)(pcie_cap + reg), *value); + } + + return (status); } /*
--- a/usr/src/uts/common/io/igb/igb_osdep.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_osdep.h Mon Aug 17 09:14:35 2009 -0400 @@ -48,6 +48,7 @@ #include <sys/dditypes.h> #include <sys/sunddi.h> #include <sys/pci.h> +#include <sys/pci_cap.h> #include <sys/atomic.h> #include <sys/note.h> #include "igb_debug.h"
--- a/usr/src/uts/common/io/igb/igb_phy.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_phy.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.112 v2008-10-7 */ +/* IntelVersion: 1.140 v2-9-8_2009-6-12 */ #include "igb_api.h" @@ -1283,6 +1283,76 @@ } /* + * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex + * @hw: pointer to the HW structure + * + * Forces the speed and duplex settings of the PHY. + * This is a function pointer entry point only called by + * PHY setup routines. + */ +s32 +e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); + + if (phy->type != e1000_phy_ife) { + ret_val = e1000_phy_force_speed_duplex_igp(hw); + goto out; + } + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data); + if (ret_val) + goto out; + + e1000_phy_force_speed_duplex_setup(hw, &data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data); + if (ret_val) + goto out; + + /* Disable MDI-X support for 10/100 */ + ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); + if (ret_val) + goto out; + + data &= ~IFE_PMC_AUTO_MDIX; + data &= ~IFE_PMC_FORCE_MDIX; + + ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data); + if (ret_val) + goto out; + + DEBUGOUT1("IFE PMC: %X\n", data); + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, + PHY_FORCE_LIMIT, 100000, &link); + if (ret_val) + goto out; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, + PHY_FORCE_LIMIT, 100000, &link); + if (ret_val) + goto out; + } + +out: + return (ret_val); +} +/* * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex * @hw: pointer to the HW structure * @phy_ctrl: pointer to current value of PHY_CONTROL @@ -1566,6 +1636,41 @@ } /* + * e1000_check_polarity_ife - Check cable polarity for IFE PHY + * @hw: pointer to the HW structure + * + * Polarity is determined on the polarity reversal feature being enabled. + */ +s32 +e1000_check_polarity_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, offset, mask; + + DEBUGFUNC("e1000_check_polarity_ife"); + + /* + * Polarity is determined based on the reversal feature being enabled. + */ + if (phy->polarity_correction) { + offset = IFE_PHY_EXTENDED_STATUS_CONTROL; + mask = IFE_PESC_POLARITY_REVERSED; + } else { + offset = IFE_PHY_SPECIAL_CONTROL; + mask = IFE_PSC_FORCE_POLARITY; + } + + ret_val = phy->ops.read_reg(hw, offset, &phy_data); + + if (!ret_val) + phy->cable_polarity = (phy_data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal; + + return (ret_val); +} +/* * e1000_wait_autoneg_generic - Wait for auto-neg completion * @hw: pointer to the HW structure * @@ -1631,9 +1736,16 @@ * it across the board. */ ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); - if (ret_val) - break; - ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) { + /* + * If the first read fails, another entity may have + * ownership of the resources, wait and try again to + * see if they have relinquished the resources yet. + */ + usec_delay(usec_interval); + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, + &phy_status); + } if (ret_val) break; if (phy_status & MII_SR_LINK_STATUS) @@ -1680,14 +1792,15 @@ index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> M88E1000_PSSR_CABLE_LENGTH_SHIFT; if (index < (M88E1000_CABLE_LENGTH_TABLE_SIZE + 1)) { - phy->min_cable_length = e1000_m88_cable_length_table[index]; - phy->max_cable_length = e1000_m88_cable_length_table[index+1]; + ret_val = E1000_ERR_PHY; + goto out; + } - phy->cable_length = (phy->min_cable_length + - phy->max_cable_length) / 2; - } else { - ret_val = E1000_ERR_PHY; - } + phy->min_cable_length = e1000_m88_cable_length_table[index]; + phy->max_cable_length = e1000_m88_cable_length_table[index+1]; + + phy->cable_length = (phy->min_cable_length + + phy->max_cable_length) / 2; out: return (ret_val); @@ -2140,6 +2253,48 @@ } /* + * e1000_determine_phy_address - Determines PHY address. + * @hw: pointer to the HW structure + * + * This uses a trial and error method to loop through possible PHY + * addresses. It tests each by reading the PHY ID registers and + * checking for a match. + */ +s32 +e1000_determine_phy_address(struct e1000_hw *hw) +{ + s32 ret_val = -E1000_ERR_PHY_TYPE; + u32 phy_addr = 0; + u32 i; + enum e1000_phy_type phy_type = e1000_phy_unknown; + + hw->phy.id = phy_type; + + for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { + hw->phy.addr = phy_addr; + i = 0; + + do { + e1000_get_phy_id(hw); + phy_type = e1000_get_phy_type_from_id(hw->phy.id); + + /* + * If phy_type is valid, break - we found our + * PHY address + */ + if (phy_type != e1000_phy_unknown) { + ret_val = E1000_SUCCESS; + goto out; + } + msec_delay(1); + i++; + } while (i < 10); + } + +out: + return (ret_val); +} +/* * e1000_power_up_phy_copper - Restore copper link in case of PHY power down * @hw: pointer to the HW structure *
--- a/usr/src/uts/common/io/igb/igb_phy.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_phy.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.55 v2008-10-7 */ +/* IntelVersion: 1.69 v2-9-8_2009-6-12 */ #ifndef _IGB_PHY_H #define _IGB_PHY_H @@ -43,12 +43,14 @@ s32 e1000_check_downshift_generic(struct e1000_hw *hw); s32 e1000_check_polarity_m88(struct e1000_hw *hw); s32 e1000_check_polarity_igp(struct e1000_hw *hw); +s32 e1000_check_polarity_ife(struct e1000_hw *hw); s32 e1000_check_reset_block_generic(struct e1000_hw *hw); s32 e1000_copper_link_autoneg(struct e1000_hw *hw); s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); s32 e1000_get_cable_length_m88(struct e1000_hw *hw); s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); @@ -73,6 +75,7 @@ u32 usec_interval, bool *success); s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); +s32 e1000_determine_phy_address(struct e1000_hw *hw); void e1000_power_up_phy_copper(struct e1000_hw *hw); void e1000_power_down_phy_copper(struct e1000_hw *hw); s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); @@ -112,7 +115,7 @@ #define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 #define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 -#define IGP01E1000_PSSR_MDIX 0x0008 +#define IGP01E1000_PSSR_MDIX 0x0800 #define IGP01E1000_PSSR_SPEED_MASK 0xC000 #define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 @@ -135,6 +138,8 @@ #define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 #define E1000_KMRNCTRLSTA_REN 0x00200000 #define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ +#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ +#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ #define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ #define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10
--- a/usr/src/uts/common/io/igb/igb_regs.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_regs.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms of the CDDL. */ -/* IntelVersion: 1.61 v2008-10-7 */ +/* IntelVersion: 1.70 v2-9-8_2009-6-12 */ #ifndef _IGB_REGS_H #define _IGB_REGS_H @@ -426,6 +426,7 @@ #define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ #define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ #define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */ #define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ #define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ #define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ @@ -434,6 +435,8 @@ #define E1000_FACTPS 0x05B30 #define E1000_SWSM 0x05B50 /* SW Semaphore */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ +/* Driver-only SW semaphore (not used by BOOT agents) */ +#define E1000_SWSM2 0x05B58 #define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ #define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ @@ -472,7 +475,6 @@ #define E1000_VFTE 0x00C90 /* VF Transmit Enables */ #define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ #define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ -#define E1000_VLVF 0x05D00 /* VLAN Virtual Machine Filter - RW */ #define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ #define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ #define E1000_IOVTCL 0x05BBC /* IOV Control Register */ @@ -483,12 +485,15 @@ #define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) #define E1000_VFVMBMEM(_n) (0x00800 + (_n)) #define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) +/* VLAN Virtual Machine Filter - RW */ +#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* Filtering Registers */ #define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ #define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ #define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */ #define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ +#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */ #define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ #define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
--- a/usr/src/uts/common/io/igb/igb_sw.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/igb/igb_sw.h Mon Aug 17 09:14:35 2009 -0400 @@ -830,6 +830,10 @@ /* * Function prototypes in e1000_osdep.c */ +void e1000_write_pci_cfg(struct e1000_hw *, uint32_t, uint16_t *); +void e1000_read_pci_cfg(struct e1000_hw *, uint32_t, uint16_t *); +int32_t e1000_read_pcie_cap_reg(struct e1000_hw *, uint32_t, uint16_t *); +int32_t e1000_write_pcie_cap_reg(struct e1000_hw *, uint32_t, uint16_t *); void e1000_rar_clear(struct e1000_hw *hw, uint32_t); void e1000_rar_set_vmdq(struct e1000_hw *hw, const uint8_t *, uint32_t, uint32_t, uint8_t);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_82598.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_82598.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,13 +26,14 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.138 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.144 v2-9-1-1_2009-6-10_NSW1 */ #include "ixgbe_type.h" #include "ixgbe_api.h" #include "ixgbe_common.h" #include "ixgbe_phy.h" +u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw); s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw); static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, ixgbe_link_speed *speed, bool *autoneg); @@ -59,6 +60,7 @@ u8 *eeprom_data); u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw); s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw); +void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw); /* * ixgbe_get_pcie_msix_count_82598 - Gets MSI-X vector count @@ -113,6 +115,7 @@ &ixgbe_get_supported_physical_layer_82598; mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598; mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598; + mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598; /* RAR, Multicast, VLAN */ mac->ops.set_vmdq = &ixgbe_set_vmdq_82598; @@ -133,12 +136,6 @@ /* SFP+ Module */ phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598; - /* - * Call PHY identify routine to assign the phy type for PHY init - * and media type determination - */ - phy->ops.identify(hw); - /* Link */ mac->ops.check_link = &ixgbe_check_mac_link_82598; mac->ops.setup_link = &ixgbe_setup_mac_link_82598; @@ -184,6 +181,10 @@ phy->ops.get_firmware_version = &ixgbe_get_phy_firmware_version_tnx; break; + case ixgbe_phy_aq: + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_aq; + break; case ixgbe_phy_nl: phy->ops.reset = &ixgbe_reset_phy_nl; @@ -283,7 +284,8 @@ /* Detect if there is a copper PHY attached. */ if (hw->phy.type == ixgbe_phy_cu_unknown || - hw->phy.type == ixgbe_phy_tn) { + hw->phy.type == ixgbe_phy_tn || + hw->phy.type == ixgbe_phy_aq) { media_type = ixgbe_media_type_copper; goto out; } @@ -411,7 +413,7 @@ } /* Configure pause time (2 TCs per register) */ - reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num)); + reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num / 2)); if ((packetbuf_num & 1) == 0) reg = (reg & 0xFFFF0000) | hw->fc.pause_time; else @@ -673,6 +675,7 @@ ixgbe_reset_hw_82598(struct ixgbe_hw *hw) { s32 status = IXGBE_SUCCESS; + s32 phy_status = IXGBE_SUCCESS; u32 ctrl; u32 gheccr; u32 i; @@ -720,13 +723,16 @@ /* PHY ops must be identified and initialized prior to reset */ /* Init PHY and function pointers, perform SFP setup */ - status = hw->phy.ops.init(hw); - /* Do not return error if SFP module is not supported. */ - status = IXGBE_SUCCESS; + phy_status = hw->phy.ops.init(hw); + if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + else if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + goto no_phy_reset; hw->phy.ops.reset(hw); } +no_phy_reset: /* * Prevent the PCI-E bus from from hanging by disabling PCI-E master * access and verify no pending requests before reset @@ -776,14 +782,18 @@ IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc); } + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + /* * Store MAC address from RAR0, clear receive address registers, and * clear the multicast table */ hw->mac.ops.init_rx_addrs(hw); - /* Store the permanent mac address */ - hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); +reset_hw_out: + if (phy_status != IXGBE_SUCCESS) + status = phy_status; return (status); } @@ -1107,3 +1117,33 @@ out: return (physical_layer); } + +/* + * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple + * port devices. + * @hw: pointer to the HW structure + * + * Calls common function and corrects issue with some single port devices + * that enable LAN1 but not LAN0. + */ +void +ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_bus_info *bus = &hw->bus; + u16 pci_gen, pci_ctrl2; + + ixgbe_set_lan_id_multi_port_pcie(hw); + + /* check if LAN0 is disabled */ + hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen); + if ((pci_gen != 0) && (pci_gen != 0xFFFF)) { + hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2); + + /* if LAN0 is completely disabled force function to 0 */ + if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) { + bus->func = 0; + } + } +}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_82599.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_82599.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,13 +26,14 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.155 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.176 v2-9-1-1_2009-6-10_NSW1 */ #include "ixgbe_type.h" #include "ixgbe_api.h" #include "ixgbe_common.h" #include "ixgbe_phy.h" +u32 ixgbe_get_pcie_msix_count_82599(struct ixgbe_hw *hw); s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, ixgbe_link_speed *speed, bool *autoneg); @@ -73,6 +74,7 @@ s32 ixgbe_get_san_mac_addr_82599(struct ixgbe_hw *hw, u8 *san_mac_addr); s32 ixgbe_set_san_mac_addr_82599(struct ixgbe_hw *hw, u8 *san_mac_addr); s32 ixgbe_get_device_caps_82599(struct ixgbe_hw *hw, u16 *device_caps); +static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) @@ -115,6 +117,8 @@ /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto init_phy_ops_out; /* Setup function pointers based on detected SFP module and speeds */ ixgbe_init_mac_link_ops_82599(hw); @@ -137,10 +141,15 @@ phy->ops.get_firmware_version = &ixgbe_get_phy_firmware_version_tnx; break; + case ixgbe_phy_aq: + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_aq; + break; default: break; } +init_phy_ops_out: return (ret_val); } @@ -163,16 +172,27 @@ if (ret_val != IXGBE_SUCCESS) goto setup_sfp_out; + /* PHY config will finish before releasing the semaphore */ + ret_val = ixgbe_acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + if (ret_val != IXGBE_SUCCESS) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto setup_sfp_out; + } + hw->eeprom.ops.read(hw, ++data_offset, &data_value); while (data_value != 0xffff) { IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value); IXGBE_WRITE_FLUSH(hw); hw->eeprom.ops.read(hw, ++data_offset, &data_value); } - /* Now restart DSP */ - IXGBE_WRITE_REG(hw, IXGBE_CORECTL, 0x00000102); - IXGBE_WRITE_REG(hw, IXGBE_CORECTL, 0x00000b1d); - IXGBE_WRITE_FLUSH(hw); + /* Now restart DSP by setting Restart_AN */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, + (IXGBE_READ_REG(hw, IXGBE_AUTOC) | IXGBE_AUTOC_AN_RESTART)); + + /* Release the semaphore */ + ixgbe_release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + /* Delay obtaining semaphore again to allow FW access */ + msec_delay(hw->eeprom.semaphore_delay); } setup_sfp_out: @@ -368,7 +388,8 @@ /* Detect if there is a copper PHY attached. */ if (hw->phy.type == ixgbe_phy_cu_unknown || - hw->phy.type == ixgbe_phy_tn) { + hw->phy.type == ixgbe_phy_tn || + hw->phy.type == ixgbe_phy_aq) { media_type = ixgbe_media_type_copper; goto out; } @@ -376,6 +397,7 @@ switch (hw->device_id) { case IXGBE_DEV_ID_82599_KX4: case IXGBE_DEV_ID_82599_KX4_SIK: + case IXGBE_DEV_ID_82599_XAUI_LOM: /* Default device ID is mezzanine card KX/KX4 */ media_type = ixgbe_media_type_backplane; break; @@ -481,6 +503,7 @@ ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN; u32 speedcnt = 0; u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + u32 i = 0; bool link_up = false; bool negotiation; @@ -491,6 +514,25 @@ speed &= link_speed; + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + /* + * When the driver changes the link speeds that it can support, + * it sets autotry_restart to true to indicate that we need to + * initiate a new autotry session with the link partner. To do + * so, we set the speed then disable and re-enable the tx laser, to + * alert the link partner that it also needs to restart autotry on its + * end. This is consistent with true clause 37 autoneg, which also + * involves a loss of signal. + */ + /* * Try each speed one by one, highest priority first. We do this in * software because 10gb fiber doesn't support speed autonegotiation. @@ -507,7 +549,7 @@ if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) goto out; - /* Set hardware SDP's */ + /* Set the module link speed */ esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); @@ -520,15 +562,35 @@ if (status != IXGBE_SUCCESS) goto out; - msec_delay(100); + /* Flap the tx laser if it has not already been done */ + if (hw->mac.autotry_restart) { + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + usec_delay(100); + + /* Enable tx laser; allow 2ms to light up per spec */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + msec_delay(2); + + hw->mac.autotry_restart = false; + } - /* If we have link, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != IXGBE_SUCCESS) - goto out; + /* The controller may take up to 500ms at 10g to acquire link */ + for (i = 0; i < 5; i++) { + /* Wait for the link partner to also set speed */ + msec_delay(100); - if (link_up) - goto out; + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, + &link_up, false); + if (status != IXGBE_SUCCESS) + goto out; + + if (link_up) + goto out; + } } if (speed & IXGBE_LINK_SPEED_1GB_FULL) { @@ -544,7 +606,7 @@ if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) goto out; - /* Set hardware SDP's */ + /* Set the module link speed */ esdp_reg &= ~IXGBE_ESDP_SDP5; esdp_reg |= IXGBE_ESDP_SDP5_DIR; IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); @@ -558,6 +620,22 @@ if (status != IXGBE_SUCCESS) goto out; + /* Flap the tx laser if it has not already been done */ + if (hw->mac.autotry_restart) { + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + usec_delay(100); + + /* Enable tx laser; allow 2ms to light up per spec */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + msec_delay(2); + + hw->mac.autotry_restart = false; + } + + /* Wait for the link partner to also set speed */ msec_delay(100); /* If we have link, just jump out */ @@ -651,6 +729,8 @@ s32 status = IXGBE_SUCCESS; u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + u32 start_autoc = autoc; + u32 orig_autoc = 0; u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; @@ -667,15 +747,26 @@ if (speed == IXGBE_LINK_SPEED_UNKNOWN) { status = IXGBE_ERR_LINK_SETUP; - } else if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || + goto out; + } + + /* + * Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support + */ + if (hw->mac.orig_link_settings_stored) + orig_autoc = hw->mac.orig_autoc; + else + orig_autoc = autoc; + + if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { /* Set KX4/KX/KR support according to speed requested */ autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); if (speed & IXGBE_LINK_SPEED_10GB_FULL) - if (autoc & IXGBE_AUTOC_KX4_SUPP) + if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) autoc |= IXGBE_AUTOC_KX4_SUPP; - if (autoc & IXGBE_AUTOC_KR_SUPP) + if (orig_autoc & IXGBE_AUTOC_KR_SUPP) autoc |= IXGBE_AUTOC_KR_SUPP; if (speed & IXGBE_LINK_SPEED_1GB_FULL) autoc |= IXGBE_AUTOC_KX_SUPP; @@ -701,7 +792,7 @@ } } - if (status == IXGBE_SUCCESS) { + if (autoc != start_autoc) { /* Restart link */ autoc |= IXGBE_AUTOC_AN_RESTART; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); @@ -803,12 +894,18 @@ /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + /* Setup SFP module if there is one present. */ if (hw->phy.sfp_setup_needed) { status = hw->mac.ops.setup_sfp(hw); hw->phy.sfp_setup_needed = false; } + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + /* Reset PHY */ if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL) hw->phy.ops.reset(hw); @@ -877,6 +974,9 @@ } } + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + /* * Store MAC address from RAR0, clear receive address registers, and * clear the multicast table. Also reset num_rar_entries to 128, @@ -885,8 +985,8 @@ hw->mac.num_rar_entries = 128; hw->mac.ops.init_rx_addrs(hw); - /* Store the permanent mac address */ - hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + /* Store the permanent SAN mac address */ + hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); /* Add the SAN MAC address to the RAR only if it's a valid address */ if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { @@ -897,6 +997,7 @@ hw->mac.num_rar_entries--; } +reset_hw_out: return (status); } @@ -1011,6 +1112,7 @@ } } else { DEBUGOUT1("RAR index %d is out of range.\n", rar); + return (IXGBE_ERR_PARAM); } done: return (IXGBE_SUCCESS); @@ -1040,6 +1142,7 @@ } } else { DEBUGOUT1("RAR index %d is out of range.\n", rar); + return (IXGBE_ERR_PARAM); } return (IXGBE_SUCCESS); @@ -1115,6 +1218,7 @@ regindex = first_empty_slot; else { DEBUGOUT("No space in VLVF.\n"); + goto out; } } @@ -1162,6 +1266,7 @@ IXGBE_WRITE_REG(hw, IXGBE_VLVF(regindex), 0); } +out: return (IXGBE_SUCCESS); } @@ -1211,16 +1316,75 @@ s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) { + int i; u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL); fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE; + + /* + * Before starting reinitialization process, + * FDIRCMD.CMD must be zero. + */ + for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) { + if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + IXGBE_FDIRCMD_CMD_MASK)) + break; + usec_delay(10); + } + if (i >= IXGBE_FDIRCMD_CMD_POLL) { + DEBUGOUT("Flow Director previous command isn't complete, " + "aborting table re-initialization. \n"); + return (IXGBE_ERR_FDIR_REINIT_FAILED); + } + IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0); IXGBE_WRITE_FLUSH(hw); + /* + * 82599 adapters flow director init flow cannot be restarted, + * Workaround 82599 silicon errata by performing the following steps + * before re-writing the FDIRCTRL control register with the same value. + * - write 1 to bit 8 of FDIRCMD register & + * - write 0 to bit 8 of FDIRCMD register + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) | + IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + ~IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + /* + * Clear FDIR Hash register to clear any leftover hashes + * waiting to be programmed. + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll init-done after we write FDIRCTRL register */ + for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { + if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & + IXGBE_FDIRCTRL_INIT_DONE) + break; + usec_delay(10); + } + if (i >= IXGBE_FDIR_INIT_DONE_POLL) { + DEBUGOUT("Flow Director Signature poll time exceeded!\n"); + return (IXGBE_ERR_FDIR_REINIT_FAILED); + } + + /* Clear FDIR statistics registers (read to clear) */ + (void) IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT); + (void) IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT); + (void) IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); + (void) IXGBE_READ_REG(hw, IXGBE_FDIRMISS); + (void) IXGBE_READ_REG(hw, IXGBE_FDIRLEN); return (IXGBE_SUCCESS); } -#define IXGBE_FDIR_INIT_DONE_POLL 10 /* * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters * @hw: pointer to hardware structure @@ -1446,26 +1610,33 @@ * hash[15:0] = 0; * for (i = 0; i < 351; i++) { * if (int_key[i]) - * hash ^= int_stream[(i + 15):i]; + * hash ^= int_stream[(i + 15):i]; * } */ union { - u32 key[11]; - u8 key_stream[44]; + u64 fill[6]; + u32 key[11]; + u8 key_stream[44]; } tmp_key; u8 *stream = (u8 *)atr_input; u8 int_key[44]; /* upper-most bit unused */ u8 hash_str[46]; /* upper-most 2 bits unused */ u16 hash_result = 0; - u16 tmp = 0; int i, j, k, h; - (void) memset(&tmp_key, 0, sizeof (tmp_key)); + /* + * Initialize the fill member to prevent warnings + * on some compilers + */ + tmp_key.fill[0] = 0; + /* First load the temporary key stream */ - for (i = 0; i < 11; i++) - tmp_key.key[i] = key; + for (i = 0; i < 6; i++) { + u64 fillkey = ((u64)key << 32) | key; + tmp_key.fill[i] = fillkey; + } /* * Set the interim key for the hashing. Bit 352 is unused, so we must @@ -1473,9 +1644,9 @@ */ int_key[0] = tmp_key.key_stream[0] >> 1; for (i = 1, j = 0; i < 44; i++) { - int_key[i] = (tmp_key.key_stream[j] & 0x1) << 7; + unsigned int this_key = tmp_key.key_stream[j] << 7; j++; - int_key[i] |= tmp_key.key_stream[j] >> 1; + int_key[i] = (u8)(this_key | (tmp_key.key_stream[j] >> 1)); } /* @@ -1484,11 +1655,11 @@ */ hash_str[0] = (stream[40] & 0x7f) >> 1; for (i = 1, j = 40; i < 46; i++) { - hash_str[i] = (stream[j] & 0x1) << 7; + unsigned int this_str = stream[j] << 7; j++; if (j > 41) j = 0; - hash_str[i] |= stream[j] >> 1; + hash_str[i] = (u8)(this_str | (stream[j] >> 1)); } /* @@ -1498,7 +1669,7 @@ */ for (i = 45, j = 43, k = 0; k < 351 && i >= 2 && j >= 0; i--, j--) { for (h = 0; h < 8 && k < 351; h++, k++) { - if ((int_key[j] >> h) & 0x1) { + if (int_key[j] & (1 << h)) { /* * Key bit is set, XOR in the current 16-bit * string. Example of processing: @@ -1513,11 +1684,11 @@ * (hash_str[i - 1] & 0xff << 1) | * (hash_str[i] & 0x80 >> 7) */ - tmp = ((hash_str[i] & (0xff << h)) >> h); - tmp |= ((hash_str[i - 1] & 0xff) << (8 - h)); - tmp |= (hash_str[i - 2] & (0xff >> (8 - h))) + int tmp = (hash_str[i] >> h); + tmp |= (hash_str[i - 1] << (8 - h)); + tmp |= (int)(hash_str[i - 2] & ((1 << h) - 1)) << (16 - h); - hash_result ^= tmp; + hash_result ^= (u16)tmp; } } } @@ -2152,6 +2323,12 @@ } IXGBE_WRITE_FLUSH(hw); + /* We need to run link autotry after the driver loads */ + hw->mac.autotry_restart = true; + + if (ret_val == IXGBE_SUCCESS) + ret_val = ixgbe_verify_fw_version_82599(hw); + return (ret_val); } @@ -2206,6 +2383,7 @@ hw->phy.ops.identify(hw); if (hw->phy.type == ixgbe_phy_tn || + hw->phy.type == ixgbe_phy_tn || hw->phy.type == ixgbe_phy_cu_unknown) { hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); @@ -2225,14 +2403,17 @@ physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX | IXGBE_PHYSICAL_LAYER_1000BASE_BX; goto out; - } else + } else { /* SFI mode so read SFP module */ goto sfp_check; + } case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4) physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4) physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI; goto out; case IXGBE_AUTOC_LMS_10G_SERIAL: if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) { @@ -2458,3 +2639,50 @@ san_mac_addr_out: return (status); } + +/* + * ixgbe_verify_fw_version_82599 - verify fw version for 82599 + * @hw: pointer to hardware structure + * + * Verifies that installed the firmware version is 0.6 or higher + * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. + * + * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or + * if the FW version is not supported. + */ +static s32 +ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM_VERSION; + u16 fw_offset, fw_ptp_cfg_offset; + u16 fw_version = 0; + + /* firmware check is only necessary for SFI devices */ + if (hw->phy.media_type != ixgbe_media_type_fiber) { + status = IXGBE_SUCCESS; + goto fw_version_out; + } + + /* get the offset to the Firmware Module block */ + hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + + if ((fw_offset == 0) || (fw_offset == 0xFFFF)) + goto fw_version_out; + + /* get the offset to the Pass Through Patch Configuration block */ + hw->eeprom.ops.read(hw, (fw_offset + + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), &fw_ptp_cfg_offset); + + if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF)) + goto fw_version_out; + + /* get the firmware version */ + hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4), + &fw_version); + + if (fw_version > 0x5) + status = IXGBE_SUCCESS; + +fw_version_out: + return (status); +}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_api.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_api.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.117 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.120 v2-9-1-1_2009-6-10_NSW1 */ #include "ixgbe_api.h" #include "ixgbe_common.h" @@ -102,6 +102,7 @@ break; case IXGBE_DEV_ID_82599_KX4: case IXGBE_DEV_ID_82599_KX4_SIK: + case IXGBE_DEV_ID_82599_XAUI_LOM: case IXGBE_DEV_ID_82599_SFP: case IXGBE_DEV_ID_82599_SPW: case IXGBE_DEV_ID_82599_CX4: @@ -986,3 +987,33 @@ return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma, (hw, regval), IXGBE_NOT_IMPLEMENTED); } + +/* + * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + */ +s32 +ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync, + (hw, mask), IXGBE_NOT_IMPLEMENTED); +} + +/* + * ixgbe_release_swfw_semaphore - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + */ +void +ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + if (hw->mac.ops.release_swfw_sync) + hw->mac.ops.release_swfw_sync(hw, mask); +}
--- a/usr/src/uts/common/io/ixgbe/ixgbe_api.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_api.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.73 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.74 v2-9-1-1_2009-6-10_NSW1 */ #ifndef _IXGBE_API_H #define _IXGBE_API_H @@ -147,5 +147,7 @@ s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps); +s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); #endif /* _IXGBE_API_H */
--- a/usr/src/uts/common/io/ixgbe/ixgbe_common.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_common.c Mon Aug 17 09:14:35 2009 -0400 @@ -25,7 +25,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.199 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.206 v2-9-1-1_2009-6-10_NSW1 */ #include "ixgbe_common.h" #include "ixgbe_api.h" @@ -83,6 +83,8 @@ mac->ops.stop_adapter = &ixgbe_stop_adapter_generic; mac->ops.get_bus_info = &ixgbe_get_bus_info_generic; mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie; + mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync; + mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync; /* LEDs */ mac->ops.led_on = &ixgbe_led_on_generic; @@ -136,9 +138,6 @@ /* PHY ops initialization must be done in reset_hw() */ - /* Identify the PHY */ - hw->phy.ops.identify(hw); - /* Clear the VLAN filter table */ hw->mac.ops.clear_vfta(hw); @@ -624,8 +623,6 @@ ixgbe_shift_out_eeprom_bits(hw, data, 16); ixgbe_standby_eeprom(hw); - msec_delay(hw->eeprom.semaphore_delay); - /* Done with writing - release the EEPROM */ ixgbe_release_eeprom(hw); } @@ -820,13 +817,10 @@ ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) { s32 status = IXGBE_ERR_EEPROM; - u32 timeout; + u32 timeout = 2000; u32 i; u32 swsm; - /* Set timeout value based on size of EEPROM */ - timeout = hw->eeprom.word_size + 1; - /* Get SMBI software semaphore between device drivers first */ for (i = 0; i < timeout; i++) { /* @@ -838,7 +832,7 @@ status = IXGBE_SUCCESS; break; } - msec_delay(1); + usec_delay(50); } /* Now get the semaphore between SW/FW through the SWESMBI bit */ @@ -1114,6 +1108,9 @@ IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); ixgbe_release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + + /* Delay before attempt to obtain semaphore again to allow FW access */ + msec_delay(hw->eeprom.semaphore_delay); } /* @@ -1685,6 +1682,7 @@ s32 ret_val = IXGBE_SUCCESS; u32 mflcn_reg, fccfg_reg; u32 reg; + u32 rx_pba_size; DEBUGFUNC("ixgbe_fc_enable_generic"); @@ -1751,22 +1749,42 @@ IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); - /* Set up and enable Rx high/low water mark thresholds, enable XON. */ - if (hw->fc.current_mode & ixgbe_fc_tx_pause) { - if (hw->fc.send_xon) { - IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num), - (hw->fc.low_water | IXGBE_FCRTL_XONE)); - } else { - IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num), - hw->fc.low_water); + reg = IXGBE_READ_REG(hw, IXGBE_MTQC); + /* Thresholds are different for link flow control when in DCB mode */ + if (reg & IXGBE_MTQC_RT_ENA) { + rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num)); + + /* Always disable XON for LFC when in DCB mode */ + reg = (rx_pba_size >> 5) & 0xFFE0; + IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(packetbuf_num), reg); + + reg = (rx_pba_size >> 2) & 0xFFE0; + if (hw->fc.current_mode & ixgbe_fc_tx_pause) + reg |= IXGBE_FCRTH_FCEN; + IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num), reg); + } else { + /* + * Set up and enable Rx high/low water mark thresholds, + * enable XON. + */ + if (hw->fc.current_mode & ixgbe_fc_tx_pause) { + if (hw->fc.send_xon) { + IXGBE_WRITE_REG(hw, + IXGBE_FCRTL_82599(packetbuf_num), + (hw->fc.low_water | IXGBE_FCRTL_XONE)); + } else { + IXGBE_WRITE_REG(hw, + IXGBE_FCRTL_82599(packetbuf_num), + hw->fc.low_water); + } + + IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num), + (hw->fc.high_water | IXGBE_FCRTH_FCEN)); } - - IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(packetbuf_num), - (hw->fc.high_water | IXGBE_FCRTH_FCEN)); } /* Configure pause time (2 TCs per register) */ - reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num)); + reg = IXGBE_READ_REG(hw, IXGBE_FCTTV(packetbuf_num / 2)); if ((packetbuf_num & 1) == 0) reg = (reg & 0xFFFF0000) | hw->fc.pause_time; else @@ -2134,6 +2152,7 @@ hw->mac.ops.check_link(hw, &speed, &link_up, false); if (!link_up) { + autoc_reg |= IXGBE_AUTOC_AN_RESTART; autoc_reg |= IXGBE_AUTOC_FLU; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); msec_delay(10);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_common.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_common.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.90 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.90 v2-9-1-1_2009-6-10_NSW1 */ #ifndef _IXGBE_COMMON_H #define _IXGBE_COMMON_H
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c Mon Aug 17 09:14:35 2009 -0400 @@ -1212,7 +1212,7 @@ ixgbe_chip_start(ixgbe_t *ixgbe) { struct ixgbe_hw *hw = &ixgbe->hw; - int i; + int ret_val, i; ASSERT(mutex_owned(&ixgbe->gen_lock)); @@ -1237,9 +1237,17 @@ /* * Configure/Initialize hardware */ - if (ixgbe_init_hw(hw) != IXGBE_SUCCESS) { - ixgbe_error(ixgbe, "Failed to initialize hardware"); - return (IXGBE_FAILURE); + ret_val = ixgbe_init_hw(hw); + if (ret_val != IXGBE_SUCCESS) { + if (ret_val == IXGBE_ERR_EEPROM_VERSION) { + ixgbe_error(ixgbe, + "This 82599 device is pre-release and contains" + " outdated firmware, please contact your hardware" + " vendor for a replacement."); + } else { + ixgbe_error(ixgbe, "Failed to initialize hardware"); + return (IXGBE_FAILURE); + } } /* @@ -2476,8 +2484,8 @@ DEFAULT_RX_LIMIT_PER_INTR); /* - * Interrupt throttling is per 256ns in 82598 and 2 usec increments - * in 82599. + * Interrupt throttling is per 256ns in 82598 and 2.048usec + * (256ns * 8) increments in 82599. */ switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -2491,6 +2499,14 @@ PROP_INTR_THROTTLING, MIN_INTR_THROTTLING, MAX_INTR_THROTTLING_82599, DEFAULT_INTR_THROTTLING_82599); + + /* + * 82599 requires the interupt throttling rate is + * a multiple of 8. This is enforced by the register + * definiton. + */ + ixgbe->intr_throttling[0] = ixgbe->intr_throttling[0] & + 0xFF8; break; } } @@ -2740,7 +2756,9 @@ result = B_FALSE; for (i = 0; i < ixgbe->num_tx_rings; i++) { tx_ring = &ixgbe->tx_rings[i]; - tx_ring->tx_recycle(tx_ring); + if (tx_ring->tbd_free <= tx_ring->recycle_thresh) { + tx_ring->tx_recycle(tx_ring); + } if (tx_ring->recycle_fail > 0) tx_ring->stall_watchdog++; @@ -3296,8 +3314,8 @@ if ((ddi_taskq_dispatch(ixgbe->lsc_taskq, ixgbe_sfp_check, (void *)ixgbe, DDI_NOSLEEP)) != DDI_SUCCESS) { - ixgbe_log(ixgbe, - "No memory available to dispatch taskq"); + ixgbe_log(ixgbe, "No memory available to dispatch " + "taskq for SFP check"); } } } @@ -4633,6 +4651,13 @@ ixgbe_enable_ivar(ixgbe, r_idx, 0); BT_SET(ixgbe->vect_map[v_idx].rx_map, r_idx); + + /* + * To trigger a Rx interrupt to on this ring + */ + IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_EICS, (1 << v_idx)); + IXGBE_WRITE_FLUSH(&ixgbe->hw); + mutex_exit(&ixgbe->gen_lock); return (0);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_phy.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_phy.c Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.83 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.94 v2-9-1-1_2009-6-10_NSW1 */ #include "ixgbe_api.h" #include "ixgbe_common.h" @@ -189,6 +189,9 @@ case TN1010_PHY_ID: phy_type = ixgbe_phy_tn; break; + case AQ1002_PHY_ID: + phy_type = ixgbe_phy_aq; + break; case QT2022_PHY_ID: phy_type = ixgbe_phy_qt; break; @@ -450,7 +453,7 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) { - s32 status = IXGBE_NOT_IMPLEMENTED; + s32 status = IXGBE_SUCCESS; u32 time_out; u32 max_time_out = 10; u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; @@ -491,7 +494,6 @@ autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) { - status = IXGBE_SUCCESS; break; } } @@ -525,10 +527,14 @@ if (speed & IXGBE_LINK_SPEED_10GB_FULL) { hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; } + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; } + if (speed & IXGBE_LINK_SPEED_100_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + /* Setup link based on the new speed settings */ hw->phy.ops.setup_link(hw); @@ -560,6 +566,8 @@ *speed |= IXGBE_LINK_SPEED_10GB_FULL; if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G) *speed |= IXGBE_LINK_SPEED_1GB_FULL; + if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M) + *speed |= IXGBE_LINK_SPEED_100_FULL; } return (status); @@ -631,6 +639,22 @@ } /* + * ixgbe_get_phy_firmware_version_aq - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + */ +s32 +ixgbe_get_phy_firmware_version_aq(struct ixgbe_hw *hw, u16 *firmware_version) +{ + s32 status = IXGBE_SUCCESS; + + status = hw->phy.ops.read_reg(hw, AQ_FW_REV, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, firmware_version); + + return (status); +} + +/* * ixgbe_reset_phy_nl - Performs a PHY reset * @hw: pointer to hardware structure */ @@ -743,9 +767,15 @@ u8 comp_codes_1g = 0; u8 comp_codes_10g = 0; u8 oui_bytes[3] = {0, 0, 0}; - u8 transmission_media = 0; + u8 cable_tech = 0; u16 enforce_sfp = 0; + if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + goto out; + } + status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_IDENTIFIER, &identifier); @@ -762,13 +792,16 @@ /* LAN ID is needed for sfp_type determination */ hw->mac.ops.set_lan_id(hw); - if (identifier == IXGBE_SFF_IDENTIFIER_SFP) { + if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + } else { hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g); hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g); - hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_TRANSMISSION_MEDIA, - &transmission_media); + hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_CABLE_TECHNOLOGY, + &cable_tech); /* * ID Module @@ -782,7 +815,7 @@ * 6 SFP_SR/LR_CORE1 - 82599-specific */ if (hw->mac.type == ixgbe_mac_82598EB) { - if (transmission_media & IXGBE_SFF_TWIN_AX_CAPABLE) + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) hw->phy.sfp_type = ixgbe_sfp_type_da_cu; else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) hw->phy.sfp_type = ixgbe_sfp_type_sr; @@ -791,7 +824,7 @@ else hw->phy.sfp_type = ixgbe_sfp_type_unknown; } else if (hw->mac.type == ixgbe_mac_82599EB) { - if (transmission_media & IXGBE_SFF_TWIN_AX_CAPABLE) + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) if (hw->bus.lan_id == 0) hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core0; @@ -820,6 +853,7 @@ hw->phy.sfp_setup_needed = true; /* Determine if the SFP+ PHY is dual speed or not. */ + hw->phy.multispeed_fiber = false; if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) && (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) || ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) && @@ -844,8 +878,7 @@ switch (vendor_oui) { case IXGBE_SFF_VENDOR_OUI_TYCO: - if (transmission_media & - IXGBE_SFF_TWIN_AX_CAPABLE) + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) hw->phy.type = ixgbe_phy_tw_tyco; break; case IXGBE_SFF_VENDOR_OUI_FTL: @@ -858,8 +891,7 @@ hw->phy.type = ixgbe_phy_sfp_intel; break; default: - if (transmission_media & - IXGBE_SFF_TWIN_AX_CAPABLE) + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) hw->phy.type = ixgbe_phy_tw_unknown; else hw->phy.type = ixgbe_phy_sfp_unknown; @@ -867,11 +899,21 @@ } } - if (hw->mac.type == ixgbe_mac_82598EB || - (hw->phy.sfp_type != ixgbe_sfp_type_sr && - hw->phy.sfp_type != ixgbe_sfp_type_lr && - hw->phy.sfp_type != ixgbe_sfp_type_srlr_core0 && - hw->phy.sfp_type != ixgbe_sfp_type_srlr_core1)) { + /* All passive DA cables are supported */ + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { + status = IXGBE_SUCCESS; + goto out; + } + + /* 1G SFP modules are not supported */ + if (comp_codes_10g == 0) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + + /* Anything else 82598-based is supported */ + if (hw->mac.type == ixgbe_mac_82598EB) { status = IXGBE_SUCCESS; goto out; } @@ -1009,7 +1051,7 @@ u8 dev_addr, u8 *data) { s32 status = IXGBE_SUCCESS; - u32 max_retry = 1; + u32 max_retry = 10; u32 retry = 0; u16 swfw_mask = 0; bool nack = 1; @@ -1021,12 +1063,12 @@ else swfw_mask = IXGBE_GSSR_PHY0_SM; - if (ixgbe_acquire_swfw_sync(hw, swfw_mask) != IXGBE_SUCCESS) { - status = IXGBE_ERR_SWFW_SYNC; - goto read_byte_out; - } + do { + if (ixgbe_acquire_swfw_sync(hw, swfw_mask) != IXGBE_SUCCESS) { + status = IXGBE_ERR_SWFW_SYNC; + goto read_byte_out; + } - do { ixgbe_i2c_start(hw); /* Device Address and write indication */ @@ -1069,6 +1111,8 @@ break; fail: + ixgbe_release_swfw_sync(hw, swfw_mask); + msec_delay(100); ixgbe_i2c_bus_clear(hw); retry++; if (retry < max_retry)
--- a/usr/src/uts/common/io/ixgbe/ixgbe_phy.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_phy.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.32 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.34 v2-9-1-1_2009-6-10_NSW1 */ #ifndef _IXGBE_PHY_H #define _IXGBE_PHY_H @@ -43,10 +43,10 @@ #define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27 #define IXGBE_SFF_1GBE_COMP_CODES 0x6 #define IXGBE_SFF_10GBE_COMP_CODES 0x3 -#define IXGBE_SFF_TRANSMISSION_MEDIA 0x9 +#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 /* Bitmasks */ -#define IXGBE_SFF_TWIN_AX_CAPABLE 0x80 +#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 @@ -102,6 +102,8 @@ ixgbe_link_speed *speed, bool *link_up); s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, u16 *firmware_version); +s32 ixgbe_get_phy_firmware_version_aq(struct ixgbe_hw *hw, + u16 *firmware_version); s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_sw.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_sw.h Mon Aug 17 09:14:35 2009 -0400 @@ -112,8 +112,8 @@ #define MAX_MTU 16366 #define MAX_RX_LIMIT_PER_INTR 4096 -#define MAX_INTR_THROTTLING_82598 65535 -#define MAX_INTR_THROTTLING_82599 0x7FC +#define MAX_INTR_THROTTLING_82598 0xFFFF +#define MAX_INTR_THROTTLING_82599 0xFF8 #define MAX_RX_COPY_THRESHOLD 9216 #define MAX_TX_COPY_THRESHOLD 9216 @@ -147,7 +147,7 @@ #define DEFAULT_MTU ETHERMTU #define DEFAULT_RX_LIMIT_PER_INTR 256 #define DEFAULT_INTR_THROTTLING_82598 200 /* In unit of 256 nsec */ -#define DEFAULT_INTR_THROTTLING_82599 26 /* In unit of 2 usec */ +#define DEFAULT_INTR_THROTTLING_82599 200 /* In unit of 256 nsec */ #define DEFAULT_RX_COPY_THRESHOLD 128 #define DEFAULT_TX_COPY_THRESHOLD 512 #define DEFAULT_TX_RECYCLE_THRESHOLD (MAX_COOKIE + 1)
--- a/usr/src/uts/common/io/ixgbe/ixgbe_tx.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_tx.c Mon Aug 17 09:14:35 2009 -0400 @@ -87,7 +87,7 @@ size_t mbsize; int desc_num; boolean_t copy_done, eop; - mblk_t *current_mp, *next_mp, *nmp; + mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL; tx_control_block_t *tcb; ixgbe_tx_context_t tx_context, *ctx; link_list_t pending_list; @@ -400,34 +400,54 @@ /* * pull up the mblk and send it out with bind way */ - if ((nmp = msgpullup(mp, -1)) == NULL) { - freemsg(mp); - return (NULL); - } else { - freemsg(mp); - mp = nmp; + if ((pull_mp = msgpullup(mp, -1)) == NULL) { + tx_ring->reschedule = B_TRUE; + return (mp); } LINK_LIST_INIT(&pending_list); + desc_total = 0; + + /* + * if the packet is a LSO packet, we simply + * transmit the header in one descriptor using the copy way + */ + if ((ctx != NULL) && ctx->lso_flag) { + hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + + ctx->l4_hdr_len; + + tcb = ixgbe_get_free_list(tx_ring); + if (tcb == NULL) { + IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb); + goto tx_failure; + } + desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp, + hdr_len, B_TRUE); + LIST_PUSH_TAIL(&pending_list, &tcb->link); + desc_total += desc_num; + + pull_mp->b_rptr += hdr_len; + } + tcb = ixgbe_get_free_list(tx_ring); if (tcb == NULL) { IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb); - freemsg(mp); - return (NULL); + goto tx_failure; + } + if ((ctx != NULL) && ctx->lso_flag) { + desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp, + mbsize - hdr_len); + } else { + desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp, + mbsize); + } + if (desc_num < 0) { + goto tx_failure; } LIST_PUSH_TAIL(&pending_list, &tcb->link); - desc_num = ixgbe_tx_bind(tx_ring, tcb, mp, mbsize); - if ((desc_num < 0) || - ((desc_num + 1) > IXGBE_TX_DESC_LIMIT)) { - ixgbe_free_tcb(tcb); - ixgbe_put_free_list(tx_ring, &pending_list); - freemsg(mp); - return (NULL); - } - - desc_total = desc_num; - tcb->mp = mp; + desc_total += desc_num; + tcb->mp = pull_mp; } /* @@ -462,10 +482,25 @@ mutex_exit(&tx_ring->tx_lock); + /* + * now that the transmission succeeds, need to free the original + * mp if we used the pulling up mblk for transmission. + */ + if (pull_mp) { + freemsg(mp); + } + return (NULL); tx_failure: /* + * If transmission fails, need to free the pulling up mblk. + */ + if (pull_mp) { + freemsg(pull_mp); + } + + /* * Discard the mblk and free the used resources */ tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
--- a/usr/src/uts/common/io/ixgbe/ixgbe_type.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/ixgbe/ixgbe_type.h Mon Aug 17 09:14:35 2009 -0400 @@ -26,7 +26,7 @@ * Use is subject to license terms. */ -/* IntelVersion: 1.316 v2-7-8_2009-4-7 */ +/* IntelVersion: 1.338 v2-9-1-1_2009-6-10_NSW1 */ #ifndef _IXGBE_TYPE_H #define _IXGBE_TYPE_H @@ -53,6 +53,7 @@ #define IXGBE_DEV_ID_82599_CX4 0x10F9 #define IXGBE_DEV_ID_82599_SFP 0x10FB #define IXGBE_DEV_ID_82599_SPW 0x1507 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC /* General Registers */ #define IXGBE_CTRL 0x00000 @@ -113,7 +114,7 @@ #define IXGBE_MAX_EITR 0x00000FF8 #define IXGBE_MIN_EITR 8 #define IXGBE_EITR(_i) (((_i) <= 23) ? \ - (0x00820 + ((_i) * 4)) : (0x012300 + ((_i) * 4))) + (0x00820 + ((_i) * 4)) : (0x012300 + (((_i) - 24) * 4))) #define IXGBE_EITR_ITR_INT_MASK 0x00000FF8 #define IXGBE_EITR_LLI_MOD 0x00008000 #define IXGBE_EITR_CNT_WDIS 0x80000000 @@ -232,10 +233,10 @@ #define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /* 128 of these (0-127) */ #define IXGBE_LLITHRESH 0x0EC90 -#define IXGBE_VMD_CTL 0x0581C #define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */ #define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */ #define IXGBE_IMIRVP 0x05AC0 +#define IXGBE_VMD_CTL 0x0581C #define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */ #define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */ @@ -279,6 +280,7 @@ #define IXGBE_DTXCTL 0x07E00 #define IXGBE_DMATXCTL 0x04A80 +#define IXGBE_PFDTXGSWC 0x08220 #define IXGBE_DTXMXSZRQ 0x08100 #define IXGBE_DTXTCPFLGL 0x04A88 #define IXGBE_DTXTCPFLGH 0x04A8C @@ -289,6 +291,8 @@ #define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */ #define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */ #define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */ + +#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */ #define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4)) /* 16 of these (0-15) */ /* Tx DCA Control register : 128 of these (0-127) */ #define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40)) @@ -481,6 +485,7 @@ /* DCB registers */ #define IXGBE_RTRPCS 0x02430 #define IXGBE_RTTDCS 0x04900 +#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */ #define IXGBE_RTTPCS 0x0CD00 #define IXGBE_RTRUP2TC 0x03020 #define IXGBE_RTTUP2TC 0x0C800 @@ -681,7 +686,7 @@ #define IXGBE_DCA_ID 0x11070 #define IXGBE_DCA_CTRL 0x11074 -/* PCI-E registers 82599 Specific */ +/* PCI-E registers 82599-Specific */ #define IXGBE_GCR_EXT 0x11050 #define IXGBE_GSCL_5_82599 0x11030 #define IXGBE_GSCL_6_82599 0x11034 @@ -980,6 +985,7 @@ #define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */ #define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */ #define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */ +#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */ #define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */ #define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */ #define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */ @@ -1004,6 +1010,8 @@ /* PHY IDs */ #define TN1010_PHY_ID 0x00A19410 #define TNX_FW_REV 0xB +#define AQ1002_PHY_ID 0x03A1B420 +#define AQ_FW_REV 0x20 #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 @@ -1093,6 +1101,8 @@ /* VFRE bitmask */ #define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF +#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ + /* RDHMPN and TDHMPN bitmasks */ #define IXGBE_RDHMPN_RDICADDR 0x007FF800 #define IXGBE_RDHMPN_RDICRDREQ 0x00800000 @@ -1318,12 +1328,10 @@ * * Current filters: * EAPOL 802.1x (0x888e): Filter 0 - * BCN (0x8904): Filter 1 * FCoE (0x8906): Filter 2 * 1588 (0x88f7): Filter 3 */ #define IXGBE_ETQF_FILTER_EAPOL 0 -#define IXGBE_ETQF_FILTER_BCN 1 #define IXGBE_ETQF_FILTER_FCOE 2 #define IXGBE_ETQF_FILTER_1588 3 @@ -1349,8 +1357,10 @@ #define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ /* ESDP Bit Masks */ -#define IXGBE_ESDP_SDP0 0x00000001 -#define IXGBE_ESDP_SDP1 0x00000002 +#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */ +#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */ +#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */ +#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */ #define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */ #define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */ #define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */ @@ -1523,6 +1533,7 @@ #define IXGBE_PBANUM1_PTR 0x16 #define IXGBE_SAN_MAC_ADDR_PTR 0x28 #define IXGBE_DEVICE_CAPS 0x2C +#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 #define IXGBE_PCIE_MSIX_82599_CAPS 0x72 #define IXGBE_PCIE_MSIX_82598_CAPS 0x62 @@ -1566,10 +1577,17 @@ #define IXGBE_EERD_ATTEMPTS 100000 #endif +#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */ +#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */ +#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */ +#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */ + #define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0 #define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3 #define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1 #define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2 +#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4 +#define IXGBE_FW_PATCH_VERSION_4 0x7 /* PCI Bus Info */ #define IXGBE_PCI_LINK_STATUS 0xB2 @@ -1629,7 +1647,7 @@ #define IXGBE_MAX_FRAME_SZ 0x40040000 #define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */ -#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq. # write-back enable */ +#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */ /* Receive Config masks */ #define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */ @@ -1770,6 +1788,7 @@ #define IXGBE_PSRTYPE_UDPHDR 0x00000020 #define IXGBE_PSRTYPE_IPV4HDR 0x00000100 #define IXGBE_PSRTYPE_IPV6HDR 0x00000200 +#define IXGBE_PSRTYPE_L2HDR 0x00001000 /* SRRCTL bit definitions */ #define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ @@ -1857,6 +1876,12 @@ #define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */ #define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT +/* SR-IOV specific macros */ +#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4) +#define IXGBE_MBVFICR(_i) (0x00710 + (_i * 4)) +#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600)) +#define IXGBE_VFLREC(_i) (0x00700 + (_i * 4)) + #ifndef __le16 /* Little Endian defines */ #define __le16 u16 @@ -1925,6 +1950,7 @@ #define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15 #define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16 +#define IXGBE_FDIRCMD_CMD_MASK 0x00000003 #define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001 #define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002 #define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003 @@ -1935,6 +1961,7 @@ #define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040 #define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060 #define IXGBE_FDIRCMD_IPV6 0x00000080 +#define IXGBE_FDIRCMD_CLEARHT 0x00000100 #define IXGBE_FDIRCMD_DROP 0x00000200 #define IXGBE_FDIRCMD_INT 0x00000400 #define IXGBE_FDIRCMD_LAST 0x00000800 @@ -1942,6 +1969,8 @@ #define IXGBE_FDIRCMD_QUEUE_EN 0x00008000 #define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16 #define IXGBE_FDIRCMD_VT_POOL_SHIFT 24 +#define IXGBE_FDIR_INIT_DONE_POLL 10 +#define IXGBE_FDIRCMD_CMD_POLL 10 /* Transmit Descriptor - Legacy */ struct ixgbe_legacy_tx_desc { @@ -2113,6 +2142,7 @@ #define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200 #define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400 #define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800 +#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 /* Software ATR hash keys */ #define IXGBE_ATR_BUCKET_HASH_KEY 0xE214AD3D @@ -2171,8 +2201,9 @@ ixgbe_phy_unknown = 0, ixgbe_phy_none, ixgbe_phy_tn, + ixgbe_phy_aq, + ixgbe_phy_cu_unknown, ixgbe_phy_qt, - ixgbe_phy_cu_unknown, ixgbe_phy_xaui, ixgbe_phy_nl, ixgbe_phy_tw_tyco, @@ -2400,6 +2431,8 @@ s32 (*write_analog_reg8)(struct ixgbe_hw *, u32, u8); s32 (*setup_sfp)(struct ixgbe_hw *); s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); + s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16); + void (*release_swfw_sync)(struct ixgbe_hw *, u16); /* Link */ s32 (*setup_link)(struct ixgbe_hw *); @@ -2483,6 +2516,7 @@ bool orig_link_settings_stored; bool autoneg; bool autoneg_succeeded; + bool autotry_restart; }; struct ixgbe_phy_info { @@ -2544,10 +2578,12 @@ #define IXGBE_ERR_SFP_NOT_PRESENT -20 #define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 #define IXGBE_ERR_NO_SAN_ADDR_PTR -22 +#define IXGBE_ERR_FDIR_REINIT_FAILED -23 +#define IXGBE_ERR_EEPROM_VERSION -24 #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF #ifndef UNREFERENCED_PARAMETER -#define UNREFERENCED_PARAMETER(_p) +#define UNREFERENCED_PARAMETER(_p) (_p); #endif #endif /* _IXGBE_TYPE_H */
--- a/usr/src/uts/common/io/mac/mac.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/mac/mac.c Mon Aug 17 09:14:35 2009 -0400 @@ -1426,35 +1426,54 @@ */ int mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh, - mac_ring_handle_t *hwrh) + mac_ring_handle_t *hwrh, mac_ring_type_t rtype) { mac_client_impl_t *mcip = (mac_client_impl_t *)mch; - flow_entry_t *flent = mcip->mci_flent; - mac_group_t *grp = flent->fe_rx_ring_group; - mac_ring_t *ring; int cnt = 0; - /* - * The mac client did not reserve any RX group, return directly. - * This is probably because the underlying MAC does not support - * any RX groups. - */ - *hwgh = NULL; - if (grp == NULL) - return (0); - - /* - * This RX group must be reserved by this mac client. - */ - ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && - (mch == (mac_client_handle_t)(MAC_RX_GROUP_ONLY_CLIENT(grp)))); - - for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next) { - ASSERT(cnt < MAX_RINGS_PER_GROUP); - hwrh[cnt++] = (mac_ring_handle_t)ring; + switch (rtype) { + case MAC_RING_TYPE_RX: { + flow_entry_t *flent = mcip->mci_flent; + mac_group_t *grp; + mac_ring_t *ring; + + grp = flent->fe_rx_ring_group; + /* + * The mac client did not reserve any RX group, return directly. + * This is probably because the underlying MAC does not support + * any groups. + */ + *hwgh = NULL; + if (grp == NULL) + return (0); + /* + * This group must be reserved by this mac client. + */ + ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) && + (mch == (mac_client_handle_t) + (MAC_RX_GROUP_ONLY_CLIENT(grp)))); + for (ring = grp->mrg_rings; + ring != NULL; ring = ring->mr_next, cnt++) { + ASSERT(cnt < MAX_RINGS_PER_GROUP); + hwrh[cnt] = (mac_ring_handle_t)ring; + } + *hwgh = (mac_group_handle_t)grp; + return (cnt); } - *hwgh = (mac_group_handle_t)grp; - return (cnt); + case MAC_RING_TYPE_TX: { + mac_soft_ring_set_t *tx_srs; + mac_srs_tx_t *tx; + + tx_srs = MCIP_TX_SRS(mcip); + tx = &tx_srs->srs_tx; + for (; cnt < tx->st_ring_count; cnt++) + hwrh[cnt] = tx->st_rings[cnt]; + return (cnt); + } + default: + ASSERT(B_FALSE); + return (-1); + } } /* @@ -1524,6 +1543,22 @@ return (info->mri_poll(info->mri_driver, bytes_to_pickup)); } +/* + * Send packets through the selected tx ring. + */ +mblk_t * +mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp) +{ + mac_ring_t *ring = (mac_ring_t *)rh; + mac_ring_info_t *info = &ring->mr_info; + + ASSERT(ring->mr_type == MAC_RING_TYPE_TX); + ASSERT(ring->mr_state >= MR_INUSE); + ASSERT(info->mri_tx != NULL); + + return (info->mri_tx(info->mri_driver, mp)); +} + int mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr) { @@ -3429,22 +3464,6 @@ } /* - * Send packets through a selected tx ring. - */ -mblk_t * -mac_ring_tx(mac_ring_handle_t rh, mblk_t *mp) -{ - mac_ring_t *ring = (mac_ring_t *)rh; - mac_ring_info_t *info = &ring->mr_info; - - ASSERT(ring->mr_type == MAC_RING_TYPE_TX); - ASSERT(ring->mr_state >= MR_INUSE); - ASSERT(info->mri_tx != NULL); - - return (info->mri_tx(info->mri_driver, mp)); -} - -/* * Find a ring from its index. */ mac_ring_t *
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c Mon Aug 17 09:14:35 2009 -0400 @@ -2235,6 +2235,10 @@ tx->st_group); tx->st_group = NULL; } + if (tx->st_ring_count != 0) { + kmem_free(tx->st_rings, + sizeof (mac_ring_handle_t) * tx->st_ring_count); + } if (tx->st_arg2 != NULL) { ASSERT(tx_srs->srs_type & SRST_TX); mac_release_tx_ring(tx->st_arg2); @@ -3203,7 +3207,7 @@ mac_impl_t *mip = mcip->mci_mip; mac_soft_ring_set_t *tx_srs; int i, tx_ring_count = 0, tx_rings_reserved = 0; - mac_ring_handle_t *tx_ring = NULL; + mac_ring_handle_t *tx_rings = NULL; uint32_t soft_ring_type; mac_group_t *grp = NULL; mac_ring_t *ring; @@ -3221,7 +3225,7 @@ } if (tx_ring_count != 0) { - tx_ring = kmem_zalloc(sizeof (mac_ring_handle_t) * + tx_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * tx_ring_count, KM_SLEEP); } @@ -3231,8 +3235,12 @@ * NIC's. */ if (srs_type == SRST_FLOW || - (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) - goto use_default_ring; + (mcip->mci_state_flags & MCIS_NO_HWRINGS) != 0) { + /* use default ring */ + tx_rings[0] = (void *)mip->mi_default_tx_ring; + tx_rings_reserved++; + goto rings_assigned; + } if (mcip->mci_share != NULL) ring = grp->mrg_rings; @@ -3245,8 +3253,7 @@ * then each Tx ring will have a Tx-side soft ring. All * these soft rings will be hang off Tx SRS. */ - for (i = 0, tx_rings_reserved = 0; - i < tx_ring_count; i++, tx_rings_reserved++) { + for (i = 0; i < tx_ring_count; i++) { if (mcip->mci_share != NULL) { /* * The ring was already chosen and associated @@ -3255,42 +3262,39 @@ * between the share and non-share cases. */ ASSERT(ring != NULL); - tx_ring[i] = (mac_ring_handle_t)ring; + tx_rings[i] = (mac_ring_handle_t)ring; ring = ring->mr_next; } else { - tx_ring[i] = + tx_rings[i] = (mac_ring_handle_t)mac_reserve_tx_ring(mip, NULL); - if (tx_ring[i] == NULL) + if (tx_rings[i] == NULL) { + /* + * We have run out of Tx rings. So + * give the default ring too. + */ + tx_rings[i] = (void *)mip->mi_default_tx_ring; + tx_rings_reserved++; break; + } } + tx_rings_reserved++; } + +rings_assigned: if (mac_tx_serialize || (mip->mi_v12n_level & MAC_VIRT_SERIALIZE)) serialize = B_TRUE; /* * Did we get the requested number of tx rings? - * There are 3 actions we can take depending upon the number + * There are 2 actions we can take depending upon the number * of tx_rings we got. - * 1) If we got none, then hook up the tx_srs with the - * default ring. - * 2) If we got one, then get the tx_ring from the soft ring, + * 1) If we got one, then get the tx_ring from the soft ring, * save it in SRS and free up the soft ring. - * 3) If we got more than 1, then do the tx fanout among the + * 2) If we got more than 1, then do the tx fanout among the * rings we obtained. */ - switch (tx_rings_reserved) { - case 1: - /* - * No need to allocate Tx soft rings. Tx-side soft - * rings are for Tx fanout case. Just use Tx SRS. - */ - /* FALLTHRU */ - - case 0: -use_default_ring: - if (tx_rings_reserved == 0) - tx->st_arg2 = (void *)mip->mi_default_tx_ring; - else - tx->st_arg2 = (void *)tx_ring[0]; + ASSERT(tx_rings_reserved != 0); + if (tx_rings_reserved == 1) { + tx->st_arg2 = (void *)tx_rings[0]; /* For ring_count of 0 or 1, set the tx_mode and return */ if (tx_srs->srs_type & SRST_BW_CONTROL) tx->st_mode = SRS_TX_BW; @@ -3298,18 +3302,9 @@ tx->st_mode = SRS_TX_SERIALIZE; else tx->st_mode = SRS_TX_DEFAULT; - break; - - default: + } else { /* * We got multiple Tx rings for Tx fanout. - * - * cpuid of -1 is passed. This creates an unbound - * worker thread. Instead the code should get CPU - * binding information and pass that to - * mac_soft_ring_create(). This needs to be done - * in conjunction with Rx-side soft ring - * bindings. */ soft_ring_type = ST_RING_OTH | ST_RING_TX; if (tx_srs->srs_type & SRST_BW_CONTROL) { @@ -3322,7 +3317,7 @@ for (i = 0; i < tx_rings_reserved; i++) { (void) mac_soft_ring_create(i, 0, NULL, soft_ring_type, maxclsyspri, mcip, tx_srs, -1, NULL, mcip, - (mac_resource_handle_t)tx_ring[i]); + (mac_resource_handle_t)tx_rings[i]); } mac_srs_update_fanout_list(tx_srs); } @@ -3332,8 +3327,12 @@ int, tx->st_mode, int, tx_srs->srs_oth_ring_count); if (tx_ring_count != 0) { - kmem_free(tx_ring, - sizeof (mac_ring_handle_t) * tx_ring_count); + tx->st_ring_count = tx_rings_reserved; + tx->st_rings = kmem_zalloc(sizeof (mac_ring_handle_t) * + tx_rings_reserved, KM_SLEEP); + for (i = 0; i < tx->st_ring_count; i++) + tx->st_rings[i] = tx_rings[i]; + kmem_free(tx_rings, sizeof (mac_ring_handle_t) * tx_ring_count); } }
--- a/usr/src/uts/common/io/mii/mii.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/mii/mii.c Mon Aug 17 09:14:35 2009 -0400 @@ -1278,8 +1278,8 @@ ph->phy_speed = 0; phy_write(ph, MII_AN_ADVERT, anar); - phy_write(ph, MII_CONTROL, - bmcr & ~(MII_CONTROL_ANE|MII_CONTROL_RSAN|MII_CONTROL_LOOPBACK)); + phy_write(ph, MII_CONTROL, bmcr & ~(MII_CONTROL_RSAN)); + switch (ph->phy_type) { case XCVR_1000T: case XCVR_1000X:
--- a/usr/src/uts/common/io/nxge/nxge_hio.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_hio.c Mon Aug 17 09:14:35 2009 -0400 @@ -41,9 +41,6 @@ #include <sys/nxge/nxge_txdma.h> #include <sys/nxge/nxge_hio.h> -#define NXGE_HIO_SHARE_MIN_CHANNELS 2 -#define NXGE_HIO_SHARE_MAX_CHANNELS 2 - /* * External prototypes */ @@ -1057,23 +1054,6 @@ NXGE_DEBUG_MSG((nxge, HIO_CTL, "Hybrid IO-capable service domain")); return (NXGE_OK); - } else { - /* - * isLDOMguest(nxge) == B_TRUE - */ - nx_vio_fp_t *vio; - nhd->type = NXGE_HIO_TYPE_GUEST; - - vio = &nhd->hio.vio; - vio->__register = (vio_net_resource_reg_t) - modgetsymvalue("vio_net_resource_reg", 0); - vio->unregister = (vio_net_resource_unreg_t) - modgetsymvalue("vio_net_resource_unreg", 0); - - if (vio->__register == 0 || vio->unregister == 0) { - NXGE_ERROR_MSG((nxge, VIR_CTL, "vio_net is absent!")); - return (NXGE_ERROR); - } } return (0); @@ -1144,12 +1124,16 @@ static int nxge_hio_add_mac(void *arg, const uint8_t *mac_addr) { - nxge_ring_group_t *group = (nxge_ring_group_t *)arg; - p_nxge_t nxge = group->nxgep; - int rv; - nxge_hio_vr_t *vr; /* The Virtualization Region */ + nxge_ring_group_t *group = (nxge_ring_group_t *)arg; + p_nxge_t nxge = group->nxgep; + int rv; + nxge_hio_vr_t *vr; /* The Virtualization Region */ ASSERT(group->type == MAC_RING_TYPE_RX); + ASSERT(group->nxgep != NULL); + + if (isLDOMguest(group->nxgep)) + return (0); mutex_enter(nxge->genlock); @@ -1174,8 +1158,7 @@ /* * Program the mac address for the group. */ - if ((rv = nxge_hio_group_mac_add(nxge, group, - mac_addr)) != 0) { + if ((rv = nxge_hio_group_mac_add(nxge, group, mac_addr)) != 0) { return (rv); } @@ -1206,6 +1189,10 @@ int rv, slot; ASSERT(group->type == MAC_RING_TYPE_RX); + ASSERT(group->nxgep != NULL); + + if (isLDOMguest(group->nxgep)) + return (0); mutex_enter(nxge->genlock); @@ -1253,14 +1240,16 @@ int dev_gindex; ASSERT(group->type == MAC_RING_TYPE_RX); + ASSERT(group->nxgep != NULL); -#ifdef later ASSERT(group->nxgep->nxge_mac_state == NXGE_MAC_STARTED); -#endif if (group->nxgep->nxge_mac_state != NXGE_MAC_STARTED) return (ENXIO); mutex_enter(group->nxgep->genlock); + if (isLDOMguest(group->nxgep)) + goto nxge_hio_group_start_exit; + dev_gindex = group->nxgep->pt_config.hw_config.def_mac_rxdma_grpid + group->gindex; rdc_grp_p = &group->nxgep->pt_config.rdc_grps[dev_gindex]; @@ -1289,9 +1278,9 @@ (void) nxge_init_fzc_rdc_tbl(group->nxgep, rdc_grp_p, rdctbl); +nxge_hio_group_start_exit: group->started = B_TRUE; mutex_exit(group->nxgep->genlock); - return (0); } @@ -1305,6 +1294,9 @@ mutex_enter(group->nxgep->genlock); group->started = B_FALSE; + if (isLDOMguest(group->nxgep)) + goto nxge_hio_group_stop_exit; + /* * Unbind the RDC table previously bound for this group. * @@ -1314,6 +1306,7 @@ if (group->gindex != 0) (void) nxge_fzc_rdc_tbl_unbind(group->nxgep, group->rdctbl); +nxge_hio_group_stop_exit: mutex_exit(group->nxgep->genlock); } @@ -1334,20 +1327,26 @@ group->gindex = groupid; group->sindex = 0; /* not yet bound to a share */ - dev_gindex = nxgep->pt_config.hw_config.def_mac_rxdma_grpid + - groupid; + if (!isLDOMguest(nxgep)) { + dev_gindex = + nxgep->pt_config.hw_config.def_mac_rxdma_grpid + + groupid; - if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid == - dev_gindex) - group->port_default_grp = B_TRUE; + if (nxgep->pt_config.hw_config.def_mac_rxdma_grpid == + dev_gindex) + group->port_default_grp = B_TRUE; + + infop->mgi_count = + nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs; + } else { + infop->mgi_count = NXGE_HIO_SHARE_MAX_CHANNELS; + } infop->mgi_driver = (mac_group_driver_t)group; infop->mgi_start = nxge_hio_group_start; infop->mgi_stop = nxge_hio_group_stop; infop->mgi_addmac = nxge_hio_add_mac; infop->mgi_remmac = nxge_hio_rem_mac; - infop->mgi_count = - nxgep->pt_config.rdc_grps[dev_gindex].max_rdcs; break; case MAC_RING_TYPE_TX:
--- a/usr/src/uts/common/io/nxge/nxge_hio_guest.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_hio_guest.c Mon Aug 17 09:14:35 2009 -0400 @@ -35,46 +35,9 @@ #include <sys/nxge/nxge_fzc.h> #include <sys/nxge/nxge_rxdma.h> #include <sys/nxge/nxge_txdma.h> - #include <sys/nxge/nxge_hio.h> /* - * nxge_hio_unregister - * - * Unregister with the VNET module. - * - * Arguments: - * nxge - * - * Notes: - * We must uninitialize all DMA channels associated with the VR, too. - * - * We're assuming that the channels will be disabled & unassigned - * in the service domain, after we're done here. - * - * Context: - * Guest domain - */ -void -nxge_hio_unregister( - nxge_t *nxge) -{ - nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio; - - if (nhd == 0) { - return; - } - -#if defined(sun4v) - /* Unregister with vNet. */ - if (nhd->hio.vio.unregister) { - if (nxge->hio_vr) - (*nhd->hio.vio.unregister)(nxge->hio_vr->vhp); - } -#endif -} - -/* * nxge_guest_regs_map * * Map in a guest domain's register set(s). @@ -95,8 +58,7 @@ }; int -nxge_guest_regs_map( - nxge_t *nxge) +nxge_guest_regs_map(nxge_t *nxge) { dev_regs_t *regs; off_t regsize; @@ -211,31 +173,22 @@ int nxge_hio_vr_add(nxge_t *nxge) { - extern mac_callbacks_t nxge_m_callbacks; - - nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio; - nxge_hio_vr_t *vr; - nxge_hio_dc_t *dc; - - int *reg_val; - uint_t reg_len; - uint8_t vr_index; + extern nxge_status_t nxge_mac_register(p_nxge_t); - nxhv_vr_fp_t *fp; - uint64_t vr_address, vr_size; - uint32_t cookie; - - nxhv_dc_fp_t *tx, *rx; - uint64_t tx_map, rx_map; - - uint64_t hv_rv; - - /* Variables needed to register with vnet. */ - mac_register_t *mac_info; - ether_addr_t mac_addr; - nx_vio_fp_t *vio; - - int i; + nxge_hio_data_t *nhd = (nxge_hio_data_t *)nxge->nxge_hw_p->hio; + nxge_hio_vr_t *vr; + nxge_hio_dc_t *dc; + int *reg_val; + uint_t reg_len; + uint8_t vr_index; + nxhv_vr_fp_t *fp; + uint64_t vr_address, vr_size; + uint32_t cookie; + nxhv_dc_fp_t *tx, *rx; + uint64_t tx_map, rx_map; + uint64_t hv_rv; + int i; + nxge_status_t status; NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_vr_add")); @@ -384,40 +337,13 @@ } } - /* - * Register with vnet. - */ - if ((mac_info = mac_alloc(MAC_VERSION)) == NULL) - return (NXGE_ERROR); - - mac_info->m_type_ident = MAC_PLUGIN_IDENT_ETHER; - mac_info->m_driver = nxge; - mac_info->m_dip = nxge->dip; - mac_info->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP); - mac_info->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP); - (void) memset(mac_info->m_src_addr, 0xff, sizeof (MAXMACADDRLEN)); - mac_info->m_callbacks = &nxge_m_callbacks; - mac_info->m_min_sdu = 0; - mac_info->m_max_sdu = NXGE_MTU_DEFAULT_MAX - - sizeof (struct ether_header) - ETHERFCSL - 4; - - (void) memset(&mac_addr, 0xff, sizeof (mac_addr)); - - /* Register with vio_net. */ - vio = &nhd->hio.vio; - if ((*vio->__register)(mac_info, VIO_NET_RES_HYBRID, - nxge->hio_mac_addr, mac_addr, &vr->vhp, &vio->cb)) { - NXGE_DEBUG_MSG((nxge, HIO_CTL, "HIO registration() failed")); - KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN); - KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN); - mac_free(mac_info); - return (NXGE_ERROR); + status = nxge_mac_register(nxge); + if (status != NXGE_OK) { + cmn_err(CE_WARN, "nxge(%d): nxge_mac_register failed\n", + nxge->instance); + return (status); } - KMEM_FREE(mac_info->m_src_addr, MAXMACADDRLEN); - KMEM_FREE(mac_info->m_dst_addr, MAXMACADDRLEN); - mac_free(mac_info); - nxge->hio_vr = vr; /* For faster lookups. */ NXGE_DEBUG_MSG((nxge, HIO_CTL, "<== nxge_hio_vr_add"));
--- a/usr/src/uts/common/io/nxge/nxge_main.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_main.c Mon Aug 17 09:14:35 2009 -0400 @@ -272,14 +272,11 @@ static int nxge_m_multicst(void *, boolean_t, const uint8_t *); static int nxge_m_promisc(void *, boolean_t); static void nxge_m_ioctl(void *, queue_t *, mblk_t *); -static nxge_status_t nxge_mac_register(p_nxge_t); +nxge_status_t nxge_mac_register(p_nxge_t); static int nxge_altmac_set(p_nxge_t nxgep, uint8_t *mac_addr, int slot, int rdctbl, boolean_t usetbl); void nxge_mmac_kstat_update(p_nxge_t nxgep, int slot, boolean_t factory); -#if defined(sun4v) -extern mblk_t *nxge_m_tx(void *arg, mblk_t *mp); -#endif static void nxge_m_getfactaddr(void *, uint_t, uint8_t *); static boolean_t nxge_m_getcapab(void *, mac_capab_t, void *); @@ -630,11 +627,6 @@ if (nxgep->niu_type != N2_NIU) { nxge_set_pci_replay_timeout(nxgep); } -#if defined(sun4v) - if (isLDOMguest(nxgep)) { - nxge_m_callbacks.mc_tx = nxge_m_tx; - } -#endif #if defined(sun4v) /* This is required by nxge_hio_init(), which follows. */ @@ -961,11 +953,7 @@ (void) nxge_link_monitor(nxgep, LINK_MONITOR_STOP); - if (isLDOMguest(nxgep)) { - if (nxgep->nxge_mac_state == NXGE_MAC_STARTED) - nxge_m_stop((void *)nxgep); - nxge_hio_unregister(nxgep); - } else if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) { + if (nxgep->mach && (status = mac_unregister(nxgep->mach)) != 0) { NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, "<== nxge_detach status = 0x%08X", status)); return (DDI_FAILURE); @@ -4294,10 +4282,13 @@ case MAC_CAPAB_MULTIFACTADDR: { mac_capab_multifactaddr_t *mfacp = cap_data; - mutex_enter(nxgep->genlock); - mfacp->mcm_naddr = nxgep->nxge_mmac_info.num_factory_mmac; - mfacp->mcm_getaddr = nxge_m_getfactaddr; - mutex_exit(nxgep->genlock); + if (!isLDOMguest(nxgep)) { + mutex_enter(nxgep->genlock); + mfacp->mcm_naddr = + nxgep->nxge_mmac_info.num_factory_mmac; + mfacp->mcm_getaddr = nxge_m_getfactaddr; + mutex_exit(nxgep->genlock); + } break; } @@ -4325,34 +4316,68 @@ mutex_enter(nxgep->genlock); if (cap_rings->mr_type == MAC_RING_TYPE_RX) { - cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC; - cap_rings->mr_rnum = p_cfgp->max_rdcs; - cap_rings->mr_rget = nxge_fill_ring; - cap_rings->mr_gnum = p_cfgp->max_rdc_grpids; - cap_rings->mr_gget = nxge_hio_group_get; - cap_rings->mr_gaddring = nxge_group_add_ring; - cap_rings->mr_gremring = nxge_group_rem_ring; + if (isLDOMguest(nxgep)) { + cap_rings->mr_group_type = + MAC_GROUP_TYPE_STATIC; + cap_rings->mr_rnum = + NXGE_HIO_SHARE_MAX_CHANNELS; + cap_rings->mr_rget = nxge_fill_ring; + cap_rings->mr_gnum = 1; + cap_rings->mr_gget = nxge_hio_group_get; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } else { + /* + * Service Domain. + */ + cap_rings->mr_group_type = + MAC_GROUP_TYPE_DYNAMIC; + cap_rings->mr_rnum = p_cfgp->max_rdcs; + cap_rings->mr_rget = nxge_fill_ring; + cap_rings->mr_gnum = p_cfgp->max_rdc_grpids; + cap_rings->mr_gget = nxge_hio_group_get; + cap_rings->mr_gaddring = nxge_group_add_ring; + cap_rings->mr_gremring = nxge_group_rem_ring; + } NXGE_DEBUG_MSG((nxgep, RX_CTL, "==> nxge_m_getcapab: rx nrings[%d] ngroups[%d]", p_cfgp->max_rdcs, p_cfgp->max_rdc_grpids)); } else { - cap_rings->mr_group_type = MAC_GROUP_TYPE_DYNAMIC; - cap_rings->mr_rnum = p_cfgp->tdc.count; - cap_rings->mr_rget = nxge_fill_ring; - if (isLDOMservice(nxgep)) { - /* share capable */ - /* Do not report the default ring: hence -1 */ + /* + * TX Rings. + */ + if (isLDOMguest(nxgep)) { + cap_rings->mr_group_type = + MAC_GROUP_TYPE_STATIC; + cap_rings->mr_rnum = + NXGE_HIO_SHARE_MAX_CHANNELS; + cap_rings->mr_rget = nxge_fill_ring; + cap_rings->mr_gnum = 0; + cap_rings->mr_gget = NULL; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } else { + /* + * Service Domain. + */ + cap_rings->mr_group_type = + MAC_GROUP_TYPE_DYNAMIC; + cap_rings->mr_rnum = p_cfgp->tdc.count; + cap_rings->mr_rget = nxge_fill_ring; + + /* + * Share capable. + * + * Do not report the default group: hence -1 + */ cap_rings->mr_gnum = NXGE_MAX_TDC_GROUPS / nxgep->nports - 1; - } else { - cap_rings->mr_gnum = 0; + cap_rings->mr_gget = nxge_hio_group_get; + cap_rings->mr_gaddring = nxge_group_add_ring; + cap_rings->mr_gremring = nxge_group_rem_ring; } - cap_rings->mr_gget = nxge_hio_group_get; - cap_rings->mr_gaddring = nxge_group_add_ring; - cap_rings->mr_gremring = nxge_group_rem_ring; - NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_m_getcapab: tx rings # of rings %d", p_cfgp->tdc.count)); @@ -6372,7 +6397,7 @@ NXGE_DEBUG_MSG((nxgep, INT_CTL, "<== nxge_intrs_disable")); } -static nxge_status_t +nxge_status_t nxge_mac_register(p_nxge_t nxgep) { mac_register_t *macp; @@ -6386,7 +6411,13 @@ macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; macp->m_driver = nxgep; macp->m_dip = nxgep->dip; - macp->m_src_addr = nxgep->ouraddr.ether_addr_octet; + if (!isLDOMguest(nxgep)) { + macp->m_src_addr = nxgep->ouraddr.ether_addr_octet; + } else { + macp->m_src_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP); + macp->m_dst_addr = KMEM_ZALLOC(MAXMACADDRLEN, KM_SLEEP); + (void) memset(macp->m_src_addr, 0xff, sizeof (MAXMACADDRLEN)); + } macp->m_callbacks = &nxge_m_callbacks; macp->m_min_sdu = 0; nxgep->mac.default_mtu = nxgep->mac.maxframesize - @@ -6395,7 +6426,12 @@ macp->m_margin = VLAN_TAGSZ; macp->m_priv_props = nxge_priv_props; macp->m_priv_prop_count = NXGE_MAX_PRIV_PROPS; - macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE; + if (isLDOMguest(nxgep)) { + macp->m_v12n = MAC_VIRT_LEVEL1 | MAC_VIRT_SERIALIZE; + } else { + macp->m_v12n = MAC_VIRT_HIO | MAC_VIRT_LEVEL1 | \ + MAC_VIRT_SERIALIZE; + } NXGE_DEBUG_MSG((nxgep, MAC_CTL, "==> nxge_mac_register: instance %d " @@ -6406,6 +6442,10 @@ NXGE_EHEADER_VLAN_CRC)); status = mac_register(macp, &nxgep->mach); + if (isLDOMguest(nxgep)) { + KMEM_FREE(macp->m_src_addr, MAXMACADDRLEN); + KMEM_FREE(macp->m_dst_addr, MAXMACADDRLEN); + } mac_free(macp); if (status != 0) {
--- a/usr/src/uts/common/io/nxge/nxge_rxdma.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_rxdma.c Mon Aug 17 09:14:35 2009 -0400 @@ -1756,7 +1756,7 @@ uint8_t channel; npi_handle_t handle; rx_dma_ctl_stat_t cs; - p_rx_rcr_ring_t rcr_ring; + p_rx_rcr_ring_t rcrp; mblk_t *mp = NULL; if (ldvp == NULL) { @@ -1789,7 +1789,7 @@ /* * Get the ring to enable us to process packets. */ - rcr_ring = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index]; + rcrp = nxgep->rx_rcr_rings->rcr_rings[ldvp->vdma_index]; /* * The RCR ring lock must be held when packets @@ -1799,7 +1799,7 @@ * (will cause fatal errors such as rcrincon bit set) * and the setting of the poll_flag. */ - MUTEX_ENTER(&rcr_ring->lock); + MUTEX_ENTER(&rcrp->lock); /* * Get the control and status for this channel. @@ -1840,12 +1840,12 @@ mgm.value); } } - MUTEX_EXIT(&rcr_ring->lock); + MUTEX_EXIT(&rcrp->lock); return (DDI_INTR_CLAIMED); } - ASSERT(rcr_ring->ldgp == ldgp); - ASSERT(rcr_ring->ldvp == ldvp); + ASSERT(rcrp->ldgp == ldgp); + ASSERT(rcrp->ldvp == ldvp); RXDMA_REG_READ64(handle, RX_DMA_CTL_STAT_REG, channel, &cs.value); @@ -1856,8 +1856,8 @@ cs.bits.hdw.rcrto, cs.bits.hdw.rcrthres)); - if (rcr_ring->poll_flag == 0) { - mp = nxge_rx_pkts(nxgep, rcr_ring, cs, -1); + if (!rcrp->poll_flag) { + mp = nxge_rx_pkts(nxgep, rcrp, cs, -1); } /* error events. */ @@ -1873,27 +1873,34 @@ * these two edge triggered bits. */ cs.value &= RX_DMA_CTL_STAT_WR1C; - cs.bits.hdw.mex = rcr_ring->poll_flag ? 0 : 1; + cs.bits.hdw.mex = rcrp->poll_flag ? 0 : 1; RXDMA_REG_WRITE64(handle, RX_DMA_CTL_STAT_REG, channel, cs.value); /* * If the polling mode is enabled, disable the interrupt. */ - if (rcr_ring->poll_flag) { + if (rcrp->poll_flag) { NXGE_DEBUG_MSG((nxgep, NXGE_ERR_CTL, "==> nxge_rx_intr: rdc %d ldgp $%p ldvp $%p " "(disabling interrupts)", channel, ldgp, ldvp)); + /* * Disarm this logical group if this is a single device * group. */ if (ldgp->nldvs == 1) { - ldgimgm_t mgm; - mgm.value = 0; - mgm.bits.ldw.arm = 0; - NXGE_REG_WR64(handle, - LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value); + if (isLDOMguest(nxgep)) { + ldgp->arm = B_FALSE; + nxge_hio_ldgimgn(nxgep, ldgp); + } else { + ldgimgm_t mgm; + mgm.value = 0; + mgm.bits.ldw.arm = 0; + NXGE_REG_WR64(handle, + LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), + mgm.value); + } } } else { /* @@ -1920,24 +1927,11 @@ "==> nxge_rx_intr: rdc %d ldgp $%p " "exiting ISR (and call mac_rx_ring)", channel, ldgp)); } - MUTEX_EXIT(&rcr_ring->lock); + MUTEX_EXIT(&rcrp->lock); if (mp != NULL) { - if (!isLDOMguest(nxgep)) - mac_rx_ring(nxgep->mach, rcr_ring->rcr_mac_handle, mp, - rcr_ring->rcr_gen_num); -#if defined(sun4v) - else { /* isLDOMguest(nxgep) */ - nxge_hio_data_t *nhd = (nxge_hio_data_t *) - nxgep->nxge_hw_p->hio; - nx_vio_fp_t *vio = &nhd->hio.vio; - - if (vio->cb.vio_net_rx_cb) { - (*vio->cb.vio_net_rx_cb) - (nxgep->hio_vr->vhp, mp); - } - } -#endif + mac_rx_ring(nxgep->mach, rcrp->rcr_mac_handle, mp, + rcrp->rcr_gen_num); } NXGE_DEBUG_MSG((nxgep, RX_CTL, "<== nxge_rx_intr: DDI_INTR_CLAIMED")); return (DDI_INTR_CLAIMED); @@ -2720,6 +2714,7 @@ uint32_t channel; if (ring_handle == NULL) { + ASSERT(ring_handle != NULL); return (0); } @@ -2760,6 +2755,7 @@ uint32_t channel; if (ring_handle == NULL) { + ASSERT(ring_handle != NULL); return (0); } @@ -2816,12 +2812,18 @@ "==> nxge_disable_poll: rdc %d ldgp $%p (enable intr)", ringp->rdc, ldgp)); if (ldgp->nldvs == 1) { - ldgimgm_t mgm; - mgm.value = 0; - mgm.bits.ldw.arm = 1; - mgm.bits.ldw.timer = ldgp->ldg_timer; - NXGE_REG_WR64(handle, - LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), mgm.value); + if (isLDOMguest(nxgep)) { + ldgp->arm = B_TRUE; + nxge_hio_ldgimgn(nxgep, ldgp); + } else { + ldgimgm_t mgm; + mgm.value = 0; + mgm.bits.ldw.arm = 1; + mgm.bits.ldw.timer = ldgp->ldg_timer; + NXGE_REG_WR64(handle, + LDGIMGN_REG + LDSV_OFFSET(ldgp->ldg), + mgm.value); + } } ringp->poll_flag = 0; }
--- a/usr/src/uts/common/io/nxge/nxge_send.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_send.c Mon Aug 17 09:14:35 2009 -0400 @@ -66,20 +66,9 @@ (void) nxge_txdma_reclaim(ring->nxgep, ring, 0); MUTEX_EXIT(&ring->lock); - if (!isLDOMguest(ring->nxgep) && !ring->tx_ring_offline) + if (!ring->tx_ring_offline) { mac_tx_ring_update(ring->nxgep->mach, ring->tx_ring_handle); -#if defined(sun4v) - else { - nxge_hio_data_t *nhd = - (nxge_hio_data_t *)ring->nxgep->nxge_hw_p->hio; - nx_vio_fp_t *vio = &nhd->hio.vio; - - /* Call back vnet. */ - if (vio->cb.vio_net_tx_update) { - (*vio->cb.vio_net_tx_update)(ring->nxgep->hio_vr->vhp); - } } -#endif } static void @@ -141,65 +130,6 @@ return ((mblk_t *)NULL); } -#if defined(sun4v) - -/* - * Hashing policy for load balancing over the set of TX rings - * available to the driver. - */ -static uint8_t nxge_tx_hash_policy = MAC_PKT_HASH_L4; - -/* - * nxge_m_tx() is needed for Hybrid I/O operation of the vnet in - * the guest domain. See CR 6778758 for long term solution. - * - * The guest domain driver will for now hash the packet - * to pick a DMA channel from the only group it has group 0. - */ - -mblk_t * -nxge_m_tx(void *arg, mblk_t *mp) -{ - p_nxge_t nxgep = (p_nxge_t)arg; - mblk_t *next; - uint64_t rindex; - p_tx_ring_t tx_ring_p; - int status; - - NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_m_tx")); - - /* - * Hash to pick a ring from Group 0, the only TX group - * for a guest domain driver. - */ - rindex = mac_pkt_hash(DL_ETHER, mp, nxge_tx_hash_policy, B_TRUE); - rindex = rindex % nxgep->pt_config.tdc_grps[0].max_tdcs; - - /* - * Get the ring handle. - */ - tx_ring_p = nxgep->tx_rings->rings[rindex]; - - while (mp != NULL) { - next = mp->b_next; - mp->b_next = NULL; - - status = nxge_start(nxgep, tx_ring_p, mp); - if (status != 0) { - mp->b_next = next; - nxge_tx_ring_dispatch(tx_ring_p); - return (mp); - } - - mp = next; - } - - NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_m_tx")); - return ((mblk_t *)NULL); -} - -#endif - int nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) {
--- a/usr/src/uts/common/io/nxge/nxge_virtual.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/nxge/nxge_virtual.c Mon Aug 17 09:14:35 2009 -0400 @@ -3994,6 +3994,9 @@ p_dma_cfgp = &nxgep->pt_config; p_cfgp = &p_dma_cfgp->hw_config; + if (isLDOMguest(nxgep)) + return (ringidx); + for (i = 0; i < groupid; i++) { rdc_grp_p = &p_dma_cfgp->rdc_grps[p_cfgp->def_mac_rxdma_grpid + i];
--- a/usr/src/uts/common/io/sata/adapters/ahci/ahci.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/sata/adapters/ahci/ahci.c Mon Aug 17 09:14:35 2009 -0400 @@ -94,33 +94,57 @@ static void ahci_dealloc_cmd_list(ahci_ctl_t *, ahci_port_t *); static int ahci_alloc_cmd_tables(ahci_ctl_t *, ahci_port_t *); static void ahci_dealloc_cmd_tables(ahci_ctl_t *, ahci_port_t *); +static void ahci_alloc_pmult(ahci_ctl_t *, ahci_port_t *); +static void ahci_dealloc_pmult(ahci_ctl_t *, ahci_port_t *); static int ahci_initialize_controller(ahci_ctl_t *); static void ahci_uninitialize_controller(ahci_ctl_t *); -static int ahci_initialize_port(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_initialize_port(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); static int ahci_config_space_init(ahci_ctl_t *); static void ahci_drain_ports_taskq(ahci_ctl_t *); +static int ahci_rdwr_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t *, + uint8_t); +static int ahci_read_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t *); +static int ahci_write_pmult(ahci_ctl_t *, ahci_addr_t *, uint8_t, uint32_t); +static int ahci_update_pmult_pscr(ahci_ctl_t *, ahci_addr_t *, + sata_device_t *); +static int ahci_update_pmult_gscr(ahci_ctl_t *, ahci_addr_t *, + sata_device_t *); +static int ahci_initialize_pmult(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *, + sata_device_t *); +static int ahci_initialize_pmport(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); +static int ahci_probe_pmult(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); +static int ahci_probe_pmport(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *, + sata_device_t *); + static void ahci_disable_interface_pm(ahci_ctl_t *, uint8_t); static int ahci_start_port(ahci_ctl_t *, ahci_port_t *, uint8_t); -static void ahci_find_dev_signature(ahci_ctl_t *, ahci_port_t *, uint8_t); +static void ahci_find_dev_signature(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *); static void ahci_update_sata_registers(ahci_ctl_t *, uint8_t, sata_device_t *); static int ahci_deliver_satapkt(ahci_ctl_t *, ahci_port_t *, - uint8_t, sata_pkt_t *); + ahci_addr_t *, sata_pkt_t *); static int ahci_do_sync_start(ahci_ctl_t *, ahci_port_t *, - uint8_t, sata_pkt_t *); -static int ahci_claim_free_slot(ahci_ctl_t *, ahci_port_t *, int); + ahci_addr_t *, sata_pkt_t *); +static int ahci_claim_free_slot(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *, int); static void ahci_copy_err_cnxt(sata_cmd_t *, ahci_fis_d2h_register_t *); static void ahci_copy_ncq_err_page(sata_cmd_t *, struct sata_ncq_error_recovery_page *); static void ahci_copy_out_regs(sata_cmd_t *, ahci_fis_d2h_register_t *); -static int ahci_software_reset(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_software_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); static int ahci_hba_reset(ahci_ctl_t *); -static int ahci_port_reset(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_port_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); +static int ahci_pmport_reset(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); static void ahci_reject_all_abort_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t); -static int ahci_reset_device_reject_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t); -static int ahci_reset_port_reject_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_reset_device_reject_pkts(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *); +static int ahci_reset_pmdevice_reject_pkts(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *); +static int ahci_reset_port_reject_pkts(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *); static int ahci_reset_hba_reject_pkts(ahci_ctl_t *); static int ahci_put_port_into_notrunning_state(ahci_ctl_t *, ahci_port_t *, uint8_t); @@ -132,6 +156,8 @@ static void ahci_get_rqsense_data(ahci_ctl_t *, ahci_port_t *, uint8_t, sata_pkt_t *); static void ahci_fatal_error_recovery_handler(ahci_ctl_t *, ahci_port_t *, + ahci_addr_t *, uint32_t); +static void ahci_pmult_error_recovery_handler(ahci_ctl_t *, ahci_port_t *, uint8_t, uint32_t); static void ahci_timeout_pkts(ahci_ctl_t *, ahci_port_t *, uint8_t, uint32_t); @@ -149,6 +175,8 @@ static int ahci_intr_cmd_cmplt(ahci_ctl_t *, ahci_port_t *, uint8_t); static int ahci_intr_set_device_bits(ahci_ctl_t *, ahci_port_t *, uint8_t); +static int ahci_intr_ncq_events(ahci_ctl_t *, ahci_port_t *, ahci_addr_t *); +static int ahci_intr_pmult_sntf_events(ahci_ctl_t *, ahci_port_t *, uint8_t); static int ahci_intr_port_connect_change(ahci_ctl_t *, ahci_port_t *, uint8_t); static int ahci_intr_device_mechanical_presence_status(ahci_ctl_t *, ahci_port_t *, uint8_t); @@ -159,6 +187,7 @@ uint8_t, uint32_t); static int ahci_intr_cold_port_detect(ahci_ctl_t *, ahci_port_t *, uint8_t); +static void ahci_get_ahci_addr(ahci_ctl_t *, sata_device_t *, ahci_addr_t *); static int ahci_get_num_implemented_ports(uint32_t); static void ahci_log_fatal_error_message(ahci_ctl_t *, uint8_t port, uint32_t); @@ -668,13 +697,34 @@ "hba does not support 64-bit addressing", NULL); } + /* Checking for the support of Port Multiplier */ + if (cap_status & AHCI_HBA_CAP_SPM) { + ahci_ctlp->ahcictl_cap |= AHCI_CAP_PMULT_CBSS; + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, + "hba supports port multiplier (CBSS)", NULL); + + /* Support FIS-based switching ? */ + if (cap_status & AHCI_HBA_CAP_FBSS) { + ahci_ctlp->ahcictl_cap |= AHCI_CAP_PMULT_FBSS; + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, + "hba supports FIS-based switching (FBSS)", NULL); + } + } + /* Checking for Support Command List Override */ if (cap_status & AHCI_HBA_CAP_SCLO) { ahci_ctlp->ahcictl_cap |= AHCI_CAP_SCLO; - AHCIDBG(AHCIDBG_INIT, ahci_ctlp, + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, "hba supports command list override.", NULL); } + /* Checking for Asynchronous Notification */ + if (cap_status & AHCI_HBA_CAP_SSNTF) { + ahci_ctlp->ahcictl_cap |= AHCI_CAP_SNTF; + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, + "hba supports asynchronous notification.", NULL); + } + if (pci_config_setup(dip, &ahci_ctlp->ahcictl_pci_conf_handle) != DDI_SUCCESS) { cmn_err(CE_WARN, "!ahci%d: Cannot set up pci configure space", @@ -1037,7 +1087,7 @@ /* Allocate memory for the sata_hba_tran */ sata_hba_tran = kmem_zalloc(sizeof (sata_hba_tran_t), KM_SLEEP); - sata_hba_tran->sata_tran_hba_rev = SATA_TRAN_HBA_REV_2; + sata_hba_tran->sata_tran_hba_rev = SATA_TRAN_HBA_REV_3; sata_hba_tran->sata_tran_hba_dip = ahci_ctlp->ahcictl_dip; sata_hba_tran->sata_tran_hba_dma_attr = &ahci_ctlp->ahcictl_buffer_dma_attr; @@ -1079,6 +1129,18 @@ "Command Queuing", NULL); } + /* Support port multiplier? */ + if (cap_status & AHCI_HBA_CAP_SPM) { + sata_hba_tran->sata_tran_hba_features_support |= + SATA_CTLF_PORT_MULTIPLIER; + + /* Support FIS-based switching for port multiplier? */ + if (cap_status & AHCI_HBA_CAP_FBSS) { + sata_hba_tran->sata_tran_hba_features_support |= + SATA_CTLF_PMULT_FBS; + } + } + /* Report the number of command slots */ sata_hba_tran->sata_tran_hba_qdepth = ahci_ctlp->ahcictl_num_cmd_slots; @@ -1140,6 +1202,15 @@ return (AHCI_SUCCESS); } +#define SET_PORTSTR(str, addrp) \ + if (AHCI_ADDR_IS_PORT(addrp)) \ + (void) sprintf((str), "%d", (addrp)->aa_port); \ + else if (AHCI_ADDR_IS_PMULT(addrp)) \ + (void) sprintf((str), "%d (pmult)", (addrp)->aa_port); \ + else \ + (void) sprintf((str), "%d:%d", (addrp)->aa_port, \ + (addrp)->aa_pmport); + /* * ahci_tran_probe_port is called by SATA framework. It returns port state, * port status registers and an attached device type via sata_device @@ -1155,67 +1226,128 @@ { ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; - uint8_t cport = sd->satadev_addr.cport; - uint8_t pmport = sd->satadev_addr.pmport; - uint8_t qual = sd->satadev_addr.qual; - uint8_t device_type; + ahci_addr_t addr, pmult_addr; + uint8_t cport = sd->satadev_addr.cport; + char portstr[10]; + uint8_t device_type; uint32_t port_state; uint8_t port; + int rval = SATA_SUCCESS, rval_init; ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip)); port = ahci_ctlp->ahcictl_cport_to_port[cport]; - AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "ahci_tran_probe_port enter: cport: %d, " - "pmport: %d, qual: %d", cport, pmport, qual); - ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); - port_state = ahci_portp->ahciport_port_state; + ahci_get_ahci_addr(ahci_ctlp, sd, &addr); + ASSERT(AHCI_ADDR_IS_VALID(&addr)); + SET_PORTSTR(portstr, &addr); + + AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, + "ahci_tran_probe_port enter: port %s", portstr); + + if ((AHCI_ADDR_IS_PMULT(&addr) || AHCI_ADDR_IS_PMPORT(&addr)) && + (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT || + ahci_portp->ahciport_pmult_info == NULL)) { + /* port mutliplier is removed. */ + AHCIDBG(AHCIDBG_PMULT, ahci_ctlp, + "ahci_tran_probe_port: " + "port-pmult is removed from port %s", portstr); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_FAILURE); + } + + /* + * The sata_device may refer to + * 1. A controller port. + * A controller port should be ready here. + * 2. A port multiplier. + * If it has not been initialized, initialized it. If it is + * initilaized, we need check the status of all its device ports. + * 3. A port multiplier port. + * If it has not been initialized, initialized it. + * + * A port multiplier or a port multiplier port may require some + * initialization because we cannot do these time-consuming jobs in an + * interrupt context. + */ + if (AHCI_ADDR_IS_PORT(&addr)) { + if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) { + AHCI_ADDR_SET_PMULT(&pmult_addr, port); + port_state = ahci_portp->ahciport_port_state; + /* SATA Framework's first probe? */ + if (!(port_state & SATA_DSTATE_PMULT_INIT)) { + /* Initialize registers on a port multiplier */ + rval_init = ahci_initialize_pmult(ahci_ctlp, + ahci_portp, &pmult_addr, sd); + if (rval_init != AHCI_SUCCESS) { + AHCIDBG(AHCIDBG_PMULT, ahci_ctlp, + "ahci_tran_probe_port: " + "pmult initialization failed.", + NULL); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_FAILURE); + } + } + } + } else if (AHCI_ADDR_IS_PMULT(&addr)) { + /* Check pmports hotplug events */ + (void) ahci_probe_pmult(ahci_ctlp, ahci_portp, &addr); + } else if (AHCI_ADDR_IS_PMPORT(&addr)) { + if (ahci_probe_pmport(ahci_ctlp, ahci_portp, + &addr, sd) != AHCI_SUCCESS) { + rval = SATA_FAILURE; + goto out; + } + } + + /* Update port state and device type */ + port_state = AHCIPORT_GET_STATE(ahci_portp, &addr); + switch (port_state) { case SATA_PSTATE_FAILED: sd->satadev_state = SATA_PSTATE_FAILED; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "ahci_tran_probe_port: port %d PORT FAILED", port); + "ahci_tran_probe_port: port %s PORT FAILED", portstr); goto out; case SATA_PSTATE_SHUTDOWN: sd->satadev_state = SATA_PSTATE_SHUTDOWN; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "ahci_tran_probe_port: port %d PORT SHUTDOWN", port); + "ahci_tran_probe_port: port %s PORT SHUTDOWN", portstr); goto out; case SATA_PSTATE_PWROFF: sd->satadev_state = SATA_PSTATE_PWROFF; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "ahci_tran_probe_port: port %d PORT PWROFF", port); + "ahci_tran_probe_port: port %s PORT PWROFF", portstr); goto out; case SATA_PSTATE_PWRON: sd->satadev_state = SATA_PSTATE_PWRON; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d PORT PWRON", port); + "ahci_tran_probe_port: port %s PORT PWRON", portstr); break; default: sd->satadev_state = port_state; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d PORT NORMAL %x", - port, port_state); + "ahci_tran_probe_port: port %s PORT NORMAL %x", + portstr, port_state); break; } - device_type = ahci_portp->ahciport_device_type; + device_type = AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr); switch (device_type) { case SATA_DTYPE_ATADISK: sd->satadev_type = SATA_DTYPE_ATADISK; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d DISK found", port); + "ahci_tran_probe_port: port %s DISK found", portstr); break; case SATA_DTYPE_ATAPI: @@ -1227,35 +1359,62 @@ */ sd->satadev_type = SATA_DTYPE_ATAPI; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d ATAPI found", port); + "ahci_tran_probe_port: port %s ATAPI found", portstr); break; case SATA_DTYPE_PMULT: + ASSERT(AHCI_ADDR_IS_PORT(&addr)||AHCI_ADDR_IS_PMULT(&addr)); sd->satadev_type = SATA_DTYPE_PMULT; + + /* Update the number of pmports. */ + ASSERT(ahci_portp->ahciport_pmult_info != NULL); + sd->satadev_add_info = ahci_portp-> + ahciport_pmult_info->ahcipmi_num_dev_ports; + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d Port Multiplier found", - port); + "ahci_tran_probe_port: port %s Port Multiplier found", + portstr); break; case SATA_DTYPE_UNKNOWN: sd->satadev_type = SATA_DTYPE_UNKNOWN; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d Unknown device found", port); + "ahci_tran_probe_port: port %s Unknown device found", + portstr); break; default: /* we don't support any other device types */ sd->satadev_type = SATA_DTYPE_NONE; AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_probe_port: port %d No device found", port); + "ahci_tran_probe_port: port %s No device found", portstr); break; } out: - ahci_update_sata_registers(ahci_ctlp, port, sd); + /* Register update only fails while probing a pmult/pmport */ + if (AHCI_ADDR_IS_PORT(&addr)) { + ahci_update_sata_registers(ahci_ctlp, port, sd); + if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) + if (port_state & SATA_STATE_READY) + if (ahci_update_pmult_gscr(ahci_ctlp, + &pmult_addr, sd) != AHCI_SUCCESS) + rval = SATA_FAILURE; + } else if (AHCI_ADDR_IS_PMULT(&addr)) { + ahci_update_sata_registers(ahci_ctlp, port, sd); + if (port_state & SATA_STATE_READY) + if (ahci_update_pmult_gscr(ahci_ctlp, + &addr, sd) != AHCI_SUCCESS) + rval = SATA_FAILURE; + } else if (AHCI_ADDR_IS_PMPORT(&addr)) { + if (port_state & SATA_STATE_READY) + if (ahci_update_pmult_pscr(ahci_ctlp, + &addr, sd) != AHCI_SUCCESS) + rval = SATA_FAILURE; + } + mutex_exit(&ahci_portp->ahciport_mutex); - - return (SATA_SUCCESS); + return (rval); } /* @@ -1292,8 +1451,11 @@ { ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; + ahci_addr_t addr, addr_pmult; + sata_device_t sdevice; uint8_t cport = spkt->satapkt_device.satadev_addr.cport; uint8_t port; + char portstr[10]; ahci_ctlp = ddi_get_soft_state(ahci_statep, ddi_get_instance(dip)); port = ahci_ctlp->ahcictl_cport_to_port[cport]; @@ -1305,12 +1467,75 @@ ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); - - if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED | - ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN | - ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) { + ahci_get_ahci_addr(ahci_ctlp, &spkt->satapkt_device, &addr); + SET_PORTSTR(portstr, &addr); + + /* Sanity check */ + if (AHCI_ADDR_IS_PMPORT(&addr)) { + if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT || + ahci_portp->ahciport_pmult_info == NULL) { + + spkt->satapkt_reason = SATA_PKT_PORT_ERROR; + spkt->satapkt_device.satadev_type = SATA_DTYPE_NONE; + spkt->satapkt_device.satadev_state = SATA_STATE_UNKNOWN; + ahci_update_sata_registers(ahci_ctlp, port, + &spkt->satapkt_device); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start returning PORT_ERROR while " + "pmult removed: port: %s", portstr); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_TRAN_PORT_ERROR); + } + + /* Port multiplier and pmport are correctly initialized? */ + if (!(ahci_portp->ahciport_port_state & + SATA_DSTATE_PMULT_INIT)) { + AHCI_ADDR_SET_PMULT(&addr_pmult, port); + if (!ddi_in_panic() || + ahci_initialize_pmult(ahci_ctlp, ahci_portp, + &addr_pmult, &sdevice) != AHCI_SUCCESS) { + spkt->satapkt_reason = SATA_PKT_PORT_ERROR; + spkt->satapkt_device.satadev_type = + AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr); + spkt->satapkt_device.satadev_state = + AHCIPORT_GET_STATE(ahci_portp, &addr); + ahci_update_sata_registers(ahci_ctlp, port, + &spkt->satapkt_device); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start returning PORT_ERROR " + "while pmult is not initialized " + "at port %d", port); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_TRAN_PORT_ERROR); + } + } + if (!(AHCIPORT_GET_STATE(ahci_portp, &addr) & + SATA_STATE_READY)) { + if (!ddi_in_panic() || + ahci_initialize_pmport(ahci_ctlp, + ahci_portp, &addr) != AHCI_SUCCESS) { + spkt->satapkt_reason = SATA_PKT_PORT_ERROR; + spkt->satapkt_device.satadev_type = + AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr); + spkt->satapkt_device.satadev_state = + AHCIPORT_GET_STATE(ahci_portp, &addr); + ahci_update_sata_registers(ahci_ctlp, port, + &spkt->satapkt_device); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start returning PORT_ERROR " + "while sub-link is not initialized " + "at port: %s", portstr); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_TRAN_PORT_ERROR); + } + } + } + + if (AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_FAILED || + AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_SHUTDOWN|| + AHCIPORT_GET_STATE(ahci_portp, &addr) & SATA_PSTATE_PWROFF) { /* - * In case the targer driver would send the packet before + * In case the target driver would send the packet before * sata framework can have the opportunity to process those * event reports. */ @@ -1322,12 +1547,12 @@ AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start returning PORT_ERROR while " "port in FAILED/SHUTDOWN/PWROFF state: " - "port: %d", port); + "port: %s", portstr); mutex_exit(&ahci_portp->ahciport_mutex); return (SATA_TRAN_PORT_ERROR); } - if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) { + if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr) == SATA_DTYPE_NONE) { /* * ahci_intr_phyrdy_change() may have rendered it to * SATA_DTYPE_NONE. @@ -1340,11 +1565,31 @@ &spkt->satapkt_device); AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start returning PORT_ERROR while " - "no device attached: port: %d", port); + "no device attached: port: %s", portstr); mutex_exit(&ahci_portp->ahciport_mutex); return (SATA_TRAN_PORT_ERROR); } + /* R/W PMULT command will occupy the whole HBA port */ + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start returning BUSY while " + "executing READ/WRITE PORT-MULT command: " + "port: %s", portstr); + spkt->satapkt_reason = SATA_PKT_BUSY; + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_TRAN_BUSY); + } + + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start returning BUSY while " + "hot-plug in progress: port: %s", portstr); + spkt->satapkt_reason = SATA_PKT_BUSY; + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_TRAN_BUSY); + } + /* * SATA HBA driver should remember that a device was reset and it * is supposed to reject any packets which do not specify either @@ -1356,10 +1601,16 @@ * has a chance to restore the device setting (such as cache enable/ * disable or other resettable stuff). */ + /* + * It is unnecessary to use specific flags to indicate + * reset_in_progress for a pmport. While mopping, all command will be + * mopped so that the entire HBA port is being dealt as a single + * object. + */ if (spkt->satapkt_cmd.satacmd_flags.sata_clear_dev_reset) { ahci_portp->ahciport_reset_in_progress = 0; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "ahci_tran_start clearing the " + "ahci_tran_start [CLEAR] the " "reset_in_progress for port: %d", port); } @@ -1374,6 +1625,14 @@ return (SATA_TRAN_BUSY); } +#ifdef AHCI_DEBUG + if (spkt->satapkt_cmd.satacmd_flags.sata_ignore_dev_reset) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_tran_start: packet 0x%p [PASSTHRU] at port %d", + spkt, port); + } +#endif + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) { spkt->satapkt_reason = SATA_PKT_BUSY; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, @@ -1401,7 +1660,7 @@ } /* We need to do the sync start now */ - if (ahci_do_sync_start(ahci_ctlp, ahci_portp, port, + if (ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr, spkt) == AHCI_FAILURE) { AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start " "return QUEUE_FULL: port %d", port); @@ -1410,7 +1669,7 @@ } } else { /* Async start, using interrupt */ - if (ahci_deliver_satapkt(ahci_ctlp, ahci_portp, port, spkt) + if (ahci_deliver_satapkt(ahci_ctlp, ahci_portp, &addr, spkt) == AHCI_FAILURE) { spkt->satapkt_reason = SATA_PKT_QUEUE_FULL; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_start " @@ -1421,7 +1680,7 @@ } AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "ahci_tran_start " - "sata tran accepted: port %d", port); + "sata tran accepted: port %s", portstr); mutex_exit(&ahci_portp->ahciport_mutex); return (SATA_TRAN_ACCEPTED); @@ -1438,20 +1697,21 @@ */ static int ahci_do_sync_start(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, - uint8_t port, sata_pkt_t *spkt) + ahci_addr_t *addrp, sata_pkt_t *spkt) { int pkt_timeout_ticks; uint32_t timeout_tags; int rval; int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip); + uint8_t port = addrp->aa_port; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_do_sync_start enter: " - "port %d spkt 0x%p", port, spkt); + "port %d:%d spkt 0x%p", port, addrp->aa_pmport, spkt); if (spkt->satapkt_op_mode & SATA_OPMODE_POLLING) { ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_POLLING; if ((rval = ahci_deliver_satapkt(ahci_ctlp, ahci_portp, - port, spkt)) == AHCI_FAILURE) { + addrp, spkt)) == AHCI_FAILURE) { ahci_portp->ahciport_flags &= ~ AHCI_PORT_FLAG_POLLING; return (rval); } @@ -1465,10 +1725,10 @@ /* Simulate the interrupt */ ahci_port_intr(ahci_ctlp, ahci_portp, port); - drv_usecwait(AHCI_1MS_USECS); + drv_usecwait(AHCI_10MS_USECS); mutex_enter(&ahci_portp->ahciport_mutex); - pkt_timeout_ticks -= AHCI_1MS_TICKS; + pkt_timeout_ticks -= AHCI_10MS_TICKS; if (pkt_timeout_ticks < 0) { cmn_err(CE_WARN, "!ahci%d: ahci_do_sync_start " "port %d satapkt 0x%p timed out\n", @@ -1485,7 +1745,7 @@ } else { if ((rval = ahci_deliver_satapkt(ahci_ctlp, ahci_portp, - port, spkt)) == AHCI_FAILURE) + addrp, spkt)) == AHCI_FAILURE) return (rval); #if AHCI_DEBUG @@ -1529,27 +1789,33 @@ /* * Searches for and claims a free command slot. * - * Returns: - * - * AHCI_FAILURE if failed - * 1. if no empty slot left - * 2. non-queued command requested while queued command(s) is outstanding - * 3. queued command requested whild non-queued command(s) is outstanding + * Returns value: + * + * AHCI_FAILURE returned only if + * 1. No empty slot left + * 2. Non-queued command requested while queued command(s) is outstanding + * 3. Queued command requested while non-queued command(s) is outstanding * 4. HBA doesn't support multiple-use of command list while already a - * non-queued command is oustanding - * - * claimed slot number if succeeded - * - * NOTE: it will always return slot 0 during error recovery process for - * REQUEST SENSE or READ LOG EXT command to simplify the algorithm. + * non-queued command is oustanding + * 5. Queued command requested while some queued command(s) has been + * outstanding on a different port multiplier port. (AHCI spec 1.2, + * 9.1.2) + * + * claimed slot number returned if succeeded + * + * NOTE: it will always return slot 0 for following commands to simplify the + * algorithm. + * 1. REQUEST SENSE or READ LOG EXT command during error recovery process + * 2. READ/WRITE PORTMULT command * * WARNING!!! ahciport_mutex should be acquired before the function * is called. */ static int -ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, int command_type) +ahci_claim_free_slot(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp, int command_type) { + uint32_t port_cmd_issue; uint32_t free_slots; int slot; @@ -1574,6 +1840,14 @@ "command", NULL); return (AHCI_FAILURE); } + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "ahci_claim_free_slot: there is still pending " + "read/write port-mult command(s) in command list, " + "so no available slot for the non-queued command", + NULL); + return (AHCI_FAILURE); + } if ((ahci_ctlp->ahcictl_cap & AHCI_CAP_NO_MCMDLIST_NONQUEUE) && NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { AHCIDBG(AHCIDBG_INFO, ahci_ctlp, @@ -1594,6 +1868,31 @@ NULL); return (AHCI_FAILURE); } + + /* + * NCQ commands cannot be sent to different port multiplier + * ports in Command-Based Switching mode + */ + /* + * NOTE: In Command-Based Switching mode, AHCI controller + * usually reports a 'Handshake Error' when multiple NCQ + * commands are outstanding simultaneously. + */ + if (AHCIPORT_DEV_TYPE(ahci_portp, addrp) == SATA_DTYPE_PMULT) { + ASSERT(ahci_portp->ahciport_pmult_info != NULL); + if (!(ahci_ctlp->ahcictl_cap & AHCI_CAP_PMULT_FBSS) && + NCQ_CMD_IN_PROGRESS(ahci_portp) && + AHCIPORT_NCQ_PMPORT(ahci_portp) != + addrp->aa_pmport) { + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_claim_free_slot: there is still " + "pending queued command(s) in the " + "command list for another Port Multiplier " + "port, so no available slot.", NULL); + return (AHCI_FAILURE); + } + } + free_slots = (~ahci_portp->ahciport_pending_ncq_tags) & AHCI_NCQ_SLOT_MASK(ahci_portp); } else if (command_type == AHCI_ERR_RETRI_CMD) { @@ -1603,6 +1902,30 @@ "SENSE or READ LOG EXT command", NULL); slot = 0; goto out; + } else if (command_type == AHCI_RDWR_PMULT_CMD) { + /* + * An extra check on PxCI. Sometimes PxCI bits may not be + * cleared during hot-plug or error recovery process. + */ + port_cmd_issue = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, addrp->aa_port)); + + if (port_cmd_issue != 0) { + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "ahci_claim_free_slot: there is still pending " + "command(s) in command list (0x%x/0x%x, PxCI %x)," + "so no available slot for R/W PMULT command.", + NON_NCQ_CMD_IN_PROGRESS(ahci_portp), + NCQ_CMD_IN_PROGRESS(ahci_portp), + port_cmd_issue); + return (AHCI_FAILURE); + } + + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_claim_free_slot: slot 0 is allocated for " + "READ/WRITE PORTMULT command", NULL); + slot = 0; + goto out; } slot = ddi_ffs(free_slots) - 1; @@ -1621,6 +1944,10 @@ */ if (command_type == AHCI_NCQ_CMD) { ahci_portp->ahciport_pending_ncq_tags |= (0x1 << slot); + if (AHCI_ADDR_IS_PMPORT(addrp)) { + ASSERT(ahci_portp->ahciport_pmult_info != NULL); + AHCIPORT_NCQ_PMPORT(ahci_portp) = addrp->aa_pmport; + } } ahci_portp->ahciport_pending_tags |= (0x1 << slot); @@ -1643,7 +1970,7 @@ */ static int ahci_deliver_satapkt(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, - uint8_t port, sata_pkt_t *spkt) + ahci_addr_t *addrp, sata_pkt_t *spkt) { int cmd_slot; sata_cmd_t *scmd; @@ -1655,11 +1982,15 @@ int command_type = AHCI_NON_NCQ_CMD; int ncq_qdepth; int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip); + uint8_t port, pmport; #if AHCI_DEBUG uint32_t *ptr; uint8_t *ptr2; #endif + port = addrp->aa_port; + pmport = addrp->aa_pmport; + spkt->satapkt_reason = SATA_PKT_BUSY; scmd = &spkt->satapkt_cmd; @@ -1708,8 +2039,13 @@ if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) command_type = AHCI_ERR_RETRI_CMD; + /* Check if the command is an read/write pmult command */ + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) + command_type = AHCI_RDWR_PMULT_CMD; + /* Check if there is an empty command slot */ - cmd_slot = ahci_claim_free_slot(ahci_ctlp, ahci_portp, command_type); + cmd_slot = ahci_claim_free_slot(ahci_ctlp, ahci_portp, + addrp, command_type); if (cmd_slot == AHCI_FAILURE) { AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "no free command slot", NULL); return (AHCI_FAILURE); @@ -1728,11 +2064,13 @@ &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register); SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE); - if ((spkt->satapkt_device.satadev_addr.qual == SATA_ADDR_PMPORT) || - (spkt->satapkt_device.satadev_addr.qual == SATA_ADDR_DPMPORT)) { - SET_FIS_PMP(h2d_register_fisp, - spkt->satapkt_device.satadev_addr.pmport); - } + + /* + * PMP field only make sense when target is a port multiplier or a + * device behind a port multiplier. Otherwise should set it to 0. + */ + if (AHCI_ADDR_IS_PMULT(addrp) || AHCI_ADDR_IS_PMPORT(addrp)) + SET_FIS_PMP(h2d_register_fisp, pmport); SET_FIS_CDMDEVCTL(h2d_register_fisp, 1); SET_FIS_COMMAND(h2d_register_fisp, scmd->satacmd_cmd_reg); @@ -1753,6 +2091,8 @@ * SATAC_SMART * SATAC_ID_PACKET_DEVICE * SATAC_ID_DEVICE + * SATAC_READ_PORTMULT + * SATAC_WRITE_PORTMULT */ /* FALLTHRU */ @@ -1864,6 +2204,13 @@ /* Set the length of the command in the CFIS area */ SET_COMMAND_FIS_LENGTH(cmd_header, AHCI_H2D_REGISTER_FIS_LENGTH); + /* + * PMP field only make sense when target is a port multiplier or a + * device behind a port multiplier. Otherwise should set it to 0. + */ + if (AHCI_ADDR_IS_PMULT(addrp) || AHCI_ADDR_IS_PMPORT(addrp)) + SET_PORT_MULTI_PORT(cmd_header, pmport); + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "command data direction is " "sata_data_direction = 0x%x", scmd->satacmd_flags.sata_data_direction); @@ -1886,8 +2233,13 @@ if (command_type != AHCI_NCQ_CMD) SET_PREFETCHABLE(cmd_header, AHCI_CMDHEAD_PREFETCHABLE); - /* Now remember the sata packet in ahciport_slot_pkts[]. */ - if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) + /* + * Now remember the sata packet in ahciport_slot_pkts[]. + * Error retrieval command and r/w port multiplier command will + * be stored specifically for each port. + */ + if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) && + !RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) ahci_portp->ahciport_slot_pkts[cmd_slot] = spkt; /* @@ -2008,6 +2360,19 @@ return (SATA_SUCCESS); } + /* + * If AHCI_PORT_FLAG_RDWR_PMULT flag is set, it means a R/W PMULT + * command is being executed so no other commands is outstanding, + * nothing to do. + */ + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_RDWR_PMULT) { + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_tran_abort: port %d is reading/writing " + "port multiplier, so just return directly ", port); + mutex_exit(&ahci_portp->ahciport_mutex); + return (SATA_SUCCESS); + } + if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED | ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN | ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) { @@ -2140,15 +2505,17 @@ * Used to do device reset and reject all the pending packets on a device * during the reset operation. * + * NOTE: ONLY called by ahci_tran_reset_dport * WARNING!!! ahciport_mutex should be acquired before the function is called. */ static int ahci_reset_device_reject_pkts(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, uint8_t port) + ahci_port_t *ahci_portp, ahci_addr_t *addrp) { uint32_t slot_status = 0; uint32_t reset_tags = 0; uint32_t finished_tags = 0; + uint8_t port = addrp->aa_port; sata_device_t sdevice; int ret; @@ -2176,12 +2543,12 @@ reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp); } - if (ahci_software_reset(ahci_ctlp, ahci_portp, port) + if (ahci_software_reset(ahci_ctlp, ahci_portp, addrp) != AHCI_SUCCESS) { AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "Try to do a port reset after software " "reset failed", port); - ret = ahci_port_reset(ahci_ctlp, ahci_portp, port); + ret = ahci_port_reset(ahci_ctlp, ahci_portp, addrp); if (ret != AHCI_SUCCESS) { AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_reset_device_reject_pkts: port %d " @@ -2200,7 +2567,6 @@ sdevice.satadev_addr.cport = ahci_ctlp->ahcictl_port_to_cport[port]; sdevice.satadev_addr.pmport = 0; sdevice.satadev_addr.qual = SATA_ADDR_DCPORT; - sdevice.satadev_state = SATA_DSTATE_RESET | SATA_DSTATE_PWR_ACTIVE; mutex_exit(&ahci_portp->ahciport_mutex); @@ -2211,7 +2577,7 @@ mutex_enter(&ahci_portp->ahciport_mutex); AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, - "port %d sending event up: SATA_EVNT_RESET", port); + "port %d sending event up: SATA_EVNT_DEVICE_RESET", port); /* Next try to mop the pending commands */ if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) @@ -2235,6 +2601,109 @@ } /* + * Used to do device reset and reject all the pending packets on a device + * during the reset operation. + * + * NOTE: ONLY called by ahci_tran_reset_dport + * WARNING!!! ahciport_mutex should be acquired before the function is called. + */ +static int +ahci_reset_pmdevice_reject_pkts(ahci_ctl_t *ahci_ctlp, + ahci_port_t *ahci_portp, ahci_addr_t *addrp) +{ + uint32_t finished_tags = 0, reset_tags = 0, slot_status = 0; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; + sata_device_t sdevice; + + AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_PMULT, ahci_ctlp, + "ahci_reset_pmdevice_reject_pkts at port %d:%d", port, pmport); + + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_reset_pmdevice_reject_pkts: port %d is in " + "mopping process, so return directly ", port); + return (SATA_SUCCESS); + } + + /* Checking for outstanding commands */ + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { + slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port)); + reset_tags = slot_status & AHCI_SLOT_MASK(ahci_ctlp); + } else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) { + slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port)); + reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp); + } + + /* Issue SOFTWARE reset command. */ + if (ahci_software_reset(ahci_ctlp, ahci_portp, addrp) + != AHCI_SUCCESS) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "Try to do a port reset after software " + "reset failed", port); + return (SATA_FAILURE); + } + + /* Set the reset in progress flag */ + ahci_portp->ahciport_reset_in_progress = 1; + + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_MOPPING; + ahci_portp->ahciport_mop_in_progress++; + + /* Indicate to the framework that a reset has happened */ + bzero((void *)&sdevice, sizeof (sata_device_t)); + sdevice.satadev_addr.cport = ahci_ctlp->ahcictl_port_to_cport[port]; + sdevice.satadev_addr.pmport = pmport; + if (AHCI_ADDR_IS_PMULT(addrp)) + sdevice.satadev_addr.qual = SATA_ADDR_PMULT; + else + sdevice.satadev_addr.qual = SATA_ADDR_DPMPORT; + sdevice.satadev_state = SATA_DSTATE_RESET | + SATA_DSTATE_PWR_ACTIVE; + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_sata_hba_tran->sata_tran_hba_dip, + &sdevice, + SATA_EVNT_DEVICE_RESET); + mutex_enter(&ahci_portp->ahciport_mutex); + + AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, + "port %d:%d sending event up: SATA_EVNT_DEVICE_RESET", + port, pmport); + + /* Next try to mop the pending commands */ + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) + finished_tags = ahci_portp->ahciport_pending_tags & + ~slot_status & AHCI_SLOT_MASK(ahci_ctlp); + else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) + finished_tags = ahci_portp->ahciport_pending_ncq_tags & + ~slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp); + reset_tags &= ~finished_tags; + + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "reset_tags = %x, finished_tags = %x, slot_status = %x", + reset_tags, finished_tags, slot_status); + + /* + * NOTE: Because PxCI be only erased by unset PxCMD.ST bit, so even we + * try to reset a single device behind a port multiplier will + * terminate all the commands on that HBA port. We need mop these + * commands as well. + */ + ahci_mop_commands(ahci_ctlp, + ahci_portp, + slot_status, + 0, /* failed tags */ + 0, /* timeout tags */ + 0, /* aborted tags */ + reset_tags); /* reset tags */ + + return (SATA_SUCCESS); +} + +/* * Used to do port reset and reject all the pending packets on a port during * the reset operation. * @@ -2242,14 +2711,15 @@ */ static int ahci_reset_port_reject_pkts(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, uint8_t port) + ahci_port_t *ahci_portp, ahci_addr_t *addrp) { uint32_t slot_status = 0; uint32_t reset_tags = 0; uint32_t finished_tags = 0; + uint8_t port = addrp->aa_port; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "ahci_reset_port_reject_pkts on port: %d", port); + "ahci_reset_port_reject_pkts at port: %d", port); /* * If AHCI_PORT_FLAG_MOPPING flag is set, it means all the pending @@ -2277,8 +2747,15 @@ if (ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp, port, AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, - NULL) != AHCI_SUCCESS) + NULL) != AHCI_SUCCESS) { + + /* Clear mop flag */ + ahci_portp->ahciport_mop_in_progress--; + if (ahci_portp->ahciport_mop_in_progress == 0) + ahci_portp->ahciport_flags &= + ~AHCI_PORT_FLAG_MOPPING; return (SATA_FAILURE); + } if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) finished_tags = ahci_portp->ahciport_pending_tags & @@ -2311,12 +2788,16 @@ uint32_t slot_status[AHCI_MAX_PORTS]; uint32_t reset_tags[AHCI_MAX_PORTS]; uint32_t finished_tags[AHCI_MAX_PORTS]; - uint8_t port; + int port; int ret = SATA_SUCCESS; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_reset_hba_reject_pkts enter", NULL); + bzero(slot_status, sizeof (slot_status)); + bzero(reset_tags, sizeof (reset_tags)); + bzero(finished_tags, sizeof (finished_tags)); + for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) { if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) { continue; @@ -2325,25 +2806,33 @@ ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); + ahci_portp->ahciport_reset_in_progress = 1; if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { slot_status[port] = ddi_get32( ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port)); reset_tags[port] = slot_status[port] & AHCI_SLOT_MASK(ahci_ctlp); + AHCIDBG(AHCIDBG_INIT, ahci_ctlp, + "port %d: reset_tags = 0x%x pending_tags = 0x%x", + port, reset_tags[port], + ahci_portp->ahciport_pending_tags); } else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) { slot_status[port] = ddi_get32( ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port)); reset_tags[port] = slot_status[port] & AHCI_NCQ_SLOT_MASK(ahci_portp); + AHCIDBG(AHCIDBG_INIT, ahci_ctlp, + "port %d: reset_tags = 0x%x pending_tags = 0x%x", + port, reset_tags[port], + ahci_portp->ahciport_pending_tags); } mutex_exit(&ahci_portp->ahciport_mutex); } if (ahci_hba_reset(ahci_ctlp) != AHCI_SUCCESS) { ret = SATA_FAILURE; - goto out; } for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) { @@ -2401,37 +2890,67 @@ { ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; + ahci_addr_t addr; uint8_t cport = sd->satadev_addr.cport; + uint8_t pmport = sd->satadev_addr.pmport; uint8_t port; int ret = SATA_SUCCESS; int instance = ddi_get_instance(dip); ahci_ctlp = ddi_get_soft_state(ahci_statep, instance); port = ahci_ctlp->ahcictl_cport_to_port[cport]; + ahci_portp = ahci_ctlp->ahcictl_ports[port]; + + ahci_get_ahci_addr(ahci_ctlp, sd, &addr); AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "ahci_tran_reset_port enter: cport: %d", cport); + "ahci_tran_reset_dport enter: cport %d", cport); switch (sd->satadev_addr.qual) { + case SATA_ADDR_PMPORT: + /* + * If we want to issue a COMRESET on a pmport, we need to + * reject the outstanding commands on that pmport. According + * to AHCI spec, PxCI register could only be cleared by + * clearing PxCMD.ST, which will halt the controller port - as + * well as other pmports. + * + * Therefore we directly reset the controller port for + * simplicity. ahci_tran_probe_port() will handle reset stuff + * like initializing the given pmport. + */ + /* FALLTHRU */ case SATA_ADDR_CPORT: /* Port reset */ ahci_portp = ahci_ctlp->ahcictl_ports[port]; - cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d " - "reset port", instance, port); + cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport " + "port %d reset port", instance, port); mutex_enter(&ahci_portp->ahciport_mutex); - ret = ahci_reset_port_reject_pkts(ahci_ctlp, ahci_portp, port); + ret = ahci_reset_port_reject_pkts(ahci_ctlp, ahci_portp, &addr); mutex_exit(&ahci_portp->ahciport_mutex); break; + case SATA_ADDR_DPMPORT: + cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport " + "port %d:%d reset device", instance, port, pmport); + /* FALLTHRU */ case SATA_ADDR_DCPORT: /* Device reset */ - ahci_portp = ahci_ctlp->ahcictl_ports[port]; - cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d " - "reset device", instance, port); + if (sd->satadev_addr.qual == SATA_ADDR_DCPORT) + cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport " + "port %d reset device", instance, port); mutex_enter(&ahci_portp->ahciport_mutex); + /* + * software reset request must be sent to SATA_PMULT_HOSTPORT + * if target is a port multiplier: + */ + if (sd->satadev_addr.qual == SATA_ADDR_DCPORT && + ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) + AHCI_ADDR_SET_PMULT(&addr, port); + if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED | ahci_portp->ahciport_port_state & SATA_PSTATE_SHUTDOWN | ahci_portp->ahciport_port_state & SATA_PSTATE_PWROFF) { @@ -2451,13 +2970,15 @@ break; } - if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) { + if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr) == + SATA_DTYPE_NONE) { /* * ahci_intr_phyrdy_change() may have rendered it to * AHCI_PORT_TYPE_NODEV. */ sd->satadev_type = SATA_DTYPE_NONE; - sd->satadev_state = ahci_portp->ahciport_port_state; + sd->satadev_state = AHCIPORT_GET_STATE(ahci_portp, + &addr); ahci_update_sata_registers(ahci_ctlp, port, sd); AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_tran_reset_dport returning SATA_FAILURE " @@ -2467,24 +2988,24 @@ break; } - ret = ahci_reset_device_reject_pkts(ahci_ctlp, - ahci_portp, port); + if (AHCI_ADDR_IS_PORT(&addr)) { + ret = ahci_reset_device_reject_pkts(ahci_ctlp, + ahci_portp, &addr); + } else { + ret = ahci_reset_pmdevice_reject_pkts(ahci_ctlp, + ahci_portp, &addr); + } + mutex_exit(&ahci_portp->ahciport_mutex); break; case SATA_ADDR_CNTRL: /* Reset the whole controller */ - cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport port %d " - "reset the whole hba", instance, port); + cmn_err(CE_NOTE, "!ahci%d: ahci_tran_reset_dport " + "reset the whole hba", instance); ret = ahci_reset_hba_reject_pkts(ahci_ctlp); break; - case SATA_ADDR_PMPORT: - case SATA_ADDR_DPMPORT: - AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_tran_reset_dport: port multiplier will be " - "supported later", NULL); - /* FALLTHRU */ default: ret = SATA_FAILURE; } @@ -2495,14 +3016,16 @@ /* * Called by sata framework to activate a port as part of hotplug. * (cfgadm -c connect satax/y) - * Note: Not port-mult aware. + * Support port multiplier. */ static int ahci_tran_hotplug_port_activate(dev_info_t *dip, sata_device_t *satadev) { ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; + ahci_addr_t addr; uint8_t cport = satadev->satadev_addr.cport; + uint8_t pmport = satadev->satadev_addr.pmport; uint8_t port; int instance = ddi_get_instance(dip); @@ -2510,32 +3033,48 @@ port = ahci_ctlp->ahcictl_cport_to_port[cport]; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "ahci_tran_hotplug_port_activate cport %d enter", cport); + "ahci_tran_hotplug_port_activate enter: cport %d", cport); ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); - ahci_enable_port_intrs(ahci_ctlp, port); - cmn_err(CE_NOTE, "!ahci%d: ahci port %d is activated", instance, port); - - /* - * Reset the port so that the PHY communication would be re-established. - * But this reset is an internal operation and the sata module doesn't - * need to know about it. Moreover, the port with a device attached will - * be started too. - */ - (void) ahci_restart_port_wait_till_ready(ahci_ctlp, - ahci_portp, port, - AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, - NULL); - - /* - * Need to check the link status and device status of the port - * and consider raising power if the port was in D3 state - */ - ahci_portp->ahciport_port_state |= SATA_PSTATE_PWRON; - ahci_portp->ahciport_port_state &= ~SATA_PSTATE_PWROFF; - ahci_portp->ahciport_port_state &= ~SATA_PSTATE_SHUTDOWN; + ahci_get_ahci_addr(ahci_ctlp, satadev, &addr); + ASSERT(AHCI_ADDR_IS_PORT(&addr) || AHCI_ADDR_IS_PMPORT(&addr)); + + if (AHCI_ADDR_IS_PORT(&addr)) { + cmn_err(CE_NOTE, "!ahci%d: ahci port %d is activated", + instance, port); + + /* Enable the interrupts on the port */ + ahci_enable_port_intrs(ahci_ctlp, port); + + /* + * Reset the port so that the PHY communication would be + * re-established. But this reset is an internal operation + * and the sata module doesn't need to know about it. + * Moreover, the port with a device attached will be started + * too. + */ + (void) ahci_restart_port_wait_till_ready(ahci_ctlp, + ahci_portp, port, + AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, + NULL); + + /* + * Need to check the link status and device status of the port + * and consider raising power if the port was in D3 state + */ + ahci_portp->ahciport_port_state |= SATA_PSTATE_PWRON; + ahci_portp->ahciport_port_state &= ~SATA_PSTATE_PWROFF; + ahci_portp->ahciport_port_state &= ~SATA_PSTATE_SHUTDOWN; + } else if (AHCI_ADDR_IS_PMPORT(&addr)) { + cmn_err(CE_NOTE, "!ahci%d: ahci port %d:%d is activated", + instance, port, pmport); + /* AHCI_ADDR_PMPORT */ + AHCIPORT_PMSTATE(ahci_portp, &addr) |= SATA_PSTATE_PWRON; + AHCIPORT_PMSTATE(ahci_portp, &addr) &= + ~(SATA_PSTATE_PWROFF|SATA_PSTATE_SHUTDOWN); + } satadev->satadev_state = ahci_portp->ahciport_port_state; @@ -2548,14 +3087,16 @@ /* * Called by sata framework to deactivate a port as part of hotplug. * (cfgadm -c disconnect satax/y) - * Note: Not port-mult aware. + * Support port multiplier. */ static int ahci_tran_hotplug_port_deactivate(dev_info_t *dip, sata_device_t *satadev) { ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; - uint8_t cport = satadev->satadev_addr.cport; + ahci_addr_t addr; + uint8_t cport = satadev->satadev_addr.cport; + uint8_t pmport = satadev->satadev_addr.pmport; uint8_t port; uint32_t port_scontrol; int instance = ddi_get_instance(dip); @@ -2564,38 +3105,52 @@ port = ahci_ctlp->ahcictl_cport_to_port[cport]; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "ahci_tran_hotplug_port_deactivate cport %d enter", cport); + "ahci_tran_hotplug_port_deactivate enter: cport %d", cport); ahci_portp = ahci_ctlp->ahcictl_ports[port]; - mutex_enter(&ahci_portp->ahciport_mutex); - cmn_err(CE_NOTE, "!ahci%d: ahci port %d is deactivated", - instance, port); - - /* Disable the interrupts on the port */ - ahci_disable_port_intrs(ahci_ctlp, port); - - if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) { - goto phy_offline; - } - - /* First to abort all the pending commands */ - ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port); - - /* Then stop the port */ - (void) ahci_put_port_into_notrunning_state(ahci_ctlp, - ahci_portp, port); - - /* Next put the PHY offline */ - -phy_offline: - port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); - SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_DISABLE); - - /* Update ahciport_port_state */ - ahci_portp->ahciport_port_state = SATA_PSTATE_SHUTDOWN; - satadev->satadev_state = ahci_portp->ahciport_port_state; + ahci_get_ahci_addr(ahci_ctlp, satadev, &addr); + ASSERT(AHCI_ADDR_IS_PORT(&addr) || AHCI_ADDR_IS_PMPORT(&addr)); + + if (AHCI_ADDR_IS_PORT(&addr)) { + cmn_err(CE_NOTE, "!ahci%d: ahci port %d is deactivated", + instance, port); + + /* Disable the interrupts on the port */ + ahci_disable_port_intrs(ahci_ctlp, port); + + if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) { + + /* First to abort all the pending commands */ + ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port); + + /* Then stop the port */ + (void) ahci_put_port_into_notrunning_state(ahci_ctlp, + ahci_portp, port); + } + + /* Next put the PHY offline */ + port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSCTL(ahci_ctlp, port)); + SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_DISABLE); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *) + AHCI_PORT_PxSCTL(ahci_ctlp, port), port_scontrol); + } else if (AHCI_ADDR_IS_PMPORT(&addr)) { + cmn_err(CE_NOTE, "!ahci%d: ahci port %d:%d is deactivated", + instance, port, pmport); + + ahci_disable_port_intrs(ahci_ctlp, port); + if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &addr) + != SATA_DTYPE_NONE) + ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port); + + /* Re-enable the interrupts for the other pmports */ + ahci_enable_port_intrs(ahci_ctlp, port); + } + + /* Update port state */ + AHCIPORT_SET_STATE(ahci_portp, &addr, SATA_PSTATE_SHUTDOWN); + satadev->satadev_state = SATA_PSTATE_SHUTDOWN; ahci_update_sata_registers(ahci_ctlp, port, satadev); @@ -2617,7 +3172,14 @@ uint32_t abort_tags = 0; AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp, - "ahci_reject_all_abort_pkts on port: %d", port); + "ahci_reject_all_abort_pkts at port: %d", port); + + /* Read/write port multiplier command takes highest priority */ + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + slot_status = 0x1; + abort_tags = 0x1; + goto out; + } /* * When AHCI_PORT_FLAG_MOPPING is set, we need to check whether a @@ -2772,6 +3334,7 @@ ahci_initialize_controller(ahci_ctl_t *ahci_ctlp) { ahci_port_t *ahci_portp; + ahci_addr_t addr; uint32_t ghc_control; int port; @@ -2807,7 +3370,9 @@ * Ensure that the controller is not in the running state * by checking every implemented port's PxCMD register */ - if (ahci_initialize_port(ahci_ctlp, ahci_portp, port) + AHCI_ADDR_SET_PORT(&addr, (uint8_t)port); + + if (ahci_initialize_port(ahci_ctlp, ahci_portp, &addr) != AHCI_SUCCESS) { AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_initialize_controller: failed to " @@ -2874,25 +3439,116 @@ } /* - * The routine is to initialize the port. First put the port in NOTRunning + * ahci_alloc_pmult() + * 1. Setting HBA port registers which are necessary for a port multiplier. + * (Set PxCMD.PMA while PxCMD.ST is '0') + * 2. Allocate ahci_pmult_info structure. + * + * NOTE: Must stop port before the function is called. + * WARNING!!! ahciport_mutex should be acquired before the function is + * called. + */ +static void +ahci_alloc_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) +{ + uint32_t port_cmd_status; + uint8_t port = ahci_portp->ahciport_port_num; + + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + + /* The port must have been stopped before. */ + ASSERT(!(port_cmd_status & AHCI_CMD_STATUS_ST)); + + if (!(port_cmd_status & AHCI_CMD_STATUS_PMA)) { + /* set PMA bit */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), + port_cmd_status|AHCI_CMD_STATUS_PMA); + + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, + "ahci_alloc_pmult: " + "PxCMD.PMA bit set at port %d.", port); + } + + /* Allocate port multiplier information structure */ + if (ahci_portp->ahciport_pmult_info == NULL) { + ahci_portp->ahciport_pmult_info = (ahci_pmult_info_t *) + kmem_zalloc(sizeof (ahci_pmult_info_t), KM_SLEEP); + } + + ASSERT(ahci_portp->ahciport_pmult_info != NULL); +} + +/* + * ahci_dealloc_pmult() + * 1. Clearing related registers when a port multiplier is detached. + * (Clear PxCMD.PMA while PxCMD.ST is '0') + * 2. Deallocate ahci_pmult_info structure. + * + * NOTE: Must stop port before the function is called. + * WARNING!!! ahciport_mutex should be acquired before the function is + * called. + */ +static void +ahci_dealloc_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp) +{ + uint32_t port_cmd_status; + uint8_t port = ahci_portp->ahciport_port_num; + + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + + if (port_cmd_status & AHCI_CMD_STATUS_PMA) { + /* Clear PMA bit */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port), + (port_cmd_status & (~AHCI_CMD_STATUS_PMA))); + + AHCIDBG(AHCIDBG_INIT|AHCIDBG_PMULT, ahci_ctlp, + "ahci_dealloc_pmult: " + "PxCMD.PMA bit cleared at port %d.", port); + } + + /* Release port multiplier information structure */ + if (ahci_portp->ahciport_pmult_info != NULL) { + kmem_free(ahci_portp->ahciport_pmult_info, + sizeof (ahci_pmult_info_t)); + ahci_portp->ahciport_pmult_info = NULL; + } +} + +/* + * The routine is to initialize a port. First put the port in NOTRunning * state, then enable port interrupt and clear Serror register. And under * AHCI_ATTACH case, find device signature and then try to start the port. * + * Called by + * 1. ahci_initialize_controller + * 2. ahci_intr_phyrdy_change (hotplug) + * * WARNING!!! ahciport_mutex should be acquired before the function is called. */ static int -ahci_initialize_port(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, uint8_t port) +ahci_initialize_port(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) { uint32_t port_sstatus, port_task_file, port_cmd_status; + uint8_t port = addrp->aa_port; + boolean_t pm_mode = B_TRUE; /* Power Management mode */ int ret; + ASSERT(mutex_owned(&ahci_portp->ahciport_mutex)); + + /* AHCI_ADDR_PORT: We've no idea of the attached device here. */ + ASSERT(AHCI_ADDR_IS_PORT(addrp)); + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, - "ahci_initialize_port: port %d " - "port_cmd_status = 0x%x", port, port_cmd_status); + "ahci_initialize_port: port %d ", port); + /* * Check whether the port is in NotRunning state, if not, * put the port in NotRunning state @@ -2906,8 +3562,11 @@ ahci_portp, port); } + /* Disable interrupt */ + ahci_disable_port_intrs(ahci_ctlp, port); + /* Device is unknown at first */ - ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN; + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN); /* Disable the interface power management */ ahci_disable_interface_pm(ahci_ctlp, port); @@ -2928,8 +3587,8 @@ /* Check whether port reset must be executed */ ahci_ctlp->ahcictl_cap & AHCI_CAP_INIT_PORT_RESET) { - /* Incorrect task file state, we need to reset port */ - ret = ahci_port_reset(ahci_ctlp, ahci_portp, port); + /* Something went wrong, we need do some reset things */ + ret = ahci_port_reset(ahci_ctlp, ahci_portp, addrp); /* Does port reset succeed on HBA port? */ if (ret != AHCI_SUCCESS) { @@ -2940,16 +3599,15 @@ } /* Is port failed? */ - if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED) { + if (AHCIPORT_GET_STATE(ahci_portp, addrp) & + SATA_PSTATE_FAILED) { AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp, "ahci_initialize_port: port %d state 0x%x", port, ahci_portp->ahciport_port_state); return (AHCI_FAILURE); } } - - ahci_portp->ahciport_port_state = SATA_STATE_READY; - + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_READY); AHCIDBG(AHCIDBG_INIT, ahci_ctlp, "port %d is ready now.", port); /* @@ -2958,13 +3616,23 @@ * changed during power state changes, but at the time being, we * don't support the situation. */ - if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH) { + mutex_exit(&ahci_portp->ahciport_mutex); + mutex_enter(&ahci_ctlp->ahcictl_mutex); + if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH) + pm_mode = B_FALSE; + mutex_exit(&ahci_ctlp->ahcictl_mutex); + mutex_enter(&ahci_portp->ahciport_mutex); + + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG) + pm_mode = B_FALSE; + + if (!pm_mode) { /* * Try to get the device signature if the port is * not empty. */ if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) - ahci_find_dev_signature(ahci_ctlp, ahci_portp, port); + ahci_find_dev_signature(ahci_ctlp, ahci_portp, addrp); } else { /* @@ -2975,7 +3643,7 @@ AHCIDBG(AHCIDBG_PM, ahci_ctlp, "ahci_initialize_port: port %d " "reset the port during resume", port); - (void) ahci_port_reset(ahci_ctlp, ahci_portp, port); + (void) ahci_port_reset(ahci_ctlp, ahci_portp, addrp); AHCIDBG(AHCIDBG_PM, ahci_ctlp, "ahci_initialize_port: port %d " @@ -3000,6 +3668,13 @@ goto out; } + /* If this is a port multiplier, we need do some initialization */ + if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) { + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "Port multiplier found at port %d", port); + ahci_alloc_pmult(ahci_ctlp, ahci_portp); + } + /* Try to start the port */ if (ahci_start_port(ahci_ctlp, ahci_portp, port) != AHCI_SUCCESS) { @@ -3217,6 +3892,639 @@ } /* + * Read/Write a register at port multiplier by SATA READ PORTMULT / SATA WRITE + * PORTMULT command. SYNC & POLLING mode is used. + * + * WARNING!!! ahciport_mutex should be acquired before the function + * is called. + */ +static int +ahci_rdwr_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp, + uint8_t regn, uint32_t *pregv, uint8_t type) +{ + ahci_port_t *ahci_portp; + ahci_addr_t pmult_addr; + sata_pkt_t *spkt; + sata_cmd_t *scmd; + sata_device_t sata_device; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; + uint8_t cport; + uint32_t intr_mask; + int rval; + char portstr[10]; + + SET_PORTSTR(portstr, addrp); + cport = ahci_ctlp->ahcictl_port_to_cport[port]; + ahci_portp = ahci_ctlp->ahcictl_ports[port]; + + ASSERT(AHCI_ADDR_IS_PMPORT(addrp) || AHCI_ADDR_IS_PMULT(addrp)); + ASSERT(mutex_owned(&ahci_portp->ahciport_mutex)); + + /* Check the existence of the port multiplier */ + if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) + return (AHCI_FAILURE); + + /* Request a READ/WRITE PORTMULT sata packet. */ + bzero(&sata_device, sizeof (sata_device_t)); + sata_device.satadev_addr.cport = cport; + sata_device.satadev_addr.pmport = pmport; + sata_device.satadev_addr.qual = SATA_ADDR_PMULT; + sata_device.satadev_rev = SATA_DEVICE_REV; + + /* + * Make sure no command is outstanding here. All R/W PMULT requests + * come from + * + * 1. ahci_attach() + * The port should be empty. + * + * 2. ahci_tran_probe_port() + * Any request from SATA framework (via ahci_tran_start) should be + * rejected if R/W PMULT command is outstanding. + * + * If we are doing mopping, do not check those flags because no + * command will be actually outstanding. + * + * If the port has been occupied by any other commands, the probe + * function will return a SATA_RETRY. SATA framework will retry + * later. + */ + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "R/W PMULT failed: R/W PMULT in progress at port %d.", + port, ahci_portp->ahciport_flags); + return (AHCI_FAILURE); + } + + if (!(ahci_portp->ahciport_flags & AHCI_PORT_FLAG_MOPPING) && ( + ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) || + NCQ_CMD_IN_PROGRESS(ahci_portp) || + NON_NCQ_CMD_IN_PROGRESS(ahci_portp))) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "R/W PMULT failed: port %d is occupied (flags 0x%x).", + port, ahci_portp->ahciport_flags); + return (AHCI_FAILURE); + } + + /* + * The port multiplier is gone. This may happen when + * 1. Cutting off the power of an enclosure. The device lose the power + * before port multiplier. + * 2. Disconnecting the port multiplier during hot-plugging a sub-drive. + * + * The issued command should be aborted and the following command + * should not be continued. + */ + if (!(ahci_portp->ahciport_port_state & SATA_STATE_READY)) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "READ/WRITE PMULT failed: " + "port-mult is removed from port %d", port); + return (AHCI_FAILURE); + } + + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_RDWR_PMULT; + + spkt = sata_get_rdwr_pmult_pkt(ahci_ctlp->ahcictl_dip, + &sata_device, regn, *pregv, type); + + /* + * READ/WRITE PORTMULT command is intended to sent to the control port + * of the port multiplier. + */ + AHCI_ADDR_SET_PMULT(&pmult_addr, addrp->aa_port); + + ahci_portp->ahciport_rdwr_pmult_pkt = spkt; + + /* No interrupt here. Store the interrupt enable mask. */ + intr_mask = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port)); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port), 0); + + rval = ahci_do_sync_start(ahci_ctlp, ahci_portp, &pmult_addr, spkt); + + if (rval == AHCI_SUCCESS && + spkt->satapkt_reason == SATA_PKT_COMPLETED) { + if (type == SATA_RDWR_PMULT_PKT_TYPE_READ) { + scmd = &spkt->satapkt_cmd; + *pregv = scmd->satacmd_lba_high_lsb << 24 | + scmd->satacmd_lba_mid_lsb << 16 | + scmd->satacmd_lba_low_lsb << 8 | + scmd->satacmd_sec_count_lsb; + } + } else { + /* Failed or not completed. */ + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "ahci_rdwr_pmult: cannot [%s] %s[%d] at port %s", + type == SATA_RDWR_PMULT_PKT_TYPE_READ?"Read":"Write", + AHCI_ADDR_IS_PMULT(addrp)?"gscr":"pscr", regn, portstr); + rval = AHCI_FAILURE; + } +out: + /* Restore the interrupt mask */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port), intr_mask); + + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_RDWR_PMULT; + ahci_portp->ahciport_rdwr_pmult_pkt = NULL; + sata_free_rdwr_pmult_pkt(spkt); + return (rval); +} + +static int +ahci_read_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp, + uint8_t regn, uint32_t *pregv) +{ + return ahci_rdwr_pmult(ahci_ctlp, addrp, regn, pregv, + SATA_RDWR_PMULT_PKT_TYPE_READ); +} + +static int +ahci_write_pmult(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp, + uint8_t regn, uint32_t regv) +{ + return ahci_rdwr_pmult(ahci_ctlp, addrp, regn, ®v, + SATA_RDWR_PMULT_PKT_TYPE_WRITE); +} + +#define READ_PMULT(addrp, r, pv, out) \ + if (ahci_read_pmult(ahci_ctlp, addrp, r, pv) != AHCI_SUCCESS) \ + goto out; + +#define WRITE_PMULT(addrp, r, v, out) \ + if (ahci_write_pmult(ahci_ctlp, addrp, r, v) != AHCI_SUCCESS) \ + goto out; + +/* + * Update sata registers on port multiplier, including GSCR/PSCR registers. + * ahci_update_pmult_gscr() + * ahci_update_pmult_pscr() + * + * WARNING!!! ahciport_mutex should be acquired before those functions + * get called. + */ +static int +ahci_update_pmult_gscr(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp, + sata_device_t *sd) +{ + READ_PMULT(addrp, SATA_PMULT_GSCR0, &sd->satadev_gscr.gscr0, err); + READ_PMULT(addrp, SATA_PMULT_GSCR1, &sd->satadev_gscr.gscr1, err); + READ_PMULT(addrp, SATA_PMULT_GSCR2, &sd->satadev_gscr.gscr2, err); + READ_PMULT(addrp, SATA_PMULT_GSCR64, &sd->satadev_gscr.gscr64, err); + + return (AHCI_SUCCESS); + +err: /* R/W PMULT error */ + return (AHCI_FAILURE); +} + +static int +ahci_update_pmult_pscr(ahci_ctl_t *ahci_ctlp, ahci_addr_t *addrp, + sata_device_t *sd) +{ + ASSERT(AHCI_ADDR_IS_PMPORT(addrp)); + + READ_PMULT(addrp, SATA_PMULT_REG_SSTS, &sd->satadev_scr.sstatus, err); + READ_PMULT(addrp, SATA_PMULT_REG_SERR, &sd->satadev_scr.serror, err); + READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &sd->satadev_scr.scontrol, err); + READ_PMULT(addrp, SATA_PMULT_REG_SACT, &sd->satadev_scr.sactive, err); + + return (AHCI_SUCCESS); + +err: /* R/W PMULT error */ + return (AHCI_FAILURE); +} + +/* + * ahci_initialize_pmult() + * + * Initialize a port multiplier, including + * 1. Enable FEATURES register at port multiplier. (SATA Chp.16) + * 2. Redefine MASK register. (SATA Chap 16.?) + * + * WARNING!!! ahciport_mutex should be acquired before the function + * is called. + */ +static int +ahci_initialize_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp, sata_device_t *sd) +{ + uint32_t gscr64; + uint8_t port = addrp->aa_port; + + ASSERT(mutex_owned(&ahci_portp->ahciport_mutex)); + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "[Initialize] Port-multiplier at port %d.", port); + + /* + * Enable features of port multiplier. Currently only + * Asynchronous Notification is enabled. + */ + /* Check gscr64 for supported features. */ + READ_PMULT(addrp, SATA_PMULT_GSCR64, &gscr64, err); + + if (gscr64 & SATA_PMULT_CAP_SNOTIF) { + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "port %d: Port Multiplier supports " + "Asynchronous Notification.", port); + + /* Write to gscr96 to enabled features */ + WRITE_PMULT(addrp, SATA_PMULT_GSCR96, + SATA_PMULT_CAP_SNOTIF, err); + + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port), + AHCI_SNOTIF_CLEAR_ALL); + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "port %d: PMult PxSNTF cleared.", port); + + } + + /* + * Now we need to update gscr33 register to enable hot-plug interrupt + * for sub devices behind port multiplier. + */ + WRITE_PMULT(addrp, SATA_PMULT_GSCR33, (0x1ffff), err); + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "port %d: gscr33 mask set to %x.", port, (0x1ffff)); + + /* + * Fetch the number of device ports of the port multiplier + */ + if (ahci_update_pmult_gscr(ahci_ctlp, addrp, sd) != AHCI_SUCCESS) + return (AHCI_FAILURE); + + /* If it's not in the blacklist, use the value in GSCR2 */ + if (sata_check_pmult_blacklist(sd) == SATA_FAILURE) + sd->satadev_add_info = sd->satadev_gscr.gscr2 & + SATA_PMULT_PORTNUM_MASK; + + ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports = + sd->satadev_add_info; + + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "port %d: pmult sub-port number updated to %x.", port, + ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports); + + /* Till now port-mult is successfully initialized */ + ahci_portp->ahciport_port_state |= SATA_DSTATE_PMULT_INIT; + return (AHCI_SUCCESS); + +err: /* R/W PMULT error */ + return (AHCI_FAILURE); +} + +/* + * Initialize a port multiplier port. According to spec, firstly we need + * issue a COMRESET, then a software reset to get its signature. + * + * NOTE: This function should only be called in ahci_probe_pmport() + * WARNING!!! ahciport_mutex should be acquired before the function. + */ +static int +ahci_initialize_pmport(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) +{ + uint32_t finished_tags = 0, reset_tags = 0, slot_status = 0; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; + int ret = AHCI_FAILURE; + + ASSERT(AHCI_ADDR_IS_PMPORT(addrp)); + + AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, + "ahci_initialize_pmport: port %d:%d", port, pmport); + + /* Check HBA port state */ + if (ahci_portp->ahciport_port_state & SATA_PSTATE_FAILED) { + AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp, + "ahci_initialize_pmport:" + "port %d:%d Port Multiplier is failed.", + port, pmport); + return (AHCI_FAILURE); + } + + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_HOTPLUG) { + return (AHCI_FAILURE); + } + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_HOTPLUG; + + /* Checking for outstanding commands */ + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { + slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCI(ahci_ctlp, port)); + reset_tags = slot_status & AHCI_SLOT_MASK(ahci_ctlp); + } else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) { + slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSACT(ahci_ctlp, port)); + reset_tags = slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp); + } + + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_MOPPING; + ahci_portp->ahciport_mop_in_progress++; + + /* Clear status */ + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_UNKNOWN); + + /* Firstly assume an unknown device */ + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN); + + ahci_disable_port_intrs(ahci_ctlp, port); + + /* port reset is necessary for port multiplier port */ + if (ahci_pmport_reset(ahci_ctlp, ahci_portp, addrp) != AHCI_SUCCESS) { + AHCIDBG(AHCIDBG_INIT|AHCIDBG_ERRS, ahci_ctlp, + "ahci_initialize_pmport:" + "port reset failed at port %d:%d", + port, pmport); + goto out; + } + + /* Is port failed? */ + if (AHCIPORT_GET_STATE(ahci_portp, addrp) & + SATA_PSTATE_FAILED) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_initialize_pmport: port %d:%d failed. " + "state = 0x%x", port, pmport, + ahci_portp->ahciport_port_state); + goto out; + } + + /* Is there any device attached? */ + if (AHCIPORT_GET_DEV_TYPE(ahci_portp, addrp) + == SATA_DTYPE_NONE) { + /* Do not waste time on an empty port */ + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_initialize_pmport: No device is found " + "at port %d:%d", port, pmport); + ret = AHCI_SUCCESS; + goto out; + } + + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_STATE_READY); + AHCIDBG(AHCIDBG_INIT, ahci_ctlp, + "port %d:%d is ready now.", port, pmport); + + /* + * Till now we can assure a device attached to that HBA port and work + * correctly. Now try to get the device signature. This is an optional + * step. If failed, unknown device is assumed, then SATA module will + * continue to use IDENTIFY DEVICE to get the information of the + * device. + */ + ahci_find_dev_signature(ahci_ctlp, ahci_portp, addrp); + + ret = AHCI_SUCCESS; + +out: + /* Next try to mop the pending commands */ + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) + finished_tags = ahci_portp->ahciport_pending_tags & + ~slot_status & AHCI_SLOT_MASK(ahci_ctlp); + else if (NCQ_CMD_IN_PROGRESS(ahci_portp)) + finished_tags = ahci_portp->ahciport_pending_ncq_tags & + ~slot_status & AHCI_NCQ_SLOT_MASK(ahci_portp); + reset_tags &= ~finished_tags; + + ahci_mop_commands(ahci_ctlp, + ahci_portp, + slot_status, + 0, /* failed tags */ + 0, /* timeout tags */ + 0, /* aborted tags */ + reset_tags); /* reset tags */ + + /* Clear PxSNTF register if supported. */ + if (ahci_ctlp->ahcictl_cap & AHCI_CAP_SNTF) { + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port), + AHCI_SNOTIF_CLEAR_ALL); + } + + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_HOTPLUG; + ahci_enable_port_intrs(ahci_ctlp, port); + return (ret); +} + +/* + * ahci_probe_pmult() + * + * This function will be called to probe a port multiplier, which will + * handle hotplug events on port multiplier ports. + * + * NOTE: Only called from ahci_tran_probe_port() + * WARNING!!! ahciport_mutex should be acquired before the function is called. + */ +static int +ahci_probe_pmult(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) +{ + sata_device_t sdevice; + ahci_addr_t pmport_addr; + uint32_t gscr32, port_hotplug_tags; + uint32_t pmport_sstatus; + int dev_exists_now = 0, dev_existed_previously = 0; + uint8_t port = addrp->aa_port; + int npmport; + + /* The bits in GSCR32 refers to the pmport that has a hot-plug event. */ + READ_PMULT(addrp, SATA_PMULT_GSCR32, &gscr32, err); + port_hotplug_tags = gscr32 & AHCI_PMPORT_MASK(ahci_portp); + + do { + npmport = ddi_ffs(port_hotplug_tags) - 1; + if (npmport == -1) + /* no pending hot plug events. */ + return (AHCI_SUCCESS); + + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "hot-plug event at port %d:%d", port, npmport); + + AHCI_ADDR_SET_PMPORT(&pmport_addr, port, (uint8_t)npmport); + + /* Check previous device at that port */ + if (AHCIPORT_GET_DEV_TYPE(ahci_portp, &pmport_addr) + != SATA_DTYPE_NONE) + dev_existed_previously = 1; + + /* PxSStatus tells the presence of device. */ + READ_PMULT(&pmport_addr, SATA_PMULT_REG_SSTS, + &pmport_sstatus, err); + + if (SSTATUS_GET_DET(pmport_sstatus) == + SSTATUS_DET_DEVPRE_PHYCOM) + dev_exists_now = 1; + + /* + * Clear PxSERR is critical. The transition from 0 to 1 will + * emit a FIS which generates an asynchronous notification + * event at controller. If we fail to clear the PxSERR, the + * Async Notif events will no longer be activated on this + * pmport. + */ + WRITE_PMULT(&pmport_addr, SATA_PMULT_REG_SERR, + AHCI_SERROR_CLEAR_ALL, err); + + bzero((void *)&sdevice, sizeof (sata_device_t)); + sdevice.satadev_addr.cport = ahci_ctlp-> + ahcictl_port_to_cport[port]; + sdevice.satadev_addr.qual = SATA_ADDR_PMPORT; + sdevice.satadev_addr.pmport = (uint8_t)npmport; + sdevice.satadev_state = SATA_PSTATE_PWRON; + + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "[Existence] %d -> %d", dev_existed_previously, + dev_exists_now); + + if (dev_exists_now) { + if (dev_existed_previously) { + /* Link (may) not change: Exist -> Exist * */ + AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, + "ahci_probe_pmult: port %d:%d " + "device link lost/established", + port, npmport); + + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_sata_hba_tran-> + sata_tran_hba_dip, + &sdevice, + SATA_EVNT_LINK_LOST| + SATA_EVNT_LINK_ESTABLISHED); + mutex_enter(&ahci_portp->ahciport_mutex); + } else { + /* Link change: None -> Exist */ + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "ahci_probe_pmult: port %d:%d " + "device link established", port, npmport); + + /* Clear port state */ + AHCIPORT_SET_STATE(ahci_portp, &pmport_addr, + SATA_STATE_UNKNOWN); + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "ahci_probe_pmult: port %d " + "ahciport_port_state [Cleared].", port); + + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_sata_hba_tran-> + sata_tran_hba_dip, + &sdevice, + SATA_EVNT_LINK_ESTABLISHED); + mutex_enter(&ahci_portp->ahciport_mutex); + } + } else { /* No device exists now */ + if (dev_existed_previously) { + + /* Link change: Exist -> None */ + AHCIDBG(AHCIDBG_EVENT|AHCIDBG_PMULT, ahci_ctlp, + "ahci_probe_pmult: port %d:%d " + "device link lost", port, npmport); + + /* An existing device is lost. */ + AHCIPORT_SET_STATE(ahci_portp, &pmport_addr, + SATA_STATE_UNKNOWN); + AHCIPORT_SET_DEV_TYPE(ahci_portp, &pmport_addr, + SATA_DTYPE_NONE); + + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_sata_hba_tran-> + sata_tran_hba_dip, + &sdevice, + SATA_EVNT_LINK_LOST); + mutex_enter(&ahci_portp->ahciport_mutex); + } + } + + CLEAR_BIT(port_hotplug_tags, npmport); + } while (port_hotplug_tags != 0); + + return (AHCI_SUCCESS); + +err: /* R/W PMULT error */ + return (AHCI_FAILURE); +} + +/* + * Probe and initialize a port multiplier port. + * A port multiplier port could only be initilaizer here. + * + * WARNING!!! ahcictl_mutex should be acquired before the function + * is called. + */ +static int +ahci_probe_pmport(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp, sata_device_t *sd) +{ + uint32_t port_state; + uint8_t port = addrp->aa_port; + ahci_addr_t addr_pmult; + + /* + * Check the parent - port multiplier first. + */ + + /* + * Parent port multiplier might have been removed. This event will be + * ignored and failure. + */ + if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE || + ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "ahci_tran_probe_port: " + "parent device removed, ignore event.", NULL); + + return (AHCI_FAILURE); + } + + /* The port is ready? */ + port_state = ahci_portp->ahciport_port_state; + if (!(port_state & SATA_STATE_READY)) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "ahci_tran_probe_port: " + "parent port-mult is NOT ready.", NULL); + + if (ahci_restart_port_wait_till_ready(ahci_ctlp, + ahci_portp, port, AHCI_PORT_RESET, NULL) != + AHCI_SUCCESS) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "ahci_tran_probe_port: " + "restart port-mult failed.", NULL); + return (AHCI_FAILURE); + } + } + + /* + * If port-mult is restarted due to some reason, we need + * re-initialized the PMult. + */ + if (!(port_state & SATA_DSTATE_PMULT_INIT)) { + /* Initialize registers on a port multiplier */ + AHCI_ADDR_SET_PMULT(&addr_pmult, addrp->aa_port); + if (ahci_initialize_pmult(ahci_ctlp, ahci_portp, + &addr_pmult, sd) != AHCI_SUCCESS) + return (AHCI_FAILURE); + } + + /* + * Then we check the port-mult port + */ + /* Is this pmport initialized? */ + port_state = AHCIPORT_GET_STATE(ahci_portp, addrp); + if (!(port_state & SATA_STATE_READY)) { + + /* ahci_initialize_pmport() will set READY state */ + if (ahci_initialize_pmport(ahci_ctlp, + ahci_portp, addrp) != AHCI_SUCCESS) + return (AHCI_FAILURE); + } + + return (AHCI_SUCCESS); +} + +/* * AHCI device reset ...; a single device on one of the ports is reset, * but the HBA and physical communication remain intact. This is the * least intrusive. @@ -3232,17 +4540,19 @@ */ static int ahci_software_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, - uint8_t port) + ahci_addr_t *addrp) { ahci_fis_h2d_register_t *h2d_register_fisp; ahci_cmd_table_t *cmd_table; ahci_cmd_header_t *cmd_header; uint32_t port_cmd_status, port_cmd_issue, port_task_file; int slot, loop_count; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; int rval = AHCI_FAILURE; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, - "Port %d device resetting", port); + "port %d:%d device software resetting (FIS)", port, pmport); /* First clear PxCMD.ST (AHCI v1.2 10.4.1) */ if (ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp, @@ -3335,6 +4645,7 @@ &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register); SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE); + SET_FIS_PMP(h2d_register_fisp, pmport); SET_FIS_DEVCTL(h2d_register_fisp, SATA_DEVCTL_SRST); /* Set Command Header in Command List */ @@ -3342,6 +4653,7 @@ BZERO_DESCR_INFO(cmd_header); BZERO_PRD_BYTE_COUNT(cmd_header); SET_COMMAND_FIS_LENGTH(cmd_header, 5); + SET_PORT_MULTI_PORT(cmd_header, pmport); SET_CLEAR_BUSY_UPON_R_OK(cmd_header, 1); SET_RESET(cmd_header, 1); @@ -3376,9 +4688,8 @@ "loop_count = %d", loop_count); goto out; } - /* Wait for 10 millisec */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); } while (port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp) & (0x1 << slot)); AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp, @@ -3397,12 +4708,14 @@ &(cmd_table->ahcict_command_fis.ahcifc_fis.ahcifc_h2d_register); SET_FIS_TYPE(h2d_register_fisp, AHCI_H2D_REGISTER_FIS_TYPE); + SET_FIS_PMP(h2d_register_fisp, pmport); /* Set Command Header in Command List */ cmd_header = &ahci_portp->ahciport_cmd_list[slot]; BZERO_DESCR_INFO(cmd_header); BZERO_PRD_BYTE_COUNT(cmd_header); SET_COMMAND_FIS_LENGTH(cmd_header, 5); + SET_PORT_MULTI_PORT(cmd_header, pmport); SET_WRITE(cmd_header, 1); @@ -3437,7 +4750,7 @@ } /* Wait for 10 millisec */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); } while (port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp) & (0x1 << slot)); AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp, @@ -3450,9 +4763,9 @@ rval = AHCI_SUCCESS; out: AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, - "ahci_software_reset: %s at port %d", + "ahci_software_reset: %s at port %d:%d", rval == AHCI_SUCCESS ? "succeed" : "failed", - port); + port, pmport); return (rval); } @@ -3478,15 +4791,26 @@ * cleared before the function is called. */ static int -ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, uint8_t port) +ahci_port_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) { + ahci_addr_t pmult_addr; uint32_t cap_status, port_cmd_status; uint32_t port_scontrol, port_sstatus, port_serror; uint32_t port_intr_status, port_task_file; + uint32_t port_state; + uint8_t port = addrp->aa_port; int loop_count; int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip); + /* Target is a port multiplier port? */ + if (AHCI_ADDR_IS_PMPORT(addrp)) + return (ahci_pmport_reset(ahci_ctlp, ahci_portp, addrp)); + + /* Otherwise it must be an HBA port. */ + ASSERT(AHCI_ADDR_IS_PORT(addrp)); + AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, "Port %d port resetting...", port); ahci_portp->ahciport_port_state = 0; @@ -3561,7 +4885,7 @@ * spec, software shall wait at least 1 millisecond before * clearing PxSCTL.DET */ - delay(AHCI_1MS_TICKS*2); + drv_usecwait(AHCI_1MS_USECS*2); /* Fetch the SCONTROL again and rewrite the DET part with 0 */ port_scontrol = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, @@ -3587,7 +4911,7 @@ port_cmd_status); /* 0 -> 1 edge */ - delay(AHCI_1MS_TICKS*2); + drv_usecwait(AHCI_1MS_USECS*2); /* Set PxCMD.SUD to 1 */ port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, @@ -3640,7 +4964,7 @@ } /* Wait for 10 millisec */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); } while (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM); AHCIDBG(AHCIDBG_INIT|AHCIDBG_POLL_LOOP, ahci_ctlp, @@ -3654,7 +4978,7 @@ * Either the port is not active or there * is no device present. */ - ahci_portp->ahciport_device_type = SATA_DTYPE_NONE; + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_NONE); return (AHCI_SUCCESS); } @@ -3673,7 +4997,7 @@ cmn_err(CE_WARN, "!ahci%d: ahci_port_reset port %d " "COMINIT signal from the device not received", instance, port); - ahci_portp->ahciport_port_state |= SATA_PSTATE_FAILED; + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED); return (AHCI_FAILURE); } @@ -3711,7 +5035,7 @@ loop_count = 0; do { /* Wait for 10 millisec */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); if (loop_count++ > AHCI_POLLRATE_PORT_TFD_ERROR) { /* @@ -3731,16 +5055,21 @@ (uint32_t *)AHCI_PORT_PxSERR(ahci_ctlp, port), AHCI_SERROR_CLEAR_ALL); + AHCI_ADDR_SET_PMULT(&pmult_addr, port); + /* Try another software reset. */ if (ahci_software_reset(ahci_ctlp, ahci_portp, - port) != AHCI_SUCCESS) { - ahci_portp->ahciport_port_state |= - SATA_PSTATE_FAILED; + &pmult_addr) != AHCI_SUCCESS) { + AHCIPORT_SET_STATE(ahci_portp, addrp, + SATA_PSTATE_FAILED); return (AHCI_FAILURE); } break; } + /* Wait for 10 millisec */ + drv_usecwait(AHCI_10MS_USECS); + /* * The Error bit '1' means COMRESET is finished successfully * The device hardware has been initialized and the power-up @@ -3765,7 +5094,8 @@ AHCI_SERROR_CLEAR_ALL); /* Set port as ready */ - ahci_portp->ahciport_port_state |= SATA_STATE_READY; + port_state = AHCIPORT_GET_STATE(ahci_portp, addrp); + AHCIPORT_SET_STATE(ahci_portp, addrp, port_state|SATA_STATE_READY); AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp, "ahci_port_reset: succeed at port %d.", port); @@ -3773,6 +5103,189 @@ } /* + * COMRESET on a port multiplier port. + * + * NOTE: Only called in ahci_port_reset() + */ +static int +ahci_pmport_reset(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) +{ + uint32_t port_scontrol, port_sstatus, port_serror; + uint32_t port_cmd_status, port_intr_status; + uint32_t port_state; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; + int loop_count; + int instance = ddi_get_instance(ahci_ctlp->ahcictl_dip); + + AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, + "port %d:%d: pmport resetting", port, pmport); + + /* Initialize pmport state */ + AHCIPORT_SET_STATE(ahci_portp, addrp, 0); + + READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &port_scontrol, err); + SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_COMRESET); + WRITE_PMULT(addrp, SATA_PMULT_REG_SCTL, port_scontrol, err); + + /* PxCMD.FRE should be set before. */ + port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); + ASSERT(port_cmd_status & AHCI_CMD_STATUS_FRE); + if (!(port_cmd_status & AHCI_CMD_STATUS_FRE)) + return (AHCI_FAILURE); + + /* + * Give time for COMRESET to percolate, according to the AHCI + * spec, software shall wait at least 1 millisecond before + * clearing PxSCTL.DET + */ + drv_usecwait(AHCI_1MS_USECS*2); + + /* + * Fetch the SCONTROL again and rewrite the DET part with 0 + * This will generate an Asychronous Notification events. + */ + READ_PMULT(addrp, SATA_PMULT_REG_SCTL, &port_scontrol, err); + SCONTROL_SET_DET(port_scontrol, SCONTROL_DET_NOACTION); + WRITE_PMULT(addrp, SATA_PMULT_REG_SCTL, port_scontrol, err); + + /* + * The port enters P:StartComm state, and HBA tells link layer to + * start communication, which involves sending COMRESET to device. + * And the HBA resets PxTFD.STS to 7Fh. + * + * When a COMINIT is received from the device, then the port enters + * P:ComInit state. And HBA sets PxTFD.STS to FFh or 80h. HBA sets + * PxSSTS.DET to 1h to indicate a device is detected but communication + * is not yet established. HBA sets PxSERR.DIAG.X to '1' to indicate + * a COMINIT has been received. + */ + /* + * The DET field is valid only if IPM field indicates + * that the interface is in active state. + */ + loop_count = 0; + do { + READ_PMULT(addrp, SATA_PMULT_REG_SSTS, &port_sstatus, err); + + if (SSTATUS_GET_IPM(port_sstatus) != SSTATUS_IPM_ACTIVE) { + /* + * If the interface is not active, the DET field + * is considered not accurate. So we want to + * continue looping. + */ + SSTATUS_SET_DET(port_sstatus, SSTATUS_DET_NODEV); + } + + if (loop_count++ > AHCI_POLLRATE_PORT_SSTATUS) { + /* + * We are effectively timing out after 0.1 sec. + */ + break; + } + + /* Wait for 10 millisec */ + drv_usecwait(AHCI_10MS_USECS); + + } while (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM); + + AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp, + "ahci_pmport_reset: 1st loop count: %d, " + "port_sstatus = 0x%x port %d:%d", + loop_count, port_sstatus, port, pmport); + + if ((SSTATUS_GET_IPM(port_sstatus) != SSTATUS_IPM_ACTIVE) || + (SSTATUS_GET_DET(port_sstatus) != SSTATUS_DET_DEVPRE_PHYCOM)) { + /* + * Either the port is not active or there + * is no device present. + */ + AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp, + "ahci_pmport_reset: " + "no device attached to port %d:%d", + port, pmport); + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_NONE); + return (AHCI_SUCCESS); + } + + /* Now we can make sure there is a device connected to the port */ + /* COMINIT signal is supposed to be received (PxSERR.DIAG.X = '1') */ + READ_PMULT(addrp, SATA_PMULT_REG_SERR, &port_serror, err); + + if (!(port_serror & (1 << 26))) { + cmn_err(CE_WARN, "!ahci%d: ahci_pmport_reset: " + "COMINIT signal from the device not received port %d:%d", + instance, port, pmport); + + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED); + return (AHCI_FAILURE); + } + + /* + * After clear PxSERR register, we will receive a D2H FIS. + * Normally this FIS will cause a IPMS error according to AHCI spec + * v1.2 because there is no command outstanding for it. So we need + * to ignore this error. + */ + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_IGNORE_IPMS; + WRITE_PMULT(addrp, SATA_PMULT_REG_SERR, AHCI_SERROR_CLEAR_ALL, err); + + /* Now we need to check the D2H FIS by checking IPMS error. */ + loop_count = 0; + do { + port_intr_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIS(ahci_ctlp, port)); + + if (loop_count++ > AHCI_POLLRATE_PORT_TFD_ERROR) { + /* + * No D2H FIS received. This is possible according + * to SATA 2.6 spec. + */ + cmn_err(CE_WARN, "ahci_port_reset: port %d:%d " + "PxIS.IPMS is not set, we need another " + "software reset.", port, pmport); + + break; + } + + /* Wait for 10 millisec */ + mutex_exit(&ahci_portp->ahciport_mutex); + delay(AHCI_10MS_TICKS); + mutex_enter(&ahci_portp->ahciport_mutex); + + } while (!(port_intr_status & AHCI_INTR_STATUS_IPMS)); + + AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp, + "ahci_pmport_reset: 2st loop count: %d, " + "port_sstatus = 0x%x port %d:%d", + loop_count, port_sstatus, port, pmport); + + /* Clear IPMS */ + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIS(ahci_ctlp, port), + AHCI_INTR_STATUS_IPMS); + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_IGNORE_IPMS; + + /* This pmport is now ready for ahci_tran_start() */ + port_state = AHCIPORT_GET_STATE(ahci_portp, addrp); + AHCIPORT_SET_STATE(ahci_portp, addrp, port_state|SATA_STATE_READY); + + AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp, + "ahci_pmport_reset: succeed at port %d:%d", port, pmport); + return (AHCI_SUCCESS); + +err: /* R/W PMULT error */ + /* IPMS flags might be set before. */ + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_IGNORE_IPMS; + AHCIDBG(AHCIDBG_INFO|AHCIDBG_ERRS, ahci_ctlp, + "ahci_pmport_reset: failed at port %d:%d", port, pmport); + + return (AHCI_FAILURE); +} + +/* * AHCI HBA reset ...; the entire HBA is reset, and all ports are disabled. * This is the most intrusive. * @@ -3790,6 +5303,7 @@ ahci_hba_reset(ahci_ctl_t *ahci_ctlp) { ahci_port_t *ahci_portp; + ahci_addr_t addr; uint32_t ghc_control; uint8_t port; int loop_count; @@ -3825,10 +5339,10 @@ } /* Wait for 10 millisec */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); } while (ghc_control & AHCI_HBA_GHC_HR); - AHCIDBG(AHCIDBG_INIT|AHCIDBG_POLL_LOOP, ahci_ctlp, + AHCIDBG(AHCIDBG_POLL_LOOP, ahci_ctlp, "ahci_hba_reset: 1st loop count: %d, " "ghc_control = 0x%x", loop_count, ghc_control); @@ -3840,6 +5354,17 @@ return (AHCI_FAILURE); } + /* + * HBA reset will clear (AHCI Spec v1.2 10.4.3) GHC.IE / GHC.AE + */ + ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp)); + ghc_control |= (AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control); + + mutex_exit(&ahci_ctlp->ahcictl_mutex); + for (port = 0; port < ahci_ctlp->ahcictl_num_ports; port++) { /* Only check implemented ports */ if (!AHCI_PORT_IMPLEMENTED(ahci_ctlp, port)) { @@ -3849,29 +5374,24 @@ ahci_portp = ahci_ctlp->ahcictl_ports[port]; mutex_enter(&ahci_portp->ahciport_mutex); - if (ahci_port_reset(ahci_ctlp, ahci_portp, port) - != AHCI_SUCCESS) { + AHCI_ADDR_SET_PORT(&addr, port); + + if (ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp, + port, AHCI_PORT_RESET|AHCI_RESET_NO_EVENTS_UP, NULL) != + AHCI_SUCCESS) { rval = AHCI_FAILURE; AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_hba_reset: port %d failed", port); + /* + * Set the port state to SATA_PSTATE_FAILED if + * failed to initialize it. + */ + ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED; } mutex_exit(&ahci_portp->ahciport_mutex); } - /* - * Indicate that system software is AHCI aware by setting - * GHC.AE to 1 - */ - ghc_control = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp)); - - ghc_control |= AHCI_HBA_GHC_AE; - ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_GLOBAL_GHC(ahci_ctlp), ghc_control); - - mutex_exit(&ahci_ctlp->ahcictl_mutex); - return (rval); } @@ -3888,23 +5408,43 @@ * is called. And the port interrupt is disabled. */ static void -ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, uint8_t port) +ahci_find_dev_signature(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + ahci_addr_t *addrp) { + ahci_addr_t dev_addr; uint32_t signature; - - AHCIDBG(AHCIDBG_INIT|AHCIDBG_ENTRY, ahci_ctlp, - "ahci_find_dev_signature enter: port %d", port); - - ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN; + uint8_t port = addrp->aa_port; + uint8_t pmport = addrp->aa_pmport; + ASSERT(AHCI_ADDR_IS_VALID(addrp)); + + if (AHCI_ADDR_IS_PORT(addrp)) { + AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, + "ahci_find_dev_signature enter: port %d", port); + + /* + * NOTE: when the ahci address is a HBA port, we do not know + * it is a device or a port multiplier that attached. we need + * try a software reset at port multiplier address (0xf + * pmport) + */ + AHCI_ADDR_SET_PMULT(&dev_addr, addrp->aa_port); + } else { + AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, + "ahci_find_dev_signature enter: port %d:%d", + port, pmport); + dev_addr = *addrp; + } + + /* Assume it is unknown. */ + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN); /* Issue a software reset to get the signature */ - if (ahci_software_reset(ahci_ctlp, ahci_portp, port) + if (ahci_software_reset(ahci_ctlp, ahci_portp, &dev_addr) != AHCI_SUCCESS) { AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_find_dev_signature: software reset failed " - "at port %d, cannot get signature.", port); - ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED; + "at port %d:%d, cannot get signature.", port, pmport); + AHCIPORT_SET_STATE(ahci_portp, addrp, SATA_PSTATE_FAILED); return; } @@ -3912,44 +5452,61 @@ * ahci_software_reset has started the port, so we need manually stop * the port again. */ - if (ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp, port) - != AHCI_SUCCESS) { - AHCIDBG(AHCIDBG_INFO, ahci_ctlp, - "ahci_find_dev_signature: cannot stop port %d.", port); - ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED; - return; + if (AHCI_ADDR_IS_PORT(addrp)) { + if (ahci_put_port_into_notrunning_state(ahci_ctlp, + ahci_portp, port) != AHCI_SUCCESS) { + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_find_dev_signature: cannot stop port %d.", + port); + ahci_portp->ahciport_port_state = SATA_PSTATE_FAILED; + return; + } } /* Now we can make sure that a valid signature is received. */ signature = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxSIG(ahci_ctlp, port)); - AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp, - "ahci_find_dev_signature: port %d signature = 0x%x", - port, signature); +#ifdef AHCI_DEBUG + if (AHCI_ADDR_IS_PMPORT(addrp)) { + AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "ahci_find_dev_signature: signature = 0x%x at port %d:%d", + signature, port, pmport); + } else { + AHCIDBG(AHCIDBG_INIT|AHCIDBG_INFO, ahci_ctlp, + "ahci_find_dev_signature: signature = 0x%x at port %d", + signature, port); + } +#endif + + /* NOTE: Only support ATAPI device at controller port. */ + if (signature == AHCI_SIGNATURE_ATAPI && !AHCI_ADDR_IS_PORT(addrp)) + signature = SATA_DTYPE_UNKNOWN; switch (signature) { case AHCI_SIGNATURE_DISK: - ahci_portp->ahciport_device_type = SATA_DTYPE_ATADISK; + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_ATADISK); AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "Disk is found at port: %d", port); break; case AHCI_SIGNATURE_ATAPI: - ahci_portp->ahciport_device_type = SATA_DTYPE_ATAPI; + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_ATAPI); AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "ATAPI device is found at port: %d", port); break; case AHCI_SIGNATURE_PORT_MULTIPLIER: - ahci_portp->ahciport_device_type = SATA_DTYPE_PMULT; + /* Port Multiplier cannot recursively attached. */ + ASSERT(AHCI_ADDR_IS_PORT(addrp)); + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_PMULT); AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "Port Multiplier is found at port: %d", port); break; default: - ahci_portp->ahciport_device_type = SATA_DTYPE_UNKNOWN; + AHCIPORT_SET_DEV_TYPE(ahci_portp, addrp, SATA_DTYPE_UNKNOWN); AHCIDBG(AHCIDBG_INFO, ahci_ctlp, "Unknown device is found at port: %d", port); } @@ -4071,7 +5628,7 @@ ahci_ctlp->ahcictl_ports[port] = ahci_portp; ahci_portp->ahciport_port_num = port; - /* Intialize the port condition variable */ + /* Initialize the port condition variable */ cv_init(&ahci_portp->ahciport_cv, NULL, CV_DRIVER, NULL); /* Initialize the port mutex */ @@ -4108,6 +5665,9 @@ ahci_portp->ahciport_event_args = kmem_zalloc(sizeof (ahci_event_arg_t), KM_SLEEP); + ahci_portp->ahciport_event_args->ahciea_addrp = + kmem_zalloc(sizeof (ahci_addr_t), KM_SLEEP); + if (ahci_portp->ahciport_event_args == NULL) goto err_case4; @@ -4148,11 +5708,15 @@ ASSERT(ahci_portp != NULL); mutex_enter(&ahci_portp->ahciport_mutex); + kmem_free(ahci_portp->ahciport_event_args->ahciea_addrp, + sizeof (ahci_addr_t)); + ahci_portp->ahciport_event_args->ahciea_addrp = NULL; kmem_free(ahci_portp->ahciport_event_args, sizeof (ahci_event_arg_t)); ahci_portp->ahciport_event_args = NULL; ddi_taskq_destroy(ahci_portp->ahciport_event_taskq); ahci_dealloc_cmd_list(ahci_ctlp, ahci_portp); ahci_dealloc_rcvd_fis(ahci_portp); + ahci_dealloc_pmult(ahci_ctlp, ahci_portp); mutex_exit(&ahci_portp->ahciport_mutex); mutex_destroy(&ahci_portp->ahciport_mutex); @@ -4523,8 +6087,10 @@ } /* - * WARNING!!! ahciport_mutex should be acquired before the function - * is called. + * Update SATA registers at controller ports + * + * WARNING!!! ahciport_mutex should be acquired before those functions + * get called. */ static void ahci_update_sata_registers(ahci_ctl_t *ahci_ctlp, uint8_t port, @@ -4572,19 +6138,21 @@ AHCI_INTR_STATUS_HBDS | AHCI_INTR_STATUS_HBFS | AHCI_INTR_STATUS_TFES); - mutex_exit(&ahci_portp->ahciport_mutex); - goto next; - } + } else { + /* + * port_intr_enable indicates that the corresponding interrrupt + * reporting is enabled. + */ + port_intr_enable = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port)); + } + + /* IPMS error in port reset should be ignored according AHCI spec. */ + if (!(ahci_portp->ahciport_flags & AHCI_PORT_FLAG_IGNORE_IPMS)) + port_intr_enable |= AHCI_INTR_STATUS_IPMS; mutex_exit(&ahci_portp->ahciport_mutex); /* - * port_intr_enable indicates that the corresponding interrrupt - * reporting is enabled. - */ - port_intr_enable = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, - (uint32_t *)AHCI_PORT_PxIE(ahci_ctlp, port)); -next: - /* * port_intr_stats indicates that the corresponding interrupt * condition is active. */ @@ -4635,18 +6203,20 @@ } /* - * Check the non-fatal error interrupt bits, there are three + * Check the non-fatal error interrupt bits, there are four * kinds of non-fatal errors at the time being: * * PxIS.UFS - Unknown FIS Error * PxIS.OFS - Overflow Error * PxIS.INFS - Interface Non-Fatal Error + * PxIS.IPMS - Incorrect Port Multiplier Status Error * * For these non-fatal errors, the HBA can continue to operate, * so the driver just log the error messages. */ if (port_intr_status & (AHCI_INTR_STATUS_UFS | AHCI_INTR_STATUS_OFS | + AHCI_INTR_STATUS_IPMS | AHCI_INTR_STATUS_INFS)) { (void) ahci_intr_non_fatal_error(ahci_ctlp, ahci_portp, port, port_intr_status); @@ -4774,6 +6344,7 @@ mutex_enter(&ahci_portp->ahciport_mutex); if (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) && + !RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) && !NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { /* * Spurious interrupt. Nothing to be done. @@ -4792,7 +6363,15 @@ "ahci_intr_cmd_cmplt: port %d the sata pkt for error " "retrieval is finished, and finished_tags = 0x%x", port, finished_tags); + } else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + finished_tags = 0x1 & ~port_cmd_issue; + AHCIDBG(AHCIDBG_INFO, ahci_ctlp, + "ahci_intr_cmd_cmplt: port %d the sata pkt for r/w " + "port multiplier is finished, and finished_tags = 0x%x", + port, finished_tags); + } else { + finished_tags = ahci_portp->ahciport_pending_tags & ~port_cmd_issue & AHCI_SLOT_MASK(ahci_ctlp); } @@ -4816,6 +6395,28 @@ goto out; } + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) && + (finished_tags == 0x1)) { + satapkt = ahci_portp->ahciport_rdwr_pmult_pkt; + ASSERT(satapkt != NULL); + + AHCIDBG(AHCIDBG_INTR, ahci_ctlp, + "ahci_intr_cmd_cmplt: sending up pkt 0x%p " + "with SATA_PKT_COMPLETED", (void *)satapkt); + + /* READ PORTMULT need copy out FIS content. */ + if (satapkt->satapkt_cmd.satacmd_flags.sata_special_regs) { + rcvd_fisp = &(ahci_portp->ahciport_rcvd_fis-> + ahcirf_d2h_register_fis); + satapkt->satapkt_cmd.satacmd_status_reg = + GET_RFIS_STATUS(rcvd_fisp); + ahci_copy_out_regs(&satapkt->satapkt_cmd, rcvd_fisp); + } + + SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_COMPLETED); + goto out; + } + while (finished_tags) { finished_slot = ddi_ffs(finished_tags) - 1; if (finished_slot == -1) { @@ -4915,8 +6516,7 @@ * with the 'I' bit set and has been copied into system memory. It will * be sent under the following situations: * - * 1. NCQ command is completed - * 2. Asynchronous notification + * 1. NCQ command is completed * * The completion of NCQ commands (READ/WRITE FPDMA QUEUED) is performed * via the Set Device Bits FIS. When such event is generated, the software @@ -4924,22 +6524,62 @@ * list of commands previously issue by software. ahciport_pending_ncq_tags * keeps the tags of previously issued commands. * - * Asynchronous Notification is a feature in SATA II, which allows an - * ATAPI device to send a signal to the host when media is inserted or + * 2. Asynchronous Notification + * + * Asynchronous Notification is a feature in SATA spec 2.6. + * + * 1) ATAPI device will send a signal to the host when media is inserted or * removed and avoids polling the device for media changes. The signal * sent to the host is a Set Device Bits FIS with the 'I' and 'N' bits * set to '1'. At the moment, it's not supported yet. + * + * 2) Port multiplier will send a signal to the host when a hot plug event + * has occured on a port multiplier port. It is used when command based + * switching is employed. This is handled by ahci_intr_pmult_sntf_events() */ static int ahci_intr_set_device_bits(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, uint8_t port) { + ahci_addr_t addr; + + AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp, + "ahci_intr_set_device_bits enter: port %d", port); + + /* Initialize HBA port address */ + AHCI_ADDR_SET_PORT(&addr, port); + + /* NCQ plug handler */ + (void) ahci_intr_ncq_events(ahci_ctlp, ahci_portp, &addr); + + /* Check port multiplier's asynchronous notification events */ + if (ahci_ctlp->ahcictl_cap & AHCI_CAP_SNTF) { + (void) ahci_intr_pmult_sntf_events(ahci_ctlp, + ahci_portp, port); + } + + /* ATAPI events is not supported yet */ + + return (AHCI_SUCCESS); +} +/* + * NCQ interrupt handler. Called upon a NCQ command is completed. + * Only be called from ahci_intr_set_device_bits(). + * + * WARNING!!! ahciport_mutex should be acquired before the function is + * called. + */ +static int +ahci_intr_ncq_events(ahci_ctl_t *ahci_ctlp, + ahci_port_t *ahci_portp, ahci_addr_t *addrp) +{ uint32_t port_sactive; uint32_t port_cmd_issue; uint32_t issued_tags; int issued_slot; uint32_t finished_tags; int finished_slot; + uint8_t port = addrp->aa_port; sata_pkt_t *satapkt; AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR|AHCIDBG_NCQ, ahci_ctlp, @@ -5037,6 +6677,116 @@ } /* + * Port multiplier asynchronous notification event handler. Called upon a + * device is hot plugged/pulled. + * + * The async-notification event will only be recorded by ahcipmi_snotif_tags + * here and will be handled by ahci_probe_pmult(). + * + * NOTE: called only from ahci_port_intr(). + */ +static int +ahci_intr_pmult_sntf_events(ahci_ctl_t *ahci_ctlp, ahci_port_t *ahci_portp, + uint8_t port) +{ + sata_device_t sdevice; + + AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp, + "ahci_intr_pmult_sntf_events enter: port %d ", port); + + /* no hot-plug while attaching process */ + mutex_enter(&ahci_ctlp->ahcictl_mutex); + if (ahci_ctlp->ahcictl_flags & AHCI_ATTACH) { + mutex_exit(&ahci_ctlp->ahcictl_mutex); + return (AHCI_SUCCESS); + } + mutex_exit(&ahci_ctlp->ahcictl_mutex); + + mutex_enter(&ahci_portp->ahciport_mutex); + if (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) { + mutex_exit(&ahci_portp->ahciport_mutex); + return (AHCI_SUCCESS); + } + + ASSERT(ahci_portp->ahciport_pmult_info != NULL); + + ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags = + ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port)); + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxSNTF(ahci_ctlp, port), + AHCI_SNOTIF_CLEAR_ALL); + + if (ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags == 0) { + mutex_exit(&ahci_portp->ahciport_mutex); + return (AHCI_SUCCESS); + } + + /* Port Multiplier sub-device hot-plug handler */ + if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + mutex_exit(&ahci_portp->ahciport_mutex); + return (AHCI_SUCCESS); + } + + if (ahci_portp->ahciport_flags & AHCI_PORT_FLAG_PMULT_SNTF) { + /* Not allowed to re-enter. */ + mutex_exit(&ahci_portp->ahciport_mutex); + return (AHCI_SUCCESS); + } + + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_PMULT_SNTF; + + /* + * NOTE: + * Even if Asynchronous Notification is supported (and enabled) by + * both controller and the port multiplier, the content of PxSNTF + * register is always set to 0x8000 by async notification event. We + * need to check GSCR[32] on the port multiplier to find out the + * owner of this event. + * This is not accord with SATA spec 2.6 and needs further + * clarification. + */ + /* hot-plug will not reported while reseting. */ + if (ahci_portp->ahciport_reset_in_progress == 1) { + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "port %d snotif event ignored", port); + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_PMULT_SNTF; + mutex_exit(&ahci_portp->ahciport_mutex); + return (AHCI_SUCCESS); + } + + AHCIDBG(AHCIDBG_INFO|AHCIDBG_PMULT, ahci_ctlp, + "PxSNTF is set to 0x%x by port multiplier", + ahci_portp->ahciport_pmult_info->ahcipmi_snotif_tags); + + /* + * Now we need do some necessary operation and inform SATA framework + * that link/device events has happened. + */ + bzero((void *)&sdevice, sizeof (sata_device_t)); + sdevice.satadev_addr.cport = ahci_ctlp-> + ahcictl_port_to_cport[port]; + sdevice.satadev_addr.pmport = SATA_PMULT_HOSTPORT; + sdevice.satadev_addr.qual = SATA_ADDR_PMULT; + sdevice.satadev_state = SATA_PSTATE_PWRON; + + /* Just reject packets, do not stop that port. */ + ahci_reject_all_abort_pkts(ahci_ctlp, ahci_portp, port); + + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_sata_hba_tran->sata_tran_hba_dip, + &sdevice, + SATA_EVNT_PMULT_LINK_CHANGED); + mutex_enter(&ahci_portp->ahciport_mutex); + + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_PMULT_SNTF; + mutex_exit(&ahci_portp->ahciport_mutex); + + return (AHCI_SUCCESS); +} + +/* * 1=Change in Current Connect Status. 0=No change in Current Connect Status. * This bit reflects the state of PxSERR.DIAG.X. This bit is only cleared * when PxSERR.DIAG.X is cleared. When PxSERR.DIAG.X is set to one, it @@ -5169,6 +6919,7 @@ sata_device_t sdevice; int dev_exists_now = 0; int dev_existed_previously = 0; + ahci_addr_t port_addr; AHCIDBG(AHCIDBG_INTR|AHCIDBG_ENTRY, ahci_ctlp, "ahci_intr_phyrdy_change enter, port %d", port); @@ -5219,8 +6970,11 @@ sdevice.satadev_state = SATA_PSTATE_PWRON; ahci_portp->ahciport_port_state = SATA_PSTATE_PWRON; + AHCI_ADDR_SET_PORT(&port_addr, port); + + ahci_portp->ahciport_flags |= AHCI_PORT_FLAG_HOTPLUG; if (dev_exists_now) { - if (dev_existed_previously) { + if (dev_existed_previously) { /* 1 -> 1 */ /* Things are fine now. The loss was temporary. */ AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, "ahci_intr_phyrdy_change port %d " @@ -5233,14 +6987,18 @@ SATA_EVNT_LINK_LOST|SATA_EVNT_LINK_ESTABLISHED); mutex_enter(&ahci_portp->ahciport_mutex); - } else { + } else { /* 0 -> 1 */ AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, "ahci_intr_phyrdy_change: port %d " "device link established", port); - /* A new device has been detected. */ - (void) ahci_port_reset(ahci_ctlp, ahci_portp, port); - ahci_find_dev_signature(ahci_ctlp, ahci_portp, port); + /* + * A new device has been detected. The new device + * might be a port multiplier instead of a drive, so + * we cannot update the signature directly. + */ + (void) ahci_initialize_port(ahci_ctlp, + ahci_portp, &port_addr); /* Try to start the port */ if (ahci_start_port(ahci_ctlp, ahci_portp, port) @@ -5264,7 +7022,7 @@ } } else { /* No device exists now */ - if (dev_existed_previously) { + if (dev_existed_previously) { /* 1 -> 0 */ AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, "ahci_intr_phyrdy_change: port %d " "device link lost", port); @@ -5273,8 +7031,14 @@ (void) ahci_put_port_into_notrunning_state(ahci_ctlp, ahci_portp, port); + if (ahci_portp->ahciport_device_type == + SATA_DTYPE_PMULT) { + ahci_dealloc_pmult(ahci_ctlp, ahci_portp); + } + /* An existing device is lost. */ ahci_portp->ahciport_device_type = SATA_DTYPE_NONE; + ahci_portp->ahciport_port_state = SATA_STATE_UNKNOWN; mutex_exit(&ahci_portp->ahciport_mutex); sata_hba_event_notify( @@ -5284,6 +7048,7 @@ mutex_enter(&ahci_portp->ahciport_mutex); } } + ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_HOTPLUG; mutex_exit(&ahci_portp->ahciport_mutex); @@ -5301,6 +7066,14 @@ * won't be posted to memory and PxSERR.ERR.P will be set, which is then * a fatal error. * + * PxIS.IPMS - Incorrect Port Multiplier Status + * + * IPMS Indicates that the HBA received a FIS from a device that did not + * have a command outstanding. The IPMS bit may be set during enumeration + * of devices on a Port Multiplier due to the normal Port Multiplier + * enumeration process. It is recommended that IPMS only be used after + * enumeration is complete on the Port Multiplier (copied from spec). + * * PxIS.OFS - Overflow Error * * Command list overflow is defined as software building a command table @@ -5350,7 +7123,7 @@ AHCIDBG(AHCIDBG_INTR|AHCIDBG_ENTRY|AHCIDBG_ERRS, ahci_ctlp, "ahci_intr_non_fatal_error: port %d, " - "port_serror = 0x%x", port, port_serror); + "PxSERR = 0x%x, PxIS = 0x%x ", port, port_serror, intr_status); ahci_log_serror_message(ahci_ctlp, port, port_serror, 1); @@ -5365,6 +7138,11 @@ } #if AHCI_DEBUG + if (intr_status & AHCI_INTR_STATUS_IPMS) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci port %d " + "has Incorrect Port Multiplier Status error", port); + } + if (intr_status & AHCI_INTR_STATUS_OFS) { AHCIDBG(AHCIDBG_INTR|AHCIDBG_ERRS, ahci_ctlp, "ahci port %d has overflow error", port); @@ -5581,6 +7359,10 @@ "task_file_status = 0x%x", port, task_file_status); } + /* + * Here we just log the fatal error info in interrupt context. + * Misc recovery processing will be handled in task queue. + */ if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { /* * Read PxCMD.CCS to determine the slot that the HBA @@ -5596,6 +7378,7 @@ "ahci_intr_fatal_error: spkt 0x%p is being processed when " "fatal error occurred for port %d", spkt, port); + /* A Task File Data error. */ if (intr_status & AHCI_INTR_STATUS_TFES) { err_byte = (task_file_status & AHCI_TFD_ERR_MASK) >> AHCI_TFD_ERR_SHIFT; @@ -5640,6 +7423,7 @@ args->ahciea_ctlp = (void *)ahci_ctlp; args->ahciea_portp = (void *)ahci_portp; args->ahciea_event = intr_status; + AHCI_ADDR_SET_PORT((ahci_addr_t *)args->ahciea_addrp, port); /* Start the taskq to handle error recovery */ if ((ddi_taskq_dispatch(ahci_portp->ahciport_event_taskq, @@ -6129,7 +7913,7 @@ } /* Wait for 10 millisec */ - drv_usecwait(10000); + drv_usecwait(AHCI_10MS_USECS); } while (port_cmd_status & AHCI_CMD_STATUS_CR); ahci_portp->ahciport_flags &= ~AHCI_PORT_FLAG_STARTED; @@ -6160,6 +7944,7 @@ * the process. * * The routine will be called under following scenarios: + * + To reset the HBA * + To abort the packet(s) * + To reset the port * + To activate the port @@ -6190,12 +7975,19 @@ uint32_t task_file_status; sata_device_t sdevice; int rval; + ahci_addr_t addr_port; + ahci_pmult_info_t *pminfo = NULL; int dev_exists_begin = 0; int dev_exists_end = 0; + uint32_t previous_dev_type = ahci_portp->ahciport_device_type; + int npmport = 0; + uint8_t cport = ahci_ctlp->ahcictl_port_to_cport[port]; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_restart_port_wait_till_ready: port %d enter", port); + AHCI_ADDR_SET_PORT(&addr_port, port); + if (ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) dev_exists_begin = 1; @@ -6221,7 +8013,7 @@ /* * Check whether the device is in a stable status, if yes, - * then start the port directly. However for ahci_tran_dport_reset, + * then start the port directly. However for ahci_tran_reset_dport, * we may have to perform a port reset. */ if (!(task_file_status & (AHCI_TFD_STS_BSY | AHCI_TFD_STS_DRQ)) && @@ -6233,13 +8025,23 @@ * If PxTFD.STS.BSY or PxTFD.STS.DRQ is set to '1', then issue * a COMRESET to the device */ - rval = ahci_port_reset(ahci_ctlp, ahci_portp, port); + ahci_disable_port_intrs(ahci_ctlp, port); + rval = ahci_port_reset(ahci_ctlp, ahci_portp, &addr_port); + ahci_enable_port_intrs(ahci_ctlp, port); + +#ifdef AHCI_DEBUG + if (rval != AHCI_SUCCESS) + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_restart_port_wait_till_ready: port %d failed", + port); +#endif if (reset_flag != NULL) *reset_flag = 1; /* Indicate to the framework that a reset has happened. */ if ((ahci_portp->ahciport_device_type != SATA_DTYPE_NONE) && + (ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) && !(flag & AHCI_RESET_NO_EVENTS_UP)) { /* Set the reset in progress flag */ ahci_portp->ahciport_reset_in_progress = 1; @@ -6262,7 +8064,7 @@ } AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, - "port %d sending event up: SATA_EVNT_RESET", port); + "port %d sending event up: SATA_EVNT_DEVICE_RESET", port); } else { ahci_portp->ahciport_reset_in_progress = 0; } @@ -6276,11 +8078,13 @@ if (SSTATUS_GET_DET(port_sstatus) == SSTATUS_DET_DEVPRE_PHYCOM) { dev_exists_end = 1; - ASSERT(ahci_portp->ahciport_device_type != SATA_DTYPE_NONE); - } + } + + if (dev_exists_begin == 0 && dev_exists_end == 0) /* 0 -> 0 */ + return (rval); /* Check whether a hot plug event happened */ - if (dev_exists_begin == 1 && dev_exists_end == 0) { + if (dev_exists_begin == 1 && dev_exists_end == 0) { /* 1 -> 0 */ AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, "ahci_restart_port_wait_till_ready: port %d " "device is removed", port); @@ -6291,7 +8095,124 @@ mutex_exit(&ahci_portp->ahciport_mutex); (void) ahci_intr_phyrdy_change(ahci_ctlp, ahci_portp, port); mutex_enter(&ahci_portp->ahciport_mutex); - } + + return (rval); + } + + + /* 0/1 -> 1 : device may change */ + /* + * May be called by ahci_fatal_error_recovery_handler, so + * don't issue software if the previous device is ATAPI. + */ + if (ahci_portp->ahciport_device_type == SATA_DTYPE_ATAPI) + return (rval); + + /* + * The COMRESET will make port multiplier enter legacy mode. + * Issue a software reset to make it work again. + */ + ahci_find_dev_signature(ahci_ctlp, ahci_portp, &addr_port); + + /* + * Following codes are specific for the port multiplier + */ + if (previous_dev_type != SATA_DTYPE_PMULT && + ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) { + /* in case previous_dev_type is corrupt */ + ahci_dealloc_pmult(ahci_ctlp, ahci_portp); + (void) ahci_start_port(ahci_ctlp, ahci_portp, port); + return (rval); + } + + /* Device change: PMult -> Non-PMult */ + if (previous_dev_type == SATA_DTYPE_PMULT && + ahci_portp->ahciport_device_type != SATA_DTYPE_PMULT) { + /* + * This might happen because + * 1. Software reset failed. Port multiplier is not correctly + * enumerated. + * 2. Another non-port-multiplier device is attached. Perhaps + * the port multiplier was replaced by another device by + * whatever reason, but AHCI driver missed hot-plug event. + * + * Now that the port has been initialized, we just need to + * update the port structure according new device, then report + * and wait SATA framework to probe new device. + */ + + /* Force to release pmult resource */ + ahci_dealloc_pmult(ahci_ctlp, ahci_portp); + (void) ahci_start_port(ahci_ctlp, ahci_portp, port); + + bzero((void *)&sdevice, sizeof (sata_device_t)); + sdevice.satadev_addr.cport = + ahci_ctlp->ahcictl_port_to_cport[port]; + sdevice.satadev_addr.pmport = 0; + sdevice.satadev_addr.qual = SATA_ADDR_DCPORT; + + sdevice.satadev_state = SATA_DSTATE_RESET | + SATA_DSTATE_PWR_ACTIVE; + + mutex_exit(&ahci_portp->ahciport_mutex); + sata_hba_event_notify( + ahci_ctlp->ahcictl_dip, + &sdevice, + SATA_EVNT_DEVICE_RESET); + mutex_enter(&ahci_portp->ahciport_mutex); + + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "Port multiplier is [Gone] at port %d ", port); + AHCIDBG(AHCIDBG_EVENT, ahci_ctlp, + "port %d sending event up: SATA_EVNT_DEVICE_RESET", port); + + return (AHCI_SUCCESS); + } + + /* Device change: Non-PMult -> PMult */ + if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) { + + /* NOTE: The PxCMD.PMA may be cleared by HBA reset. */ + ahci_alloc_pmult(ahci_ctlp, ahci_portp); + + (void) ahci_start_port(ahci_ctlp, ahci_portp, port); + } + pminfo = ahci_portp->ahciport_pmult_info; + ASSERT(pminfo != NULL); + + /* Device (may) change: PMult -> PMult */ + /* + * First initialize port multiplier. Set state to READY and wait for + * probe entry point to initialize it + */ + ahci_portp->ahciport_port_state = SATA_STATE_READY; + + /* + * It's a little complicated while target is a port multiplier. we + * need to COMRESET all pmports behind that PMult otherwise those + * sub-links between the PMult and the sub-devices will be in an + * inactive state (indicated by PSCR0/PxSSTS) and the following access + * to those sub-devices will be rejected by Link-Fatal-Error. + */ + /* + * The PxSNTF will be set soon after the pmult is plugged. While the + * pmult itself is attaching, sata_hba_event_notfiy will fail. so we + * simply mark every sub-port as 'unknown', then ahci_probe_pmport + * will initialized it. + */ + for (npmport = 0; npmport < pminfo->ahcipmi_num_dev_ports; npmport++) + pminfo->ahcipmi_port_state[npmport] = SATA_STATE_UNKNOWN; + + /* Report reset event. */ + ahci_portp->ahciport_reset_in_progress = 1; + + bzero((void *)&sdevice, sizeof (sata_device_t)); + sdevice.satadev_addr.cport = cport; + sdevice.satadev_addr.pmport = SATA_PMULT_HOSTPORT; + sdevice.satadev_addr.qual = SATA_ADDR_PMULT; + sdevice.satadev_state = SATA_DSTATE_RESET | SATA_DSTATE_PWR_ACTIVE; + sata_hba_event_notify(ahci_ctlp->ahcictl_dip, &sdevice, + SATA_EVNT_DEVICE_RESET); return (rval); } @@ -6323,6 +8244,7 @@ sata_pkt_t *satapkt; int ncq_cmd_in_progress = 0; int err_retri_cmd_in_progress = 0; + int rdwr_pmult_cmd_in_progress = 0; AHCIDBG(AHCIDBG_ERRS|AHCIDBG_ENTRY, ahci_ctlp, "ahci_mop_commands entered: port: %d slot_status: 0x%x", @@ -6334,6 +8256,25 @@ "reset_tags: 0x%x", failed_tags, timeout_tags, aborted_tags, reset_tags); +#ifdef AHCI_DEBUG + if (ahci_debug_flags & AHCIDBG_ERRS) { + int i; + char msg_buf[200] = {0, }; + for (i = 0x1f; i >= 0; i--) { + if (ahci_portp->ahciport_slot_pkts[i] != NULL) + msg_buf[i] = 'X'; + else + msg_buf[i] = '.'; + } + msg_buf[0x20] = '\0'; + cmn_err(CE_NOTE, "port[%d] slots: %s", + ahci_portp->ahciport_port_num, msg_buf); + cmn_err(CE_NOTE, "[ERR-RT] %p [RW-PM] %p ", + (void *)ahci_portp->ahciport_err_retri_pkt, + (void *)ahci_portp->ahciport_rdwr_pmult_pkt); + } +#endif + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { finished_tags = ahci_portp->ahciport_pending_tags & ~slot_status & AHCI_SLOT_MASK(ahci_ctlp); @@ -6378,7 +8319,21 @@ ahci_portp->ahciport_port_num, slot_status); ASSERT(ahci_portp->ahciport_mop_in_progress > 1); ASSERT(slot_status == 0x1); - } + } else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + rdwr_pmult_cmd_in_progress = 1; + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_PMULT, ahci_ctlp, + "ahci_mop_commands is called for port %d while " + "READ/WRITE PORTMULT command is being executed", + ahci_portp->ahciport_port_num); + + ASSERT(slot_status == 0x1); + } + +#ifdef AHCI_DEBUG + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_ENTRY, ahci_ctlp, + "ahci_mop_commands: finished_tags: 0x%x, " + "unfinished_tags 0x%x", finished_tags, unfinished_tags); +#endif /* Send up finished packets with SATA_PKT_COMPLETED */ while (finished_tags) { @@ -6428,6 +8383,16 @@ SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_DEV_ERROR); break; } + if (rdwr_pmult_cmd_in_progress) { + satapkt = ahci_portp->ahciport_rdwr_pmult_pkt; + ASSERT(satapkt != NULL); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_mop_commands: sending up " + "rdwr pmult pkt 0x%p with SATA_PKT_DEV_ERROR", + (void *)satapkt); + SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_DEV_ERROR); + break; + } tmp_slot = ddi_ffs(failed_tags) - 1; if (tmp_slot == -1) { @@ -6464,6 +8429,16 @@ SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_TIMEOUT); break; } + if (rdwr_pmult_cmd_in_progress) { + satapkt = ahci_portp->ahciport_rdwr_pmult_pkt; + ASSERT(satapkt != NULL); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_mop_commands: sending up " + "rdwr pmult pkt 0x%p with SATA_PKT_TIMEOUT", + (void *)satapkt); + SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_TIMEOUT); + break; + } tmp_slot = ddi_ffs(timeout_tags) - 1; if (tmp_slot == -1) { @@ -6500,6 +8475,17 @@ SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_ABORTED); break; } + if (rdwr_pmult_cmd_in_progress) { + satapkt = ahci_portp->ahciport_rdwr_pmult_pkt; + ASSERT(satapkt != NULL); + ASSERT(aborted_tags == 0x1); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_mop_commands: sending up " + "rdwr pmult pkt 0x%p with SATA_PKT_ABORTED", + (void *)satapkt); + SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_ABORTED); + break; + } tmp_slot = ddi_ffs(aborted_tags) - 1; if (tmp_slot == -1) { @@ -6525,6 +8511,18 @@ /* Send up reset packets with SATA_PKT_RESET. */ while (reset_tags) { + if (rdwr_pmult_cmd_in_progress) { + satapkt = ahci_portp->ahciport_rdwr_pmult_pkt; + ASSERT(satapkt != NULL); + ASSERT(aborted_tags == 0x1); + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "ahci_mop_commands: sending up " + "rdwr pmult pkt 0x%p with SATA_PKT_RESET", + (void *)satapkt); + SENDUP_PACKET(ahci_portp, satapkt, SATA_PKT_RESET); + break; + } + tmp_slot = ddi_ffs(reset_tags) - 1; if (tmp_slot == -1) { break; @@ -6590,6 +8588,7 @@ sata_device_t sdevice; sata_pkt_t *rdlog_spkt, *spkt; ddi_dma_handle_t buf_dma_handle; + ahci_addr_t addr; int loop_count; int rval; int failed_slot; @@ -6606,6 +8605,9 @@ sdevice.satadev_addr.qual = SATA_ADDR_DCPORT; sdevice.satadev_addr.pmport = 0; + /* Translate sata_device.satadev_addr -> ahci_addr */ + ahci_get_ahci_addr(ahci_ctlp, &sdevice, &addr); + /* * Call the sata hba interface to get a rdlog spkt */ @@ -6616,7 +8618,7 @@ if (rdlog_spkt == NULL) { if (loop_count++ < AHCI_POLLRATE_GET_SPKT) { /* Sleep for a while */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); goto loop; } /* Timed out after 1s */ @@ -6638,7 +8640,7 @@ * the whole command list is empty. */ ahci_portp->ahciport_err_retri_pkt = rdlog_spkt; - (void) ahci_do_sync_start(ahci_ctlp, ahci_portp, port, rdlog_spkt); + (void) ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr, rdlog_spkt); ahci_portp->ahciport_err_retri_pkt = NULL; /* Remove the flag after READ LOG EXT command is completed */ @@ -6706,6 +8708,7 @@ sata_pkt_t *rs_spkt; sata_cmd_t *sata_cmd; ddi_dma_handle_t buf_dma_handle; + ahci_addr_t addr; int loop_count; #if AHCI_DEBUG struct scsi_extended_sense *rqsense; @@ -6721,6 +8724,9 @@ sdevice.satadev_addr.qual = SATA_ADDR_DCPORT; sdevice.satadev_addr.pmport = 0; + /* Translate sata_device.satadev_addr -> ahci_addr */ + ahci_get_ahci_addr(ahci_ctlp, &sdevice, &addr); + sata_cmd = &spkt->satapkt_cmd; /* @@ -6733,7 +8739,7 @@ if (rs_spkt == NULL) { if (loop_count++ < AHCI_POLLRATE_GET_SPKT) { /* Sleep for a while */ - delay(AHCI_10MS_TICKS); + drv_usecwait(AHCI_10MS_USECS); goto loop; } @@ -6756,7 +8762,7 @@ * the whole command list is empty. */ ahci_portp->ahciport_err_retri_pkt = rs_spkt; - (void) ahci_do_sync_start(ahci_ctlp, ahci_portp, port, rs_spkt); + (void) ahci_do_sync_start(ahci_ctlp, ahci_portp, &addr, rs_spkt); ahci_portp->ahciport_err_retri_pkt = NULL; /* Remove the flag after REQUEST SENSE command is completed */ @@ -6822,23 +8828,34 @@ */ static void ahci_fatal_error_recovery_handler(ahci_ctl_t *ahci_ctlp, - ahci_port_t *ahci_portp, uint8_t port, uint32_t intr_status) + ahci_port_t *ahci_portp, ahci_addr_t *addrp, uint32_t intr_status) { uint32_t port_cmd_status; uint32_t slot_status = 0; uint32_t failed_tags = 0; int failed_slot; - int reset_flag = 0; + int reset_flag = 0, flag = 0; ahci_fis_d2h_register_t *ahci_rcvd_fisp; sata_cmd_t *sata_cmd = NULL; sata_pkt_t *spkt = NULL; #if AHCI_DEBUG ahci_cmd_header_t *cmd_header; #endif + uint8_t port = addrp->aa_port; AHCIDBG(AHCIDBG_ENTRY, ahci_ctlp, "ahci_fatal_error_recovery_handler enter: port %d", port); + /* Port multiplier error */ + if (ahci_portp->ahciport_device_type == SATA_DTYPE_PMULT) { + /* FBS code is neither completed nor tested. */ + ahci_pmult_error_recovery_handler(ahci_ctlp, ahci_portp, + port, intr_status); + + /* Force a port reset */ + flag = AHCI_PORT_RESET; + } + if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp) || ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) { @@ -6934,7 +8951,7 @@ ahci_portp->ahciport_mop_in_progress++; (void) ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp, - port, NULL, &reset_flag); + port, flag, &reset_flag); /* * Won't retrieve error information: @@ -6985,6 +9002,97 @@ } /* + * Used to recovery a PMULT pmport fatal error under FIS-based switching. + * 1. device specific.PxFBS.SDE=1 + * 2. Non-Deivce specific. + * Nothing will be done when Command-based switching is employed. + * + * Currently code is neither completed nor tested. + * + * WARNING!!! ahciport_mutex should be acquired before the function + * is called. + */ +static void +ahci_pmult_error_recovery_handler(ahci_ctl_t *ahci_ctlp, + ahci_port_t *ahci_portp, uint8_t port, uint32_t intr_status) +{ +#ifndef __lock_lint + _NOTE(ARGUNUSED(intr_status)) +#endif + uint32_t port_fbs_ctrl; + int loop_count = 0; + ahci_addr_t addr; + + /* Nothing will be done under Command-based switching. */ + if (!(ahci_ctlp->ahcictl_cap & AHCI_CAP_PMULT_FBSS)) + return; + + port_fbs_ctrl = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port)); + + if (!(port_fbs_ctrl & AHCI_FBS_EN)) + /* FBS is not enabled. */ + return; + + /* Problem's getting complicated now. */ + /* + * If FIS-based switching is used, we need to check + * the PxFBS to see the error type. + */ + port_fbs_ctrl = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port)); + + /* Refer to spec(v1.2) 9.3.6.1 */ + if (port_fbs_ctrl & AHCI_FBS_SDE) { + AHCIDBG(AHCIDBG_ERRS, ahci_ctlp, + "A Device Sepcific Error: port %d", port); + /* + * Controller has paused commands for all other + * sub-devices until PxFBS.DEC is set. + */ + ahci_reject_all_abort_pkts(ahci_ctlp, + ahci_portp, 0); + + ddi_put32(ahci_ctlp->ahcictl_ahci_acc_handle, + (uint32_t *)AHCI_PORT_PxFBS(ahci_ctlp, port), + port_fbs_ctrl | AHCI_FBS_DEC); + + /* + * Wait controller clear PxFBS.DEC, + * then we can continue. + */ + loop_count = 0; + do { + port_fbs_ctrl = ddi_get32(ahci_ctlp-> + ahcictl_ahci_acc_handle, (uint32_t *) + AHCI_PORT_PxFBS(ahci_ctlp, port)); + + if (loop_count++ > 1000) + /* + * Esclate the error. Follow + * non-device specific error + * procedure. + */ + return; + + drv_usecwait(AHCI_100US_USECS); + } while (port_fbs_ctrl & AHCI_FBS_DEC); + + /* + * Issue a software reset to ensure drive is in + * a known state. + */ + (void) ahci_software_reset(ahci_ctlp, + ahci_portp, &addr); + + } else { + + /* Process Non-Device Specific Error. */ + /* This will be handled later on. */ + cmn_err(CE_NOTE, "!FBS is not supported now."); + } +} +/* * Handle events - fatal error recovery */ static void @@ -6993,19 +9101,19 @@ ahci_event_arg_t *ahci_event_arg; ahci_ctl_t *ahci_ctlp; ahci_port_t *ahci_portp; + ahci_addr_t *addrp; uint32_t event; - uint8_t port; ahci_event_arg = (ahci_event_arg_t *)args; ahci_ctlp = ahci_event_arg->ahciea_ctlp; ahci_portp = ahci_event_arg->ahciea_portp; + addrp = ahci_event_arg->ahciea_addrp; event = ahci_event_arg->ahciea_event; - port = ahci_portp->ahciport_port_num; AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR|AHCIDBG_ERRS, ahci_ctlp, "ahci_events_handler enter: port %d intr_status = 0x%x", - port, event); + ahci_portp->ahciport_port_num, event); mutex_enter(&ahci_portp->ahciport_mutex); @@ -7016,7 +9124,8 @@ if (ahci_portp->ahciport_device_type == SATA_DTYPE_NONE) { AHCIDBG(AHCIDBG_ENTRY|AHCIDBG_INTR, ahci_ctlp, "ahci_events_handler: port %d no device attached, " - "and just return without doing anything", port); + "and just return without doing anything", + ahci_portp->ahciport_port_num); goto out; } @@ -7025,7 +9134,7 @@ AHCI_INTR_STATUS_HBFS | AHCI_INTR_STATUS_TFES)) ahci_fatal_error_recovery_handler(ahci_ctlp, ahci_portp, - port, event); + addrp, event); out: mutex_exit(&ahci_portp->ahciport_mutex); @@ -7049,6 +9158,7 @@ mutex_enter(&ahci_portp->ahciport_mutex); if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp) || + RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) || ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) { /* Read PxCI to see which commands are still outstanding */ slot_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, @@ -7077,6 +9187,12 @@ ahci_portp->ahciport_pending_tags, ahci_portp->ahciport_pending_ncq_tags); ASSERT(slot_status == 0x1); + } else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { + AHCIDBG(AHCIDBG_ERRS|AHCIDBG_TIMEOUT, ahci_ctlp, + "ahci_timeout_pkts called while executing R/W PMULT " + "command timeout_tags = 0x%x, slot_status = 0x%x", + tmp_timeout_tags, slot_status); + ASSERT(slot_status == 0x1); } #endif @@ -7084,7 +9200,7 @@ ahci_portp->ahciport_mop_in_progress++; (void) ahci_restart_port_wait_till_ready(ahci_ctlp, ahci_portp, - port, NULL, NULL); + port, AHCI_PORT_RESET, NULL); /* * Re-identify timeout tags because some previously checked commands @@ -7112,17 +9228,18 @@ "pending_ncq_tags = 0x%x ", port, finished_tags, timeout_tags, slot_status, ahci_portp->ahciport_pending_ncq_tags); - } else if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) { + } else if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) || + RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { timeout_tags = tmp_timeout_tags; } ahci_mop_commands(ahci_ctlp, ahci_portp, slot_status, - 0, /* failed tags */ - timeout_tags, /* timeout tags */ - 0, /* aborted tags */ - 0); /* reset tags */ + 0, /* failed tags */ + timeout_tags, /* timeout tags */ + 0, /* aborted tags */ + 0); /* reset tags */ mutex_exit(&ahci_portp->ahciport_mutex); } @@ -7180,7 +9297,8 @@ port_cmd_status = ddi_get32(ahci_ctlp->ahcictl_ahci_acc_handle, (uint32_t *)AHCI_PORT_PxCMD(ahci_ctlp, port)); - if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) { + if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) || + RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) { current_slot = 0; pending_tags = 0x1; } else if (NON_NCQ_CMD_IN_PROGRESS(ahci_portp)) { @@ -7207,6 +9325,8 @@ if (ERR_RETRI_CMD_IN_PROGRESS(ahci_portp)) spkt = ahci_portp->ahciport_err_retri_pkt; + else if (RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp)) + spkt = ahci_portp->ahciport_rdwr_pmult_pkt; else spkt = ahci_portp->ahciport_slot_pkts[tmp_slot]; @@ -7497,6 +9617,39 @@ } /* + * Translate the sata_address_t type into the ahci_addr_t type. + * sata_device.satadev_addr structure is used as source. + */ +static void +ahci_get_ahci_addr(ahci_ctl_t *ahci_ctlp, sata_device_t *sd, + ahci_addr_t *ahci_addrp) +{ + sata_address_t *sata_addrp = &sd->satadev_addr; + ahci_addrp->aa_port = + ahci_ctlp->ahcictl_cport_to_port[sata_addrp->cport]; + ahci_addrp->aa_pmport = sata_addrp->pmport; + + switch (sata_addrp->qual) { + case SATA_ADDR_DCPORT: + case SATA_ADDR_CPORT: + ahci_addrp->aa_qual = AHCI_ADDR_PORT; + break; + case SATA_ADDR_PMULT: + ahci_addrp->aa_qual = AHCI_ADDR_PMULT; + break; + case SATA_ADDR_DPMPORT: + case SATA_ADDR_PMPORT: + ahci_addrp->aa_qual = AHCI_ADDR_PMPORT; + break; + case SATA_ADDR_NULL: + default: + /* something went wrong */ + ahci_addrp->aa_qual = AHCI_ADDR_NULL; + break; + } +} + +/* * This routine is to calculate the total number of ports implemented * by the HBA. */
--- a/usr/src/uts/common/io/sata/impl/sata.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/sata/impl/sata.c Mon Aug 17 09:14:35 2009 -0400 @@ -50,6 +50,7 @@ #include <sys/sata/sata_hba.h> #include <sys/sata/sata_defs.h> #include <sys/sata/sata_cfgadm.h> +#include <sys/sata/sata_blacklist.h> /* Debug flags - defined in sata.h */ int sata_debug_flags = 0; @@ -61,6 +62,7 @@ #define SATA_ENABLE_QUEUING 1 #define SATA_ENABLE_NCQ 2 #define SATA_ENABLE_PROCESS_EVENTS 4 +#define SATA_ENABLE_PMULT_FBS 8 /* FIS-Based Switching */ int sata_func_enable = SATA_ENABLE_PROCESS_EVENTS | SATA_ENABLE_QUEUING | SATA_ENABLE_NCQ; @@ -129,7 +131,7 @@ #define LEGACY_HWID_LEN 64 /* Model (40) + Serial (20) + pad */ -static char sata_rev_tag[] = {"1.43"}; +static char sata_rev_tag[] = {"1.44"}; /* * SATA cb_ops functions @@ -167,13 +169,21 @@ static void sata_event_daemon(void *); static void sata_event_thread_control(int); static void sata_process_controller_events(sata_hba_inst_t *sata_hba_inst); +static void sata_process_pmult_events(sata_hba_inst_t *, uint8_t); static void sata_process_device_reset(sata_hba_inst_t *, sata_address_t *); +static void sata_process_pmdevice_reset(sata_hba_inst_t *, sata_address_t *); static void sata_process_port_failed_event(sata_hba_inst_t *, sata_address_t *); static void sata_process_port_link_events(sata_hba_inst_t *, sata_address_t *); +static void sata_process_pmport_link_events(sata_hba_inst_t *, + sata_address_t *); static void sata_process_device_detached(sata_hba_inst_t *, sata_address_t *); +static void sata_process_pmdevice_detached(sata_hba_inst_t *, + sata_address_t *); static void sata_process_device_attached(sata_hba_inst_t *, sata_address_t *); +static void sata_process_pmdevice_attached(sata_hba_inst_t *, + sata_address_t *); static void sata_process_port_pwr_change(sata_hba_inst_t *, sata_address_t *); static void sata_process_cntrl_pwr_level_change(sata_hba_inst_t *); static void sata_process_target_node_cleanup(sata_hba_inst_t *, @@ -220,7 +230,7 @@ static int32_t sata_get_port_num(sata_hba_inst_t *, struct devctl_iocdata *); static void sata_cfgadm_state(sata_hba_inst_t *, int32_t, devctl_ap_state_t *); -static dev_info_t *sata_get_target_dip(dev_info_t *, int32_t); +static dev_info_t *sata_get_target_dip(dev_info_t *, uint8_t, uint8_t); static dev_info_t *sata_get_scsi_target_dip(dev_info_t *, sata_address_t *); static dev_info_t *sata_devt_to_devinfo(dev_t); static int sata_ioctl_connect(sata_hba_inst_t *, sata_device_t *); @@ -250,11 +260,19 @@ static void sata_remove_hba_instance(dev_info_t *); static int sata_validate_sata_hba_tran(dev_info_t *, sata_hba_tran_t *); static void sata_probe_ports(sata_hba_inst_t *); +static void sata_probe_pmports(sata_hba_inst_t *, uint8_t); static int sata_reprobe_port(sata_hba_inst_t *, sata_device_t *, int); -static int sata_add_device(dev_info_t *, sata_hba_inst_t *, int cport, - int pmport); +static int sata_reprobe_pmult(sata_hba_inst_t *, sata_device_t *, int); +static int sata_reprobe_pmport(sata_hba_inst_t *, sata_device_t *, int); +static void sata_alloc_pmult(sata_hba_inst_t *, sata_device_t *); +static void sata_free_pmult(sata_hba_inst_t *, sata_device_t *); +static int sata_add_device(dev_info_t *, sata_hba_inst_t *, sata_device_t *); +static int sata_offline_device(sata_hba_inst_t *, sata_device_t *, + sata_drive_info_t *); static dev_info_t *sata_create_target_node(dev_info_t *, sata_hba_inst_t *, sata_address_t *); +static void sata_remove_target_node(sata_hba_inst_t *, + sata_address_t *); static int sata_validate_scsi_address(sata_hba_inst_t *, struct scsi_address *, sata_device_t *); static int sata_validate_sata_address(sata_hba_inst_t *, int, int, int); @@ -276,6 +294,7 @@ static int sata_fetch_device_identify_data(sata_hba_inst_t *, sata_drive_info_t *); static void sata_update_port_info(sata_hba_inst_t *, sata_device_t *); +static void sata_update_pmport_info(sata_hba_inst_t *, sata_device_t *); static void sata_update_port_scr(sata_port_scr_t *, sata_device_t *); static int sata_set_dma_mode(sata_hba_inst_t *, sata_drive_info_t *); static int sata_set_cache_mode(sata_hba_inst_t *, sata_drive_info_t *, int); @@ -319,6 +338,7 @@ static void sata_save_drive_settings(sata_drive_info_t *); static void sata_show_drive_info(sata_hba_inst_t *, sata_drive_info_t *); +static void sata_show_pmult_info(sata_hba_inst_t *, sata_device_t *); static void sata_log(sata_hba_inst_t *, uint_t, char *fmt, ...); static void sata_trace_log(sata_hba_inst_t *, uint_t, const char *fmt, ...); static int sata_fetch_smart_return_status(sata_hba_inst_t *, @@ -527,7 +547,7 @@ _NOTE(SCHEME_PROTECTS_DATA("No Mutex Needed", sata_hba_inst::satahba_dip)) _NOTE(SCHEME_PROTECTS_DATA("Scheme", sata_hba_inst::satahba_attached)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_hba_inst::satahba_dev_port)) -_NOTE(MUTEX_PROTECTS_DATA(sata_hba_inst::satahba_mutex, +_NOTE(MUTEX_PROTECTS_DATA(sata_hba_inst::satahba_mutex, sata_hba_inst::satahba_event_flags)) _NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \ sata_cport_info::cport_devp)) @@ -539,10 +559,18 @@ _NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \ sata_cport_info::cport_state)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_cport_info::cport_state)) -_NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \ +_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \ sata_pmport_info::pmport_state)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_state)) +_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \ + sata_pmport_info::pmport_dev_type)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_dev_type)) +_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \ + sata_pmport_info::pmport_sata_drive)) +_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \ + sata_pmport_info::pmport_tgtnode_clean)) +_NOTE(MUTEX_PROTECTS_DATA(sata_pmport_info::pmport_mutex, \ + sata_pmport_info::pmport_event_flags)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmport_info::pmport_sata_drive)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmult_info::pmult_dev_port)) _NOTE(DATA_READABLE_WITHOUT_LOCK(sata_pmult_info::pmult_num_dev_ports)) @@ -928,7 +956,7 @@ * * When the last HBA instance is detached, the event daemon is terminated. * - * NOTE: cport support only, no port multiplier support. + * NOTE: Port multiplier is supported. */ int sata_hba_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) @@ -937,8 +965,10 @@ sata_hba_inst_t *sata_hba_inst; scsi_hba_tran_t *scsi_hba_tran; sata_cport_info_t *cportinfo; + sata_pmult_info_t *pminfo; sata_drive_info_t *sdinfo; - int ncport; + sata_device_t sdevice; + int ncport, npmport; SATADBG3(SATA_DBG_HBA_IF, NULL, "sata_hba_detach: node %s (%s%d)\n", ddi_node_name(dip), ddi_driver_name(dip), ddi_get_instance(dip)); @@ -970,7 +1000,7 @@ sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); if (sdinfo != NULL) { tdip = sata_get_target_dip(dip, - ncport); + ncport, 0); if (tdip != NULL) { if (ndi_devi_offline(tdip, NDI_DEVI_REMOVE) != @@ -985,6 +1015,44 @@ } } } + } else { /* SATA_DTYPE_PMULT */ + mutex_enter(&cportinfo->cport_mutex); + pminfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + + if (pminfo == NULL) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_detach: Port multiplier " + "not ready yet!")); + mutex_exit(&cportinfo->cport_mutex); + return (DDI_FAILURE); + } + + /* + * Detach would fail if removal of any of the + * target nodes is failed - albeit in that + * case some of them may have been removed. + */ + for (npmport = 0; npmport < SATA_NUM_PMPORTS( + sata_hba_inst, ncport); npmport++) { + tdip = sata_get_target_dip(dip, ncport, + npmport); + if (tdip != NULL) { + if (ndi_devi_offline(tdip, + NDI_DEVI_REMOVE) != + NDI_SUCCESS) { + SATA_LOG_D(( + sata_hba_inst, + CE_WARN, + "sata_hba_detach: " + "Target node not " + "removed !")); + mutex_exit(&cportinfo-> + cport_mutex); + return (DDI_FAILURE); + } + } + } + mutex_exit(&cportinfo->cport_mutex); } } /* @@ -1026,6 +1094,10 @@ mutex_destroy(&cportinfo->cport_mutex); kmem_free(cportinfo, sizeof (sata_cport_info_t)); + } else { /* SATA_DTYPE_PMULT */ + sdevice.satadev_addr.cport = (uint8_t)ncport; + sdevice.satadev_addr.qual = SATA_ADDR_PMULT; + sata_free_pmult(sata_hba_inst, &sdevice); } } @@ -1196,8 +1268,9 @@ * 0 if successful, * error code if operation failed. * - * NOTE: Port Multiplier is not supported. - * + * Port Multiplier support is supported now. + * + * NOTE: qual should be SATA_ADDR_DCPORT or SATA_ADDR_DPMPORT */ static int @@ -1260,10 +1333,14 @@ return (EINVAL); } + /* + * According to SCSI_TO_SATA_ADDR_QUAL, qual should be either + * SATA_ADDR_DCPORT or SATA_ADDR_DPMPORT. + */ cport = SCSI_TO_SATA_CPORT(comp_port); pmport = SCSI_TO_SATA_PMPORT(comp_port); - /* Only cport is considered now, i.e. SATA_ADDR_CPORT */ - qual = SATA_ADDR_CPORT; + qual = SCSI_TO_SATA_ADDR_QUAL(comp_port); + if (sata_validate_sata_address(sata_hba_inst, cport, pmport, qual) != 0) { ndi_dc_freehdl(dcp); @@ -1422,8 +1499,9 @@ pmport = SCSI_TO_SATA_PMPORT(ioc.port); qual = SCSI_TO_SATA_ADDR_QUAL(ioc.port); - /* Override address qualifier - handle cport only for now */ - qual = SATA_ADDR_CPORT; + SATADBG3(SATA_DBG_IOCTL_IF, sata_hba_inst, + "sata_hba_ioctl: target port is %d:%d (%d)", + cport, pmport, qual); if (sata_validate_sata_address(sata_hba_inst, cport, pmport, qual) != 0) @@ -1502,12 +1580,7 @@ break; case SATA_CFGA_GET_DEVICE_PATH: - if (qual == SATA_ADDR_CPORT) - sata_device.satadev_addr.qual = - SATA_ADDR_DCPORT; - else - sata_device.satadev_addr.qual = - SATA_ADDR_DPMPORT; + rv = sata_ioctl_get_device_path(sata_hba_inst, &sata_device, &ioc, mode); break; @@ -1678,6 +1751,138 @@ } /* + * Create READ PORT MULTIPLIER and WRITE PORT MULTIPLIER sata packet + * + * No association with any scsi packet is made and no callback routine is + * specified. + * + * Returns a pointer to sata packet upon successfull packet creation. + * Returns NULL, if packet cannot be created. + * + * NOTE: Input/Output value includes 64 bits accoring to SATA Spec 2.6, + * only lower 32 bits are available currently. + */ +sata_pkt_t * +sata_get_rdwr_pmult_pkt(dev_info_t *dip, sata_device_t *sd, + uint8_t regn, uint32_t regv, uint32_t type) +{ + sata_hba_inst_t *sata_hba_inst; + sata_pkt_txlate_t *spx; + sata_pkt_t *spkt; + sata_cmd_t *scmd; + + /* Only READ/WRITE commands are accepted. */ + ASSERT(type == SATA_RDWR_PMULT_PKT_TYPE_READ || + type == SATA_RDWR_PMULT_PKT_TYPE_WRITE); + + mutex_enter(&sata_mutex); + for (sata_hba_inst = sata_hba_list; sata_hba_inst != NULL; + sata_hba_inst = sata_hba_inst->satahba_next) { + if (SATA_DIP(sata_hba_inst) == dip) + break; + } + mutex_exit(&sata_mutex); + ASSERT(sata_hba_inst != NULL); + + spx = kmem_zalloc(sizeof (sata_pkt_txlate_t), KM_SLEEP); + spx->txlt_sata_hba_inst = sata_hba_inst; + spx->txlt_scsi_pkt = NULL; /* No scsi pkt involved */ + spkt = sata_pkt_alloc(spx, SLEEP_FUNC); + if (spkt == NULL) { + kmem_free(spx, sizeof (sata_pkt_txlate_t)); + return (NULL); + } + + /* + * NOTE: We need to send this command to the port multiplier, + * that means send to SATA_PMULT_HOSTPORT(0xf) pmport + * + * sata_device contains the address of actual target device, and the + * pmport number in the command comes from the sata_device structure. + */ + spkt->satapkt_device.satadev_addr = sd->satadev_addr; + spkt->satapkt_device.satadev_addr.pmport = SATA_PMULT_HOSTPORT; + spkt->satapkt_device.satadev_addr.qual = SATA_ADDR_PMULT; + + /* Fill sata_pkt */ + spkt->satapkt_op_mode = SATA_OPMODE_SYNCH | SATA_OPMODE_POLLING; + spkt->satapkt_comp = NULL; /* Synchronous mode, no callback */ + spkt->satapkt_time = 10; /* Timeout 10s */ + + /* Build READ PORT MULTIPLIER cmd in the sata_pkt */ + scmd = &spkt->satapkt_cmd; + scmd->satacmd_features_reg = regn & 0xff; + scmd->satacmd_features_reg_ext = (regn >> 8) & 0xff; + scmd->satacmd_device_reg = sd->satadev_addr.pmport; + scmd->satacmd_addr_type = 0; /* N/A */ + + scmd->satacmd_flags.sata_ignore_dev_reset = B_TRUE; + + if (type == SATA_RDWR_PMULT_PKT_TYPE_READ) { + scmd->satacmd_cmd_reg = SATAC_READ_PORTMULT; + scmd->satacmd_flags.sata_data_direction = SATA_DIR_READ; + scmd->satacmd_flags.sata_special_regs = 1; + scmd->satacmd_flags.sata_copy_out_lba_high_lsb = 1; + scmd->satacmd_flags.sata_copy_out_lba_mid_lsb = 1; + scmd->satacmd_flags.sata_copy_out_lba_low_lsb = 1; + scmd->satacmd_flags.sata_copy_out_sec_count_lsb = 1; + } else if (type == SATA_RDWR_PMULT_PKT_TYPE_WRITE) { + scmd->satacmd_cmd_reg = SATAC_WRITE_PORTMULT; + scmd->satacmd_flags.sata_data_direction = SATA_DIR_WRITE; + scmd->satacmd_sec_count_lsb = regv & 0xff; + scmd->satacmd_lba_low_lsb = regv >> 8 & 0xff; + scmd->satacmd_lba_mid_lsb = regv >> 16 & 0xff; + scmd->satacmd_lba_high_lsb = regv >> 24 & 0xff; + } + + return (spkt); +} + +/* + * Free sata packet and any associated resources allocated previously by + * sata_get_rdwr_pmult_pkt(). + * + * Void return. + */ +void +sata_free_rdwr_pmult_pkt(sata_pkt_t *sata_pkt) +{ + sata_pkt_txlate_t *spx = + (sata_pkt_txlate_t *)sata_pkt->satapkt_framework_private; + + /* Free allocated resources */ + sata_pkt_free(spx); + kmem_free(spx, sizeof (sata_pkt_txlate_t)); +} + +/* + * Search a port multiplier in the blacklist and update the flags if a match + * is found. + * + * Returns: + * SATA_SUCCESS if any matched entry is found. + * SATA_FAILURE if no matched entry is found. + */ +int +sata_check_pmult_blacklist(sata_device_t *sd) +{ + sata_pmult_bl_t *blp; + for (blp = sata_pmult_blacklist; blp->bl_gscr0; blp++) { + if (sd->satadev_gscr.gscr0 != blp->bl_gscr0 && blp->bl_gscr0) + continue; + if (sd->satadev_gscr.gscr1 != blp->bl_gscr1 && blp->bl_gscr1) + continue; + if (sd->satadev_gscr.gscr2 != blp->bl_gscr2 && blp->bl_gscr2) + continue; + + cmn_err(CE_WARN, "!Port multiplier is on the blacklist."); + sd->satadev_add_info = blp->bl_flags; + return (SATA_SUCCESS); + } + return (SATA_FAILURE); +} + +/* * sata_name_child is for composing the name of the node * the format of the name is "target,0". */ @@ -2125,9 +2330,11 @@ sata_hba_inst_t *sata_hba_inst = (sata_hba_inst_t *)(ap->a_hba_tran->tran_hba_private); sata_pkt_txlate_t *spx = (sata_pkt_txlate_t *)pkt->pkt_ha_private; + sata_device_t *sdevice = &spx->txlt_sata_pkt->satapkt_device; sata_drive_info_t *sdinfo; struct buf *bp; - int cport; + uint8_t cport, pmport; + boolean_t dev_gone = B_FALSE; int rval; SATADBG1(SATA_DBG_SCSI_IF, sata_hba_inst, @@ -2137,15 +2344,42 @@ spx->txlt_scsi_pkt == pkt && spx->txlt_sata_pkt != NULL); cport = SCSI_TO_SATA_CPORT(ap->a_target); + pmport = SCSI_TO_SATA_PMPORT(ap->a_target); mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); - sdinfo = sata_get_device_info(sata_hba_inst, - &spx->txlt_sata_pkt->satapkt_device); - if (sdinfo == NULL || - SATA_CPORT_INFO(sata_hba_inst, cport)->cport_tgtnode_clean == - B_FALSE || - (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) { - + + if (sdevice->satadev_addr.qual == SATA_ADDR_DCPORT) { + sdinfo = sata_get_device_info(sata_hba_inst, sdevice); + if (sdinfo == NULL || + SATA_CPORT_INFO(sata_hba_inst, cport)-> + cport_tgtnode_clean == B_FALSE || + (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) { + dev_gone = B_TRUE; + } + } else if (sdevice->satadev_addr.qual == SATA_ADDR_DPMPORT) { + if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) != + SATA_DTYPE_PMULT || SATA_PMULT_INFO(sata_hba_inst, + cport) == NULL) { + dev_gone = B_TRUE; + } else if (SATA_PMPORT_INFO(sata_hba_inst, cport, + pmport) == NULL) { + dev_gone = B_TRUE; + } else { + mutex_enter(&(SATA_PMPORT_MUTEX(sata_hba_inst, + cport, pmport))); + sdinfo = sata_get_device_info(sata_hba_inst, sdevice); + if (sdinfo == NULL || + SATA_PMPORT_INFO(sata_hba_inst, cport, pmport)-> + pmport_tgtnode_clean == B_FALSE || + (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) { + dev_gone = B_TRUE; + } + mutex_exit(&(SATA_PMPORT_MUTEX(sata_hba_inst, + cport, pmport))); + } + } + + if (dev_gone == B_TRUE) { mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); pkt->pkt_reason = CMD_DEV_GONE; /* @@ -2424,8 +2658,12 @@ mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, sata_device.satadev_addr.cport))); if (level == RESET_ALL) { - /* port reset - cport only */ - sata_device.satadev_addr.qual = SATA_ADDR_CPORT; + /* port reset */ + if (sata_device.satadev_addr.qual == SATA_ADDR_DCPORT) + sata_device.satadev_addr.qual = SATA_ADDR_CPORT; + else + sata_device.satadev_addr.qual = SATA_ADDR_PMPORT; + if ((*SATA_RESET_DPORT_FUNC(sata_hba_inst)) (SATA_DIP(sata_hba_inst), &sata_device) == SATA_SUCCESS) return (1); @@ -2815,9 +3053,16 @@ case -1: /* Invalid address or invalid device type */ + SATADBG1(SATA_DBG_SCSI_IF, spx->txlt_sata_hba_inst, + "sata_scsi_start: reject command because " + "dev type or address is invalid\n", NULL); return (TRAN_BADPKT); case 1: /* valid address but no device - it has disappeared ? */ + SATADBG1(SATA_DBG_SCSI_IF, spx->txlt_sata_hba_inst, + "sata_scsi_start: reject command because " + "device is gone\n", NULL); + spx->txlt_scsi_pkt->pkt_reason = CMD_DEV_GONE; *reason = CMD_DEV_GONE; /* @@ -3142,7 +3387,7 @@ * SATA translate command: Inquiry / Identify Device * Use cached Identify Device data for now, rather than issuing actual * Device Identify cmd request. If device is detached and re-attached, - * asynchromous event processing should fetch and refresh Identify Device + * asynchronous event processing should fetch and refresh Identify Device * data. * Two VPD pages are supported now: * Vital Product Data page @@ -5766,22 +6011,23 @@ * have called the sata_pkt callback function for this packet. * * The scsi callback has to be performed by the caller of this routine. - * - * Note 2: No port multiplier support for now. */ static int sata_hba_start(sata_pkt_txlate_t *spx, int *rval) { - int stat, cport; + int stat; + uint8_t cport = SATA_TXLT_CPORT(spx); + uint8_t pmport = SATA_TXLT_PMPORT(spx); sata_hba_inst_t *sata_hba_inst = spx->txlt_sata_hba_inst; sata_drive_info_t *sdinfo; - sata_device_t *sata_device; + sata_pmult_info_t *pminfo; + sata_pmport_info_t *pmportinfo = NULL; + sata_device_t *sata_device = NULL; uint8_t cmd; struct sata_cmd_flags cmd_flags; ASSERT(spx->txlt_sata_pkt != NULL); - cport = SATA_TXLT_CPORT(spx); ASSERT(mutex_owned(&SATA_CPORT_MUTEX(sata_hba_inst, cport))); sdinfo = sata_get_device_info(sata_hba_inst, @@ -5789,13 +6035,40 @@ ASSERT(sdinfo != NULL); /* Clear device reset state? */ - if (sdinfo->satadrv_event_flags & SATA_EVNT_CLEAR_DEVICE_RESET) { - spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags. - sata_clear_dev_reset = B_TRUE; - sdinfo->satadrv_event_flags &= ~SATA_EVNT_CLEAR_DEVICE_RESET; - SATADBG1(SATA_DBG_EVENTS, sata_hba_inst, - "sata_hba_start: clearing device reset state\n", NULL); - } + /* qual should be XXX_DPMPORT, but add XXX_PMPORT in case */ + if (sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT || + sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT) { + + /* + * Get the pmult_info of the its parent port multiplier, all + * sub-devices share a common device reset flags on in + * pmult_info. + */ + pminfo = SATA_PMULT_INFO(sata_hba_inst, cport); + pmportinfo = pminfo->pmult_dev_port[pmport]; + ASSERT(pminfo != NULL); + if (pminfo->pmult_event_flags & SATA_EVNT_CLEAR_DEVICE_RESET) { + spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags. + sata_clear_dev_reset = B_TRUE; + pminfo->pmult_event_flags &= + ~SATA_EVNT_CLEAR_DEVICE_RESET; + SATADBG1(SATA_DBG_EVENTS, sata_hba_inst, + "sata_hba_start: clearing device reset state" + "on pmult.\n", NULL); + } + } else { + if (sdinfo->satadrv_event_flags & + SATA_EVNT_CLEAR_DEVICE_RESET) { + spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags. + sata_clear_dev_reset = B_TRUE; + sdinfo->satadrv_event_flags &= + ~SATA_EVNT_CLEAR_DEVICE_RESET; + SATADBG1(SATA_DBG_EVENTS, sata_hba_inst, + "sata_hba_start: clearing device reset state\n", + NULL); + } + } + cmd = spx->txlt_sata_pkt->satapkt_cmd.satacmd_cmd_reg; cmd_flags = spx->txlt_sata_pkt->satapkt_cmd.satacmd_flags; sata_device = &spx->txlt_sata_pkt->satapkt_device; @@ -5857,7 +6130,7 @@ sata_device->satadev_addr.cport); else sata_log(sata_hba_inst, CE_CONT, - "SATA port %d pmport %d error\n", + "SATA port %d:%d error\n", sata_device->satadev_addr.cport, sata_device->satadev_addr.pmport); @@ -5869,7 +6142,17 @@ * because original packet's sata address refered to a device * attached to some port. */ - sata_update_port_info(sata_hba_inst, sata_device); + if (sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT || + sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT) { + mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + mutex_exit(&pmportinfo->pmport_mutex); + mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + } else { + sata_update_port_info(sata_hba_inst, sata_device); + } + spx->txlt_scsi_pkt->pkt_reason = CMD_TRAN_ERR; *rval = TRAN_FATAL_ERROR; break; @@ -5944,8 +6227,14 @@ * the device reset state, * so the next sata packet may carry it to HBA. */ - sdinfo->satadrv_event_flags |= - SATA_EVNT_CLEAR_DEVICE_RESET; + if (sdinfo->satadrv_addr.qual == SATA_ADDR_PMPORT || + sdinfo->satadrv_addr.qual == SATA_ADDR_DPMPORT) { + pminfo->pmult_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } else { + sdinfo->satadrv_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } } } return (-1); @@ -8779,10 +9068,6 @@ mutex_exit(&sata_mutex); } - - - - /* * Probe all SATA ports of the specified HBA instance. * The assumption is that there are no target and attachment point minor nodes @@ -8802,11 +9087,9 @@ sata_probe_ports(sata_hba_inst_t *sata_hba_inst) { dev_info_t *dip = SATA_DIP(sata_hba_inst); - int ncport, npmport; + int ncport; sata_cport_info_t *cportinfo; sata_drive_info_t *drive; - sata_pmult_info_t *pminfo; - sata_pmport_info_t *pmportinfo; sata_device_t sata_device; int rval; dev_t minor_number; @@ -8837,8 +9120,8 @@ * an attachment point */ mutex_exit(&cportinfo->cport_mutex); - minor_number = - SATA_MAKE_AP_MINOR(ddi_get_instance(dip), ncport, 0, 0); + minor_number = SATA_MAKE_AP_MINOR(ddi_get_instance(dip), + ncport, 0, SATA_ADDR_CPORT); (void) sprintf(name, "%d", ncport); if (ddi_create_minor_node(dip, name, S_IFCHR, minor_number, DDI_NT_SATA_ATTACHMENT_POINT, 0) != @@ -8860,7 +9143,7 @@ (dip, &sata_device); mutex_enter(&cportinfo->cport_mutex); - sata_update_port_scr(&cportinfo->cport_scr, &sata_device); + cportinfo->cport_scr = sata_device.satadev_scr; if (rval != SATA_SUCCESS) { /* Something went wrong? Fail the port */ cportinfo->cport_state = SATA_PSTATE_FAILED; @@ -8895,7 +9178,7 @@ drive->satadrv_state = SATA_STATE_UNKNOWN; mutex_exit(&cportinfo->cport_mutex); - if (sata_add_device(dip, sata_hba_inst, ncport, 0) != + if (sata_add_device(dip, sata_hba_inst, &sata_device) != SATA_SUCCESS) { /* * Plugged device was not correctly identified. @@ -8910,135 +9193,114 @@ goto reprobe_cport; } } - } else { - mutex_exit(&cportinfo->cport_mutex); - ASSERT(cportinfo->cport_dev_type == SATA_DTYPE_PMULT); - pminfo = kmem_zalloc(sizeof (sata_pmult_info_t), - KM_SLEEP); - mutex_enter(&cportinfo->cport_mutex); - ASSERT(pminfo != NULL); - SATA_CPORTINFO_PMULT_INFO(cportinfo) = pminfo; - pminfo->pmult_addr.cport = cportinfo->cport_addr.cport; - pminfo->pmult_addr.pmport = SATA_PMULT_HOSTPORT; - pminfo->pmult_addr.qual = SATA_ADDR_PMPORT; - pminfo->pmult_num_dev_ports = - sata_device.satadev_add_info; - mutex_init(&pminfo->pmult_mutex, NULL, MUTEX_DRIVER, - NULL); - pminfo->pmult_state = SATA_STATE_PROBING; + } else { /* SATA_DTYPE_PMULT */ mutex_exit(&cportinfo->cport_mutex); - /* Probe Port Multiplier ports */ - for (npmport = 0; - npmport < pminfo->pmult_num_dev_ports; - npmport++) { - pmportinfo = kmem_zalloc( - sizeof (sata_pmport_info_t), KM_SLEEP); - mutex_enter(&cportinfo->cport_mutex); - ASSERT(pmportinfo != NULL); - pmportinfo->pmport_addr.cport = ncport; - pmportinfo->pmport_addr.pmport = npmport; - pmportinfo->pmport_addr.qual = - SATA_ADDR_PMPORT; - pminfo->pmult_dev_port[npmport] = pmportinfo; - - mutex_init(&pmportinfo->pmport_mutex, NULL, - MUTEX_DRIVER, NULL); - - mutex_exit(&cportinfo->cport_mutex); - - /* Create an attachment point */ - minor_number = SATA_MAKE_AP_MINOR( - ddi_get_instance(dip), ncport, npmport, 1); - (void) sprintf(name, "%d.%d", ncport, npmport); - if (ddi_create_minor_node(dip, name, S_IFCHR, - minor_number, DDI_NT_SATA_ATTACHMENT_POINT, - 0) != DDI_SUCCESS) { - sata_log(sata_hba_inst, CE_WARN, - "sata_hba_attach: " - "cannot create SATA attachment " - "point for port %d pmult port %d", - ncport, npmport); - } - - start_time = ddi_get_lbolt(); - reprobe_pmport: - sata_device.satadev_addr.pmport = npmport; - sata_device.satadev_addr.qual = - SATA_ADDR_PMPORT; - - rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) - (dip, &sata_device); - mutex_enter(&cportinfo->cport_mutex); - - /* sata_update_port_info() */ - sata_update_port_scr(&pmportinfo->pmport_scr, - &sata_device); - - if (rval != SATA_SUCCESS) { - pmportinfo->pmport_state = - SATA_PSTATE_FAILED; - mutex_exit(&cportinfo->cport_mutex); - continue; - } - pmportinfo->pmport_state &= - ~SATA_STATE_PROBING; - pmportinfo->pmport_state |= SATA_STATE_PROBED; - pmportinfo->pmport_dev_type = - sata_device.satadev_type; - - pmportinfo->pmport_state |= SATA_STATE_READY; - if (pmportinfo->pmport_dev_type == - SATA_DTYPE_NONE) { - mutex_exit(&cportinfo->cport_mutex); - continue; - } - /* Port multipliers cannot be chained */ - ASSERT(pmportinfo->pmport_dev_type != - SATA_DTYPE_PMULT); - /* - * There is something attached to Port - * Multiplier device port - * Allocate device info structure - */ - if (pmportinfo->pmport_sata_drive == NULL) { - mutex_exit(&cportinfo->cport_mutex); - pmportinfo->pmport_sata_drive = - kmem_zalloc( - sizeof (sata_drive_info_t), - KM_SLEEP); - mutex_enter(&cportinfo->cport_mutex); - } - drive = pmportinfo->pmport_sata_drive; - drive->satadrv_addr.cport = - pmportinfo->pmport_addr.cport; - drive->satadrv_addr.pmport = npmport; - drive->satadrv_addr.qual = SATA_ADDR_DPMPORT; - drive->satadrv_type = pmportinfo-> - pmport_dev_type; - drive->satadrv_state = SATA_STATE_UNKNOWN; - - mutex_exit(&cportinfo->cport_mutex); - if (sata_add_device(dip, sata_hba_inst, ncport, - npmport) != SATA_SUCCESS) { - /* - * Plugged device was not correctly - * identified. Retry, within the - * SATA_DEV_IDENTIFY_TIMEOUT - */ - cur_time = ddi_get_lbolt(); - if ((cur_time - start_time) < - drv_usectohz( - SATA_DEV_IDENTIFY_TIMEOUT)) { - /* sleep for a while */ - delay(drv_usectohz( - SATA_DEV_RETRY_DLY)); - goto reprobe_pmport; - } - } - } + /* Allocate sata_pmult_info and sata_pmport_info */ + sata_alloc_pmult(sata_hba_inst, &sata_device); + + /* Log the information of the port multiplier */ + sata_show_pmult_info(sata_hba_inst, &sata_device); + + /* Probe its pmports */ + sata_probe_pmports(sata_hba_inst, ncport); + } + } +} + +/* + * Probe all device ports behind a port multiplier. + * + * PMult-related structure should be allocated before by sata_alloc_pmult(). + * + * NOTE1: Only called from sata_probe_ports() + * NOTE2: No mutex should be hold. + */ +static void +sata_probe_pmports(sata_hba_inst_t *sata_hba_inst, uint8_t ncport) +{ + dev_info_t *dip = SATA_DIP(sata_hba_inst); + sata_pmult_info_t *pmultinfo = NULL; + sata_pmport_info_t *pmportinfo = NULL; + sata_drive_info_t *drive = NULL; + sata_device_t sata_device; + + clock_t start_time, cur_time; + int npmport; + int rval; + + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, ncport); + + /* Probe Port Multiplier ports */ + for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports; npmport++) { + pmportinfo = pmultinfo->pmult_dev_port[npmport]; + start_time = ddi_get_lbolt(); +reprobe_pmport: + sata_device.satadev_addr.cport = ncport; + sata_device.satadev_addr.pmport = npmport; + sata_device.satadev_addr.qual = SATA_ADDR_PMPORT; + sata_device.satadev_rev = SATA_DEVICE_REV; + + /* Let HBA driver probe it. */ + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (dip, &sata_device); + mutex_enter(&pmportinfo->pmport_mutex); + + pmportinfo->pmport_scr = sata_device.satadev_scr; + + if (rval != SATA_SUCCESS) { pmportinfo->pmport_state = - SATA_STATE_PROBED | SATA_STATE_READY; + SATA_PSTATE_FAILED; + mutex_exit(&pmportinfo->pmport_mutex); + continue; + } + pmportinfo->pmport_state &= ~SATA_STATE_PROBING; + pmportinfo->pmport_state |= SATA_STATE_PROBED; + pmportinfo->pmport_dev_type = sata_device.satadev_type; + + pmportinfo->pmport_state |= SATA_STATE_READY; + if (pmportinfo->pmport_dev_type == + SATA_DTYPE_NONE) { + SATADBG2(SATA_DBG_PMULT, sata_hba_inst, + "no device found at port %d:%d", ncport, npmport); + mutex_exit(&pmportinfo->pmport_mutex); + continue; + } + /* Port multipliers cannot be chained */ + ASSERT(pmportinfo->pmport_dev_type != SATA_DTYPE_PMULT); + /* + * There is something attached to Port + * Multiplier device port + * Allocate device info structure + */ + if (pmportinfo->pmport_sata_drive == NULL) { + mutex_exit(&pmportinfo->pmport_mutex); + pmportinfo->pmport_sata_drive = + kmem_zalloc(sizeof (sata_drive_info_t), KM_SLEEP); + mutex_enter(&pmportinfo->pmport_mutex); + } + drive = pmportinfo->pmport_sata_drive; + drive->satadrv_addr.cport = pmportinfo->pmport_addr.cport; + drive->satadrv_addr.pmport = npmport; + drive->satadrv_addr.qual = SATA_ADDR_DPMPORT; + drive->satadrv_type = pmportinfo-> pmport_dev_type; + drive->satadrv_state = SATA_STATE_UNKNOWN; + + mutex_exit(&pmportinfo->pmport_mutex); + rval = sata_add_device(dip, sata_hba_inst, &sata_device); + + if (rval != SATA_SUCCESS) { + /* + * Plugged device was not correctly identified. + * Retry, within the SATA_DEV_IDENTIFY_TIMEOUT + */ + cur_time = ddi_get_lbolt(); + if ((cur_time - start_time) < drv_usectohz( + SATA_DEV_IDENTIFY_TIMEOUT)) { + /* sleep for a while */ + delay(drv_usectohz(SATA_DEV_RETRY_DLY)); + goto reprobe_pmport; + } } } } @@ -9052,28 +9314,29 @@ * * This function cannot be called from an interrupt context. * - * ONLY DISK TARGET NODES ARE CREATED NOW + * Create target nodes for disk, CD/DVD, Tape and ATAPI disk devices * * Returns SATA_SUCCESS when port/device was fully processed, SATA_FAILURE when * device identification failed - adding a device could be retried. * */ static int -sata_add_device(dev_info_t *pdip, sata_hba_inst_t *sata_hba_inst, int cport, - int pmport) +sata_add_device(dev_info_t *pdip, sata_hba_inst_t *sata_hba_inst, + sata_device_t *sata_device) { sata_cport_info_t *cportinfo; sata_pmult_info_t *pminfo; sata_pmport_info_t *pmportinfo; dev_info_t *cdip; /* child dip */ - sata_device_t sata_device; + sata_address_t *saddr = &sata_device->satadev_addr; + uint8_t cport, pmport; int rval; - - + cport = saddr->cport; + pmport = saddr->pmport; cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); ASSERT(cportinfo->cport_dev_type != SATA_DTYPE_NONE); - mutex_enter(&cportinfo->cport_mutex); + /* * Some device is attached to a controller port. * We rely on controllers distinquishing between no-device, @@ -9082,25 +9345,24 @@ * positively the dev type before trying to attach * the target driver. */ - sata_device.satadev_rev = SATA_DEVICE_REV; - if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) { - /* - * Not port multiplier. - */ - sata_device.satadev_addr = cportinfo->cport_addr; - sata_device.satadev_addr.qual = SATA_ADDR_DCPORT; - mutex_exit(&cportinfo->cport_mutex); - - rval = sata_probe_device(sata_hba_inst, &sata_device); + sata_device->satadev_rev = SATA_DEVICE_REV; + switch (saddr->qual) { + case SATA_ADDR_CPORT: + /* + * Add a non-port-multiplier device at controller port. + */ + saddr->qual = SATA_ADDR_DCPORT; + + rval = sata_probe_device(sata_hba_inst, sata_device); if (rval != SATA_SUCCESS || - sata_device.satadev_type == SATA_DTYPE_UNKNOWN) + sata_device->satadev_type == SATA_DTYPE_UNKNOWN) return (SATA_FAILURE); mutex_enter(&cportinfo->cport_mutex); sata_show_drive_info(sata_hba_inst, SATA_CPORTINFO_DRV_INFO(cportinfo)); - if ((sata_device.satadev_type & SATA_VALID_DEV_TYPE) == 0) { + if ((sata_device->satadev_type & SATA_VALID_DEV_TYPE) == 0) { /* * Could not determine device type or * a device is not supported. @@ -9110,7 +9372,7 @@ mutex_exit(&cportinfo->cport_mutex); return (SATA_SUCCESS); } - cportinfo->cport_dev_type = sata_device.satadev_type; + cportinfo->cport_dev_type = sata_device->satadev_type; cportinfo->cport_tgtnode_clean = B_TRUE; mutex_exit(&cportinfo->cport_mutex); @@ -9130,59 +9392,58 @@ "SATA device at port %d - " "default device features could not be set." " Device may not operate as expected.", - cportinfo->cport_addr.cport); - } - - cdip = sata_create_target_node(pdip, sata_hba_inst, - &sata_device.satadev_addr); - mutex_enter(&cportinfo->cport_mutex); + cport); + } + + cdip = sata_create_target_node(pdip, sata_hba_inst, saddr); if (cdip == NULL) { /* * Attaching target node failed. * We retain sata_drive_info structure... */ - mutex_exit(&cportinfo->cport_mutex); return (SATA_SUCCESS); } + + mutex_enter(&cportinfo->cport_mutex); (SATA_CPORTINFO_DRV_INFO(cportinfo))-> satadrv_state = SATA_STATE_READY; - } else { - /* This must be Port Multiplier type */ - if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) { - SATA_LOG_D((sata_hba_inst, CE_WARN, - "sata_add_device: " - "unrecognized dev type %x", - cportinfo->cport_dev_type)); - mutex_exit(&cportinfo->cport_mutex); - return (SATA_SUCCESS); - } + mutex_exit(&cportinfo->cport_mutex); + + break; + + case SATA_ADDR_PMPORT: + saddr->qual = SATA_ADDR_DPMPORT; + + mutex_enter(&cportinfo->cport_mutex); + /* It must be a Port Multiplier at the controller port */ + ASSERT(cportinfo->cport_dev_type == SATA_DTYPE_PMULT); + pminfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); - pmportinfo = pminfo->pmult_dev_port[pmport]; - sata_device.satadev_addr = pmportinfo->pmport_addr; - sata_device.satadev_addr.qual = SATA_ADDR_DPMPORT; + pmportinfo = pminfo->pmult_dev_port[saddr->pmport]; mutex_exit(&cportinfo->cport_mutex); - rval = sata_probe_device(sata_hba_inst, &sata_device); + rval = sata_probe_device(sata_hba_inst, sata_device); if (rval != SATA_SUCCESS || - sata_device.satadev_type == SATA_DTYPE_UNKNOWN) { + sata_device->satadev_type == SATA_DTYPE_UNKNOWN) { return (SATA_FAILURE); } - mutex_enter(&cportinfo->cport_mutex); + + mutex_enter(&pmportinfo->pmport_mutex); sata_show_drive_info(sata_hba_inst, - SATA_CPORTINFO_DRV_INFO(cportinfo)); - - if ((sata_device.satadev_type & SATA_VALID_DEV_TYPE) == 0) { + SATA_PMPORTINFO_DRV_INFO(pmportinfo)); + + if ((sata_device->satadev_type & SATA_VALID_DEV_TYPE) == 0) { /* * Could not determine device type. * Degrade this device to unknown. */ pmportinfo->pmport_dev_type = SATA_DTYPE_UNKNOWN; - mutex_exit(&cportinfo->cport_mutex); + mutex_exit(&pmportinfo->pmport_mutex); return (SATA_SUCCESS); } - pmportinfo->pmport_dev_type = sata_device.satadev_type; + pmportinfo->pmport_dev_type = sata_device->satadev_type; pmportinfo->pmport_tgtnode_clean = B_TRUE; - mutex_exit(&cportinfo->cport_mutex); + mutex_exit(&pmportinfo->pmport_mutex); /* * Initialize device to the desired state. @@ -9197,30 +9458,106 @@ if (rval == SATA_RETRY) sata_log(sata_hba_inst, CE_WARN, - "SATA device at port %d pmport %d - " + "SATA device at port %d:%d - " "default device features could not be set." " Device may not operate as expected.", - pmportinfo->pmport_addr.cport, - pmportinfo->pmport_addr.pmport); - } - cdip = sata_create_target_node(pdip, sata_hba_inst, - &sata_device.satadev_addr); - mutex_enter(&cportinfo->cport_mutex); + cport, pmport); + } + + cdip = sata_create_target_node(pdip, sata_hba_inst, saddr); if (cdip == NULL) { /* * Attaching target node failed. * We retain sata_drive_info structure... */ - mutex_exit(&cportinfo->cport_mutex); return (SATA_SUCCESS); } + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_sata_drive->satadrv_state |= SATA_STATE_READY; - } - mutex_exit(&cportinfo->cport_mutex); + mutex_exit(&pmportinfo->pmport_mutex); + + break; + + default: + return (SATA_FAILURE); + } + return (SATA_SUCCESS); } +/* + * Clean up target node at specific address. + * + * NOTE: No Mutex should be hold. + */ +static int +sata_offline_device(sata_hba_inst_t *sata_hba_inst, + sata_device_t *sata_device, sata_drive_info_t *sdinfo) +{ + uint8_t cport, pmport, qual; + dev_info_t *tdip; + + cport = sata_device->satadev_addr.cport; + pmport = sata_device->satadev_addr.pmport; + qual = sata_device->satadev_addr.qual; + + if (qual == SATA_ADDR_DCPORT) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_ioctl: disconnect device at port %d", cport)); + } else { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_ioctl: disconnect device at port %d:%d", + cport, pmport)); + } + + /* We are addressing attached device, not a port */ + sata_device->satadev_addr.qual = + sdinfo->satadrv_addr.qual; + tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst), + &sata_device->satadev_addr); + if (tdip != NULL && ndi_devi_offline(tdip, + NDI_DEVI_REMOVE) != NDI_SUCCESS) { + /* + * Problem : + * The target node remained attached. + * This happens when the device file was open + * or a node was waiting for resources. + * Cannot do anything about it. + */ + if (qual == SATA_ADDR_DCPORT) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_ioctl: disconnect: could " + "not unconfigure device before " + "disconnecting the SATA port %d", + cport)); + } else { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_ioctl: disconnect: could " + "not unconfigure device before " + "disconnecting the SATA port %d:%d", + cport, pmport)); + } + /* + * Set DEVICE REMOVED state in the target + * node. It will prevent access to the device + * even when a new device is attached, until + * the old target node is released, removed and + * recreated for a new device. + */ + sata_set_device_removed(tdip); + + /* + * Instruct event daemon to try the target + * node cleanup later. + */ + sata_set_target_node_cleanup( + sata_hba_inst, &sata_device->satadev_addr); + } + + + return (SATA_SUCCESS); +} /* @@ -9230,8 +9567,6 @@ * * A dev_info_t pointer is returned if operation is successful, NULL is * returned otherwise. - * - * No port multiplier support. */ static dev_info_t * @@ -9406,6 +9741,76 @@ return (NULL); } +/* + * Remove a target node. + */ +static void +sata_remove_target_node(sata_hba_inst_t *sata_hba_inst, + sata_address_t *sata_addr) +{ + dev_info_t *tdip; + uint8_t cport = sata_addr->cport; + uint8_t pmport = sata_addr->pmport; + uint8_t qual = sata_addr->qual; + + /* Note the sata daemon uses the address of the port/pmport */ + ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT); + + /* Remove target node */ + tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), cport, pmport); + if (tdip != NULL) { + /* + * Target node exists. Unconfigure device + * then remove the target node (one ndi + * operation). + */ + if (ndi_devi_offline(tdip, NDI_DEVI_REMOVE) != NDI_SUCCESS) { + /* + * PROBLEM - no device, but target node remained. This + * happens when the file was open or node was waiting + * for resources. + */ + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_remove_target_node: " + "Failed to remove target node for " + "detached SATA device.")); + /* + * Set target node state to DEVI_DEVICE_REMOVED. But + * re-check first that the node still exists. + */ + tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), + cport, pmport); + if (tdip != NULL) { + sata_set_device_removed(tdip); + /* + * Instruct event daemon to retry the cleanup + * later. + */ + sata_set_target_node_cleanup(sata_hba_inst, + sata_addr); + } + } + + if (qual == SATA_ADDR_CPORT) + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached at port %d", cport); + else + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached at port %d:%d", + cport, pmport); + } +#ifdef SATA_DEBUG + else { + if (qual == SATA_ADDR_CPORT) + sata_log(sata_hba_inst, CE_WARN, + "target node not found at port %d", cport); + else + sata_log(sata_hba_inst, CE_WARN, + "target node not found at port %d:%d", + cport, pmport); + } +#endif +} /* @@ -9424,14 +9829,14 @@ * * This function cannot be called in interrupt context - it may sleep. * - * NOte: Port multiplier is not supported yet, although there may be some - * pieces of code referencing to it. + * Note: Port multiplier is supported. */ static int sata_reprobe_port(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device, int flag) { sata_cport_info_t *cportinfo; + sata_pmult_info_t *pmultinfo; sata_drive_info_t *sdinfo, *osdinfo; boolean_t init_device = B_FALSE; int prev_device_type = SATA_DTYPE_NONE; @@ -9439,11 +9844,28 @@ int prev_device_state = 0; clock_t start_time; int retry = B_FALSE; + uint8_t cport = sata_device->satadev_addr.cport; int rval_probe, rval_init; + /* + * If target is pmport, sata_reprobe_pmport() will handle it. + */ + if (sata_device->satadev_addr.qual == SATA_ADDR_PMPORT || + sata_device->satadev_addr.qual == SATA_ADDR_DPMPORT) + return (sata_reprobe_pmport(sata_hba_inst, sata_device, flag)); + /* We only care about host sata cport for now */ cportinfo = SATA_CPORT_INFO(sata_hba_inst, sata_device->satadev_addr.cport); + + /* + * If a port multiplier was previously attached (we have no idea it + * still there or not), sata_reprobe_pmult() will handle it. + */ + if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) + return (sata_reprobe_pmult(sata_hba_inst, sata_device, flag)); + + /* Store sata_drive_info when a non-pmult device was attached. */ osdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); if (osdinfo != NULL) { /* @@ -9509,24 +9931,22 @@ } cportinfo->cport_state |= SATA_STATE_READY; + cportinfo->cport_state |= SATA_STATE_PROBED; + cportinfo->cport_dev_type = sata_device->satadev_type; sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); /* * If we are re-probing the port, there may be * sata_drive_info structure attached - * (or sata_pm_info, if PMult is supported). */ if (sata_device->satadev_type == SATA_DTYPE_NONE) { + /* * There is no device, so remove device info structure, * if necessary. - * Only direct attached drive is considered now, until - * port multiplier is supported. If the previously - * attached device was a port multiplier, we would need - * to take care of devices attached beyond the port - * multiplier. - */ + */ + /* Device change: Drive -> None */ SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; cportinfo->cport_dev_type = SATA_DTYPE_NONE; if (sdinfo != NULL) { @@ -9537,9 +9957,12 @@ } mutex_exit(&cportinfo->cport_mutex); return (SATA_SUCCESS); + } if (sata_device->satadev_type != SATA_DTYPE_PMULT) { + + /* Device (may) change: Drive -> Drive */ if (sdinfo == NULL) { /* * There is some device attached, but there is @@ -9578,14 +10001,36 @@ cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN; sata_device->satadev_addr.qual = sdinfo->satadrv_addr.qual; } else { - /* - * The device is a port multiplier - not handled now. - */ - cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN; + /* Device change: Drive -> PMult */ + SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; + if (sdinfo != NULL) { + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached " + "from port %d", cportinfo->cport_addr.cport); + } + + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detected at port %d", + cportinfo->cport_addr.cport); + + mutex_exit(&cportinfo->cport_mutex); + sata_alloc_pmult(sata_hba_inst, sata_device); + sata_show_pmult_info(sata_hba_inst, sata_device); + mutex_enter(&cportinfo->cport_mutex); + + /* + * Mark all the port multiplier port behind the port + * multiplier behind with link events, so that the sata daemon + * will update their status. + */ + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport); + pmultinfo->pmult_event_flags |= SATA_EVNT_DEVICE_RESET; mutex_exit(&cportinfo->cport_mutex); return (SATA_SUCCESS); } mutex_exit(&cportinfo->cport_mutex); + /* * Figure out what kind of device we are really * dealing with. Failure of identifying device does not fail this @@ -9675,6 +10120,621 @@ } /* + * Reprobe a controller port that connected to a port multiplier. + * + * NOTE: No Mutex should be hold. + */ +static int +sata_reprobe_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device, + int flag) +{ + _NOTE(ARGUNUSED(flag)) + sata_cport_info_t *cportinfo; + sata_pmult_info_t *pmultinfo; + uint8_t cport = sata_device->satadev_addr.cport; + int rval_probe; + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport); + + /* probe port */ + mutex_enter(&cportinfo->cport_mutex); + cportinfo->cport_state &= ~SATA_PORT_STATE_CLEAR_MASK; + cportinfo->cport_state |= SATA_STATE_PROBING; + mutex_exit(&cportinfo->cport_mutex); + + rval_probe = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), sata_device); + + mutex_enter(&cportinfo->cport_mutex); + if (rval_probe != SATA_SUCCESS) { + cportinfo->cport_state = SATA_PSTATE_FAILED; + SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmult: " + "SATA port %d probing failed", cport)); + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detached at port %d", cport); + mutex_exit(&cportinfo->cport_mutex); + sata_free_pmult(sata_hba_inst, sata_device); + return (SATA_FAILURE); + } + + /* + * update sata port state and set device type + */ + sata_update_port_info(sata_hba_inst, sata_device); + cportinfo->cport_state &= ~SATA_STATE_PROBING; + cportinfo->cport_state |= SATA_STATE_PROBED; + + /* + * Sanity check - Port is active? Is the link active? + * Is there any device attached? + */ + if ((cportinfo->cport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) || + (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP || + (sata_device->satadev_type == SATA_DTYPE_NONE)) { + cportinfo->cport_dev_type = SATA_DTYPE_NONE; + mutex_exit(&cportinfo->cport_mutex); + sata_free_pmult(sata_hba_inst, sata_device); + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detached at port %d", cport); + return (SATA_SUCCESS); + } + + /* + * Device changed: PMult -> Non-PMult + * + * This situation is uncommon, most possibly being caused by errors + * after which the port multiplier is not correct initialized and + * recognized. In that case the new device will be marked as unknown + * and will not be automatically probed in this routine. Instead + * system administrator could manually restart it via cfgadm(1M). + */ + if (sata_device->satadev_type != SATA_DTYPE_PMULT) { + cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN; + mutex_exit(&cportinfo->cport_mutex); + sata_free_pmult(sata_hba_inst, sata_device); + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detached at port %d", cport); + return (SATA_FAILURE); + } + + /* + * Now we know it is a port multiplier. However, if this is not the + * previously attached port multiplier - they may have different + * pmport numbers - we need to re-allocate data structures for every + * pmport and drive. + * + * Port multipliers of the same model have identical values in these + * registers, so it is still necessary to update the information of + * all drives attached to the previous port multiplier afterwards. + */ + if ((sata_device->satadev_gscr.gscr0 != pmultinfo->pmult_gscr.gscr0) || + (sata_device->satadev_gscr.gscr1 != pmultinfo->pmult_gscr.gscr1) || + (sata_device->satadev_gscr.gscr2 != pmultinfo->pmult_gscr.gscr2)) { + + /* Device changed: PMult -> another PMult */ + mutex_exit(&cportinfo->cport_mutex); + sata_free_pmult(sata_hba_inst, sata_device); + sata_alloc_pmult(sata_hba_inst, sata_device); + mutex_enter(&cportinfo->cport_mutex); + + SATADBG1(SATA_DBG_PMULT, sata_hba_inst, + "SATA port multiplier [changed] at port %d", cport); + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detected at port %d", cport); + } + + /* + * Mark all the port multiplier port behind the port + * multiplier behind with link events, so that the sata daemon + * will update their status. + */ + pmultinfo->pmult_event_flags |= SATA_EVNT_DEVICE_RESET; + mutex_exit(&cportinfo->cport_mutex); + + return (SATA_SUCCESS); +} + +/* + * Re-probe a port multiplier port, check for a device and attach info + * structures when necessary. Identify Device data is fetched, if possible. + * Assumption: sata address is already validated as port multiplier port. + * SATA_SUCCESS is returned if port is re-probed sucessfully, regardless of + * the presence of a device and its type. + * + * flag arg specifies that the function should try multiple times to identify + * device type and to initialize it, or it should return immediately on failure. + * SATA_DEV_IDENTIFY_RETRY - retry + * SATA_DEV_IDENTIFY_NORETRY - no retry + * + * SATA_FAILURE is returned if one of the operations failed. + * + * This function cannot be called in interrupt context - it may sleep. + * + * NOTE: Should be only called by sata_probe_port() in case target port is a + * port multiplier port. + * NOTE: No Mutex should be hold. + */ +static int +sata_reprobe_pmport(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device, + int flag) +{ + sata_cport_info_t *cportinfo = NULL; + sata_pmport_info_t *pmportinfo = NULL; + sata_drive_info_t *sdinfo, *osdinfo; + sata_device_t sdevice; + boolean_t init_device = B_FALSE; + int prev_device_type = SATA_DTYPE_NONE; + int prev_device_settings = 0; + int prev_device_state = 0; + clock_t start_time; + uint8_t cport = sata_device->satadev_addr.cport; + uint8_t pmport = sata_device->satadev_addr.pmport; + int rval; + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + osdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + + if (osdinfo != NULL) { + /* + * We are re-probing port with a previously attached device. + * Save previous device type and settings. + */ + prev_device_type = pmportinfo->pmport_dev_type; + prev_device_settings = osdinfo->satadrv_settings; + prev_device_state = osdinfo->satadrv_state; + } + + start_time = ddi_get_lbolt(); + + /* check parent status */ + mutex_enter(&cportinfo->cport_mutex); + if ((cportinfo->cport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) || + (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP) { + mutex_exit(&cportinfo->cport_mutex); + return (SATA_FAILURE); + } + mutex_exit(&cportinfo->cport_mutex); + +retry_probe_pmport: + + /* probe port */ + mutex_enter(&pmportinfo->pmport_mutex); + pmportinfo->pmport_state &= ~SATA_PORT_STATE_CLEAR_MASK; + pmportinfo->pmport_state |= SATA_STATE_PROBING; + mutex_exit(&pmportinfo->pmport_mutex); + + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), sata_device); + + /* might need retry because we cannot touch registers. */ + if (rval == SATA_FAILURE) { + mutex_enter(&pmportinfo->pmport_mutex); + pmportinfo->pmport_state = SATA_PSTATE_FAILED; + mutex_exit(&pmportinfo->pmport_mutex); + SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmport: " + "SATA port %d:%d probing failed", + cport, pmport)); + return (SATA_FAILURE); + } else if (rval == SATA_RETRY) { + SATA_LOG_D((sata_hba_inst, CE_WARN, "sata_reprobe_pmport: " + "SATA port %d:%d probing failed, retrying...", + cport, pmport)); + clock_t cur_time = ddi_get_lbolt(); + /* + * A device was not successfully identified or initialized. + * Track retry time for device identification. + */ + if ((cur_time - start_time) < + drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) { + /* sleep for a while */ + delay(drv_usectohz(SATA_DEV_RETRY_DLY)); + goto retry_probe_pmport; + } else { + mutex_enter(&pmportinfo->pmport_mutex); + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) + SATA_PMPORTINFO_DRV_INFO(pmportinfo)-> + satadrv_state = SATA_DSTATE_FAILED; + mutex_exit(&pmportinfo->pmport_mutex); + return (SATA_SUCCESS); + } + } + + /* + * Sanity check - Controller port is active? Is the link active? + * Is it still a port multiplier? + */ + if ((cportinfo->cport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) || + (cportinfo->cport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP || + (cportinfo->cport_dev_type != SATA_DTYPE_PMULT)) { + /* + * Port in non-usable state or no link active/no + * device. Free info structure. + */ + cportinfo->cport_dev_type = SATA_DTYPE_UNKNOWN; + + sdevice.satadev_addr.cport = cport; + sdevice.satadev_addr.pmport = pmport; + sdevice.satadev_addr.qual = SATA_ADDR_PMULT; + mutex_exit(&cportinfo->cport_mutex); + + sata_free_pmult(sata_hba_inst, &sdevice); + return (SATA_FAILURE); + } + + /* SATA_SUCCESS NOW */ + /* + * update sata port state and set device type + */ + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + pmportinfo->pmport_state &= ~SATA_STATE_PROBING; + + /* + * Sanity check - Port is active? Is the link active? + * Is there any device attached? + */ + if ((pmportinfo->pmport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) || + (pmportinfo->pmport_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP) { + /* + * Port in non-usable state or no link active/no device. + * Free info structure if necessary (direct attached drive + * only, for now! + */ + sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + /* Add here differentiation for device attached or not */ + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + mutex_exit(&pmportinfo->pmport_mutex); + if (sdinfo != NULL) + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + return (SATA_SUCCESS); + } + + pmportinfo->pmport_state |= SATA_STATE_READY; + pmportinfo->pmport_dev_type = sata_device->satadev_type; + sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + + /* + * If we are re-probing the port, there may be + * sata_drive_info structure attached + * (or sata_pm_info, if PMult is supported). + */ + if (sata_device->satadev_type == SATA_DTYPE_NONE) { + /* + * There is no device, so remove device info structure, + * if necessary. + */ + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + if (sdinfo != NULL) { + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached from port %d:%d", + cport, pmport); + } + mutex_exit(&pmportinfo->pmport_mutex); + return (SATA_SUCCESS); + } + + /* this should not be a pmult */ + ASSERT(sata_device->satadev_type != SATA_DTYPE_PMULT); + if (sdinfo == NULL) { + /* + * There is some device attached, but there is + * no sata_drive_info structure - allocate one + */ + mutex_exit(&pmportinfo->pmport_mutex); + sdinfo = kmem_zalloc(sizeof (sata_drive_info_t), + KM_SLEEP); + mutex_enter(&pmportinfo->pmport_mutex); + /* + * Recheck, that the port state did not change when we + * released mutex. + */ + if (pmportinfo->pmport_state & SATA_STATE_READY) { + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = sdinfo; + sdinfo->satadrv_addr = pmportinfo->pmport_addr; + sdinfo->satadrv_addr.qual = SATA_ADDR_DPMPORT; + sdinfo->satadrv_type = SATA_DTYPE_UNKNOWN; + sdinfo->satadrv_state = SATA_STATE_UNKNOWN; + } else { + /* + * Port is not in ready state, we + * cannot attach a device. + */ + mutex_exit(&pmportinfo->pmport_mutex); + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + return (SATA_SUCCESS); + } + /* + * Since we are adding device, presumably new one, + * indicate that it should be initalized, + * as well as some internal framework states). + */ + init_device = B_TRUE; + } + + pmportinfo->pmport_dev_type = SATA_DTYPE_UNKNOWN; + sata_device->satadev_addr.qual = sdinfo->satadrv_addr.qual; + + mutex_exit(&pmportinfo->pmport_mutex); + /* + * Figure out what kind of device we are really + * dealing with. + */ + rval = sata_probe_device(sata_hba_inst, sata_device); + + mutex_enter(&pmportinfo->pmport_mutex); + if (rval == SATA_SUCCESS) { + /* + * If we are dealing with the same type of a device as before, + * restore its settings flags. + */ + if (osdinfo != NULL && + sata_device->satadev_type == prev_device_type) + sdinfo->satadrv_settings = prev_device_settings; + + mutex_exit(&pmportinfo->pmport_mutex); + /* Set initial device features, if necessary */ + if (init_device == B_TRUE) { + rval = sata_initialize_device(sata_hba_inst, sdinfo); + } + if (rval == SATA_SUCCESS) + return (rval); + } else { + /* + * If there was some device info before we probe the device, + * restore previous device setting, so we can retry from scratch + * later. Providing, of course, that device has not disappeared + * during probing process. + */ + if (sata_device->satadev_type != SATA_DTYPE_NONE) { + if (osdinfo != NULL) { + pmportinfo->pmport_dev_type = prev_device_type; + sdinfo->satadrv_type = prev_device_type; + sdinfo->satadrv_state = prev_device_state; + } + } else { + /* device is gone */ + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + mutex_exit(&pmportinfo->pmport_mutex); + return (SATA_SUCCESS); + } + mutex_exit(&pmportinfo->pmport_mutex); + } + + if (flag == SATA_DEV_IDENTIFY_RETRY) { + clock_t cur_time = ddi_get_lbolt(); + /* + * A device was not successfully identified or initialized. + * Track retry time for device identification. + */ + if ((cur_time - start_time) < + drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) { + /* sleep for a while */ + delay(drv_usectohz(SATA_DEV_RETRY_DLY)); + goto retry_probe_pmport; + } else { + mutex_enter(&pmportinfo->pmport_mutex); + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) + SATA_PMPORTINFO_DRV_INFO(pmportinfo)-> + satadrv_state = SATA_DSTATE_FAILED; + mutex_exit(&pmportinfo->pmport_mutex); + } + } + return (SATA_SUCCESS); +} + +/* + * Allocated related structure for a port multiplier and its device ports + * + * Port multiplier should be ready and probed, and related information like + * the number of the device ports should be store in sata_device_t. + * + * NOTE: No Mutex should be hold. + */ +static void +sata_alloc_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) +{ + dev_info_t *dip = SATA_DIP(sata_hba_inst); + sata_cport_info_t *cportinfo = NULL; + sata_pmult_info_t *pmultinfo = NULL; + sata_pmport_info_t *pmportinfo = NULL; + dev_t minor_number; + char name[16]; + uint8_t cport = sata_device->satadev_addr.cport; + int npmport; + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + + /* This function might be called while a port-mult is hot-plugged. */ + mutex_enter(&cportinfo->cport_mutex); + + /* dev_type's not updated when get called from sata_reprobe_port() */ + cportinfo->cport_dev_type = SATA_DTYPE_PMULT; + if (SATA_CPORTINFO_PMULT_INFO(cportinfo) == NULL) { + /* Create a pmult_info structure */ + SATA_CPORTINFO_PMULT_INFO(cportinfo) = + kmem_zalloc(sizeof (sata_pmult_info_t), KM_SLEEP); + } + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + + pmultinfo->pmult_addr = sata_device->satadev_addr; + pmultinfo->pmult_addr.qual = SATA_ADDR_PMULT; + pmultinfo->pmult_state = SATA_STATE_PROBING; + pmultinfo->pmult_gscr = sata_device->satadev_gscr; + pmultinfo->pmult_num_dev_ports = sata_device->satadev_add_info; + + /* Initialize pmport_info structure */ + for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports; + npmport++) { + + /* if everything is allocated, skip */ + if (SATA_PMPORT_INFO(sata_hba_inst, cport, npmport) != NULL) + continue; + + pmportinfo = kmem_zalloc(sizeof (sata_pmport_info_t), KM_SLEEP); + mutex_init(&pmportinfo->pmport_mutex, NULL, MUTEX_DRIVER, NULL); + mutex_exit(&cportinfo->cport_mutex); + + mutex_enter(&pmportinfo->pmport_mutex); + pmportinfo->pmport_addr.cport = cport; + pmportinfo->pmport_addr.pmport = (uint8_t)npmport; + pmportinfo->pmport_addr.qual = SATA_ADDR_PMPORT; + pmportinfo->pmport_state &= ~SATA_PORT_STATE_CLEAR_MASK; + mutex_exit(&pmportinfo->pmport_mutex); + + mutex_enter(&cportinfo->cport_mutex); + SATA_PMPORT_INFO(sata_hba_inst, cport, npmport) = pmportinfo; + + /* Create an attachment point */ + minor_number = SATA_MAKE_AP_MINOR(ddi_get_instance(dip), + cport, (uint8_t)npmport, SATA_ADDR_PMPORT); + (void) sprintf(name, "%d.%d", cport, npmport); + + if (ddi_create_minor_node(dip, name, S_IFCHR, minor_number, + DDI_NT_SATA_ATTACHMENT_POINT, 0) != DDI_SUCCESS) { + sata_log(sata_hba_inst, CE_WARN, "sata_hba_attach: " + "cannot create SATA attachment point for " + "port %d:%d", cport, npmport); + } + } + + pmultinfo->pmult_state &= ~SATA_STATE_PROBING; + pmultinfo->pmult_state |= (SATA_STATE_PROBED|SATA_STATE_READY); + + mutex_exit(&cportinfo->cport_mutex); +} + +/* + * Free data structures when a port multiplier is removed. + * + * NOTE: No Mutex should be hold. + */ +static void +sata_free_pmult(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) +{ + sata_cport_info_t *cportinfo; + sata_pmult_info_t *pmultinfo; + sata_pmport_info_t *pmportinfo; + sata_device_t pmport_device; + sata_drive_info_t *sdinfo; + dev_info_t *tdip; + char name[16]; + uint8_t cport = sata_device->satadev_addr.cport; + int npmport; + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + + /* This function might be called while port-mult is hot plugged. */ + mutex_enter(&cportinfo->cport_mutex); + + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + ASSERT(pmultinfo != NULL); + + /* Free pmport_info structure */ + for (npmport = 0; npmport < pmultinfo->pmult_num_dev_ports; + npmport++) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, npmport); + if (pmportinfo == NULL) + continue; + mutex_exit(&cportinfo->cport_mutex); + + mutex_enter(&pmportinfo->pmport_mutex); + sdinfo = pmportinfo->pmport_sata_drive; + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + mutex_exit(&pmportinfo->pmport_mutex); + + /* Remove attachment point. */ + name[0] = '\0'; + (void) sprintf(name, "%d.%d", cport, npmport); + ddi_remove_minor_node(SATA_DIP(sata_hba_inst), name); + sata_log(sata_hba_inst, CE_NOTE, + "Remove attachment point of port %d:%d", + cport, npmport); + + /* + * Rumove target node + */ + bzero(&pmport_device, sizeof (sata_device_t)); + pmport_device.satadev_rev = SATA_DEVICE_REV; + pmport_device.satadev_addr.cport = cport; + pmport_device.satadev_addr.pmport = (uint8_t)npmport; + pmport_device.satadev_addr.qual = SATA_ADDR_DPMPORT; + + tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst), + &(pmport_device.satadev_addr)); + if (tdip != NULL && ndi_devi_offline(tdip, + NDI_DEVI_REMOVE) != NDI_SUCCESS) { + /* + * Problem : + * The target node remained attached. + * This happens when the device file was open + * or a node was waiting for resources. + * Cannot do anything about it. + */ + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_free_pmult: could not unconfigure device " + "before disconnecting the SATA port %d:%d", + cport, npmport)); + + /* + * Set DEVICE REMOVED state in the target + * node. It will prevent access to the device + * even when a new device is attached, until + * the old target node is released, removed and + * recreated for a new device. + */ + sata_set_device_removed(tdip); + + /* + * Instruct event daemon to try the target + * node cleanup later. + */ + sata_set_target_node_cleanup( + sata_hba_inst, &(pmport_device.satadev_addr)); + + } + mutex_enter(&cportinfo->cport_mutex); + + /* + * Add here differentiation for device attached or not + */ + if (sdinfo != NULL) { + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached from port %d:%d", + cport, npmport); + kmem_free(sdinfo, sizeof (sata_drive_info_t)); + } + + mutex_destroy(&pmportinfo->pmport_mutex); + kmem_free(pmportinfo, sizeof (sata_pmport_info_t)); + } + + kmem_free(pmultinfo, sizeof (sata_pmult_info_t)); + + cportinfo->cport_devp.cport_sata_pmult = NULL; + cportinfo->cport_dev_type = SATA_DTYPE_NONE; + + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detached at port %d", cport); + + mutex_exit(&cportinfo->cport_mutex); +} + +/* * Initialize device * Specified device is initialized to a default state. * @@ -9826,6 +10886,8 @@ * returns 1 if address is valid but device is not attached, * returns -1 if bad address or device is of an unsupported type. * Upon return sata_device argument is set. + * + * Port multiplier is supported now. */ static int sata_validate_scsi_address(sata_hba_inst_t *sata_hba_inst, @@ -9859,8 +10921,7 @@ cportinfo->cport_dev_type == SATA_DTYPE_NONE) goto out; - if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT || - (cportinfo->cport_dev_type & + if ((cportinfo->cport_dev_type & SATA_VALID_DEV_TYPE) == 0) { rval = -1; goto out; @@ -9965,6 +11026,7 @@ static int sata_probe_device(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) { + sata_pmport_info_t *pmportinfo; sata_drive_info_t *sdinfo; sata_drive_info_t new_sdinfo; /* local drive info struct */ int rval; @@ -9978,6 +11040,13 @@ mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, sata_device->satadev_addr.cport))); + if (sata_device->satadev_addr.qual == SATA_ADDR_DPMPORT) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + sata_device->satadev_addr.cport, + sata_device->satadev_addr.pmport); + ASSERT(pmportinfo != NULL); + } + /* Get pointer to port-linked sata device info structure */ sdinfo = sata_get_device_info(sata_hba_inst, sata_device); if (sdinfo != NULL) { @@ -10041,11 +11110,14 @@ SATA_CPORT_DEV_TYPE(sata_hba_inst, sata_device->satadev_addr.cport) = sdinfo->satadrv_type; - else /* SATA_ADDR_DPMPORT */ + else { /* SATA_ADDR_DPMPORT */ + mutex_enter(&pmportinfo->pmport_mutex); SATA_PMPORT_DEV_TYPE(sata_hba_inst, sata_device->satadev_addr.cport, sata_device->satadev_addr.pmport) = sdinfo->satadrv_type; + mutex_exit(&pmportinfo->pmport_mutex); + } mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, sata_device->satadev_addr.cport))); return (SATA_SUCCESS); @@ -10069,6 +11141,7 @@ SATA_DTYPE_UNKNOWN; else { /* SATA_ADDR_DPMPORT */ + mutex_enter(&pmportinfo->pmport_mutex); if ((SATA_PMULT_INFO(sata_hba_inst, sata_device->satadev_addr.cport) != NULL) && (SATA_PMPORT_INFO(sata_hba_inst, @@ -10078,6 +11151,7 @@ sata_device->satadev_addr.cport, sata_device->satadev_addr.pmport) = SATA_DTYPE_UNKNOWN; + mutex_exit(&pmportinfo->pmport_mutex); } } mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, @@ -10085,7 +11159,6 @@ return (rval); } - /* * Get pointer to sata_drive_info structure. * @@ -10132,6 +11205,11 @@ if (pmport > SATA_NUM_PMPORTS(sata_hba_inst, cport)) return (NULL); + if (!(SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) & + (SATA_STATE_PROBED | SATA_STATE_READY))) + /* Port multiplier port not probed yet */ + return (NULL); + return (SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport)); } @@ -10300,7 +11378,7 @@ cmn_err(CE_CONT, "?\t%s port %d\n", msg_buf, sdinfo->satadrv_addr.cport); else - cmn_err(CE_CONT, "?\t%s port %d pmport %d\n", + cmn_err(CE_CONT, "?\t%s port %d:%d\n", msg_buf, sdinfo->satadrv_addr.cport, sdinfo->satadrv_addr.pmport); @@ -10414,6 +11492,59 @@ } } +/* + * Log/display port multiplier information + */ +static void +sata_show_pmult_info(sata_hba_inst_t *sata_hba_inst, + sata_device_t *sata_device) +{ + _NOTE(ARGUNUSED(sata_hba_inst)) + + char msg_buf[MAXPATHLEN]; + uint32_t gscr0, gscr1, gscr2, gscr64; + + gscr0 = sata_device->satadev_gscr.gscr0; + gscr1 = sata_device->satadev_gscr.gscr1; + gscr2 = sata_device->satadev_gscr.gscr2; + gscr64 = sata_device->satadev_gscr.gscr64; + + cmn_err(CE_CONT, "?Port Multiplier %d device-ports found at port %d", + sata_device->satadev_add_info, sata_device->satadev_addr.cport); + + (void) sprintf(msg_buf, "\tVendor_ID 0x%04x, Module_ID 0x%04x", + gscr0 & 0xffff, (gscr0 >> 16) & 0xffff); + cmn_err(CE_CONT, "?%s", msg_buf); + + (void) strcpy(msg_buf, "\tSupport SATA PMP Spec "); + if (gscr1 & (1 << 3)) + (void) strlcat(msg_buf, "1.2", MAXPATHLEN); + else if (gscr1 & (1 << 2)) + (void) strlcat(msg_buf, "1.1", MAXPATHLEN); + else if (gscr1 & (1 << 1)) + (void) strlcat(msg_buf, "1.0", MAXPATHLEN); + else + (void) strlcat(msg_buf, "unknown", MAXPATHLEN); + cmn_err(CE_CONT, "?%s", msg_buf); + + (void) strcpy(msg_buf, "\tSupport "); + if (gscr64 & (1 << 3)) + (void) strlcat(msg_buf, "Asy-Notif, ", + MAXPATHLEN); + if (gscr64 & (1 << 2)) + (void) strlcat(msg_buf, "Dyn-SSC, ", MAXPATHLEN); + if (gscr64 & (1 << 1)) + (void) strlcat(msg_buf, "Iss-PMREQ, ", MAXPATHLEN); + if (gscr64 & (1 << 0)) + (void) strlcat(msg_buf, "BIST", MAXPATHLEN); + if ((gscr64 & 0xf) == 0) + (void) strlcat(msg_buf, "nothing", MAXPATHLEN); + cmn_err(CE_CONT, "?%s", msg_buf); + + (void) sprintf(msg_buf, "\tNumber of exposed device fan-out ports: %d", + gscr2 & SATA_PMULT_PORTNUM_MASK); + cmn_err(CE_CONT, "?%s", msg_buf); +} /* * sata_save_drive_settings extracts current setting of the device and stores @@ -11128,7 +12259,6 @@ sata_common_free_dma_rsrcs(spx); } - /* * Fetch Device Identify data. * Send DEVICE IDENTIFY or IDENTIFY PACKET DEVICE (depending on a device type) @@ -11631,19 +12761,6 @@ /* - * Update port SCR block - */ -static void -sata_update_port_scr(sata_port_scr_t *port_scr, sata_device_t *device) -{ - port_scr->sstatus = device->satadev_scr.sstatus; - port_scr->serror = device->satadev_scr.serror; - port_scr->scontrol = device->satadev_scr.scontrol; - port_scr->sactive = device->satadev_scr.sactive; - port_scr->snotific = device->satadev_scr.snotific; -} - -/* * Update state and copy port ss* values from passed sata_device structure. * sata_address is validated - if not valid, nothing is changed in sata_scsi * configuration struct. @@ -11655,54 +12772,60 @@ */ static void sata_update_port_info(sata_hba_inst_t *sata_hba_inst, - sata_device_t *sata_device) -{ - ASSERT(mutex_owned(&SATA_CPORT_MUTEX(sata_hba_inst, - sata_device->satadev_addr.cport))); + sata_device_t *sata_device) +{ + sata_cport_info_t *cportinfo; if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT || sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) { - - sata_cport_info_t *cportinfo; - if (SATA_NUM_CPORTS(sata_hba_inst) <= sata_device->satadev_addr.cport) return; cportinfo = SATA_CPORT_INFO(sata_hba_inst, sata_device->satadev_addr.cport); - sata_update_port_scr(&cportinfo->cport_scr, sata_device); + + ASSERT(mutex_owned(&cportinfo->cport_mutex)); + cportinfo->cport_scr = sata_device->satadev_scr; /* Preserve SATA_PSTATE_SHUTDOWN flag */ cportinfo->cport_state &= ~(SATA_PSTATE_PWRON | SATA_PSTATE_PWROFF | SATA_PSTATE_FAILED); cportinfo->cport_state |= sata_device->satadev_state & SATA_PSTATE_VALID; - } else { - sata_pmport_info_t *pmportinfo; - - if ((sata_device->satadev_addr.qual != SATA_ADDR_PMPORT) || - (sata_device->satadev_addr.qual != SATA_ADDR_DPMPORT) || - SATA_NUM_PMPORTS(sata_hba_inst, - sata_device->satadev_addr.cport) < - sata_device->satadev_addr.pmport) - return; - - pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, - sata_device->satadev_addr.cport, - sata_device->satadev_addr.pmport); - sata_update_port_scr(&pmportinfo->pmport_scr, sata_device); - - /* Preserve SATA_PSTATE_SHUTDOWN flag */ - pmportinfo->pmport_state &= - ~(SATA_PSTATE_PWRON | SATA_PSTATE_PWROFF | - SATA_PSTATE_FAILED); - pmportinfo->pmport_state |= - sata_device->satadev_state & SATA_PSTATE_VALID; - } -} - - + } +} + +void +sata_update_pmport_info(sata_hba_inst_t *sata_hba_inst, + sata_device_t *sata_device) +{ + sata_pmport_info_t *pmportinfo; + + if ((sata_device->satadev_addr.qual != SATA_ADDR_PMPORT && + sata_device->satadev_addr.qual != SATA_ADDR_DPMPORT) || + SATA_NUM_PMPORTS(sata_hba_inst, + sata_device->satadev_addr.cport) < + sata_device->satadev_addr.pmport) { + SATADBG1(SATA_DBG_PMULT, sata_hba_inst, + "sata_update_port_info: error address %p.", + &sata_device->satadev_addr); + return; + } + + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + sata_device->satadev_addr.cport, + sata_device->satadev_addr.pmport); + + ASSERT(mutex_owned(&pmportinfo->pmport_mutex)); + pmportinfo->pmport_scr = sata_device->satadev_scr; + + /* Preserve SATA_PSTATE_SHUTDOWN flag */ + pmportinfo->pmport_state &= + ~(SATA_PSTATE_PWRON | SATA_PSTATE_PWROFF | SATA_PSTATE_FAILED); + pmportinfo->pmport_state |= + sata_device->satadev_state & SATA_PSTATE_VALID; +} /* * Extract SATA port specification from an IOCTL argument. @@ -11710,7 +12833,7 @@ * This function return the port the user land send us as is, unless it * cannot retrieve port spec, then -1 is returned. * - * Note: Only cport - no port multiplier port. + * Support port multiplier. */ static int32_t sata_get_port_num(sata_hba_inst_t *sata_hba_inst, struct devctl_iocdata *dcp) @@ -11736,22 +12859,41 @@ * way as a scsi target number. * At this moment it carries only cport number. * - * No PMult hotplug support. + * PMult hotplug is supported now. * * Returns dev_info_t pointer if target device was found, NULL otherwise. */ static dev_info_t * -sata_get_target_dip(dev_info_t *dip, int32_t port) +sata_get_target_dip(dev_info_t *dip, uint8_t cport, uint8_t pmport) { dev_info_t *cdip = NULL; int target, tgt; - int ncport; int circ; - - ncport = port & SATA_CFGA_CPORT_MASK; - target = SATA_TO_SCSI_TARGET(ncport, 0, SATA_ADDR_DCPORT); - + uint8_t qual; + + sata_hba_inst_t *sata_hba_inst; + scsi_hba_tran_t *scsi_hba_tran; + + /* Get target id */ + scsi_hba_tran = ddi_get_driver_private(dip); + if (scsi_hba_tran == NULL) + return (NULL); + + sata_hba_inst = scsi_hba_tran->tran_hba_private; + + if (sata_hba_inst == NULL) + return (NULL); + + /* Identify a port-mult by cport_info.cport_dev_type */ + if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT) + qual = SATA_ADDR_DPMPORT; + else + qual = SATA_ADDR_DCPORT; + + target = SATA_TO_SCSI_TARGET(cport, pmport, qual); + + /* Retrieve target dip */ ndi_devi_enter(dip, &circ); for (cdip = ddi_get_child(dip); cdip != NULL; ) { dev_info_t *next = ddi_get_next_sibling(cdip); @@ -11783,9 +12925,6 @@ * the AP - it is not a sata_address. * It is a combination of cport, pmport and address qualifier, encoded same * way as a scsi target number. - * At this moment it carries only cport number. - * - * No PMult hotplug support. * * Returns dev_info_t pointer if target device was found, NULL otherwise. */ @@ -11843,27 +12982,32 @@ * Failure of the port_deactivate may keep port in the physically active state, * or may fail the port. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_disconnect(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) { - sata_drive_info_t *sdinfo = NULL; + sata_drive_info_t *sdinfo = NULL, *subsdinfo = NULL; sata_cport_info_t *cportinfo = NULL; sata_pmport_info_t *pmportinfo = NULL; sata_pmult_info_t *pmultinfo = NULL; - dev_info_t *tdip; + sata_device_t subsdevice; int cport, pmport, qual; int rval = SATA_SUCCESS; + int npmport = 0; int rv = 0; cport = sata_device->satadev_addr.cport; pmport = sata_device->satadev_addr.pmport; qual = sata_device->satadev_addr.qual; - ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT); + ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT); + if (qual == SATA_ADDR_DCPORT) + qual = SATA_ADDR_CPORT; + else + qual = SATA_ADDR_PMPORT; /* * DEVCTL_AP_DISCONNECT invokes sata_hba_inst->satahba_tran-> @@ -11878,131 +13022,184 @@ /* Check the current state of the port */ rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) (SATA_DIP(sata_hba_inst), sata_device); - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - sata_update_port_info(sata_hba_inst, sata_device); - if (rval != SATA_SUCCESS || - (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { - /* Device port status is unknown or it is in failed state */ - if (qual == SATA_ADDR_PMPORT) { - SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + + /* + * Processing port mulitiplier + */ + if (qual == SATA_ADDR_CPORT && + SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT) { + mutex_enter(&cportinfo->cport_mutex); + + /* Check controller port status */ + sata_update_port_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS || + (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { + /* + * Device port status is unknown or it is in failed + * state + */ + SATA_CPORT_STATE(sata_hba_inst, cport) = SATA_PSTATE_FAILED; SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst, "sata_hba_ioctl: connect: failed to deactivate " "SATA port %d", cport); - } else { - SATA_CPORT_STATE(sata_hba_inst, cport) = + mutex_exit(&cportinfo->cport_mutex); + return (EIO); + } + + /* Disconnect all sub-devices. */ + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + if (pmultinfo != NULL) { + + for (npmport = 0; npmport < SATA_NUM_PMPORTS( + sata_hba_inst, cport); npmport ++) { + subsdinfo = SATA_PMPORT_DRV_INFO( + sata_hba_inst, cport, npmport); + if (subsdinfo == NULL) + continue; + + subsdevice.satadev_addr = subsdinfo-> + satadrv_addr; + + mutex_exit(&cportinfo->cport_mutex); + if (sata_ioctl_disconnect(sata_hba_inst, + &subsdevice) == SATA_SUCCESS) { + SATADBG2(SATA_DBG_PMULT, sata_hba_inst, + "[Remove] device at port %d:%d " + "successfully.", cport, npmport); + } + mutex_enter(&cportinfo->cport_mutex); + } + } + + /* Disconnect the port multiplier */ + cportinfo->cport_state &= ~SATA_STATE_READY; + mutex_exit(&cportinfo->cport_mutex); + + sata_device->satadev_addr.qual = qual; + rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), sata_device); + + sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr, + SE_NO_HINT); + + mutex_enter(&cportinfo->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS && + sata_device->satadev_state & SATA_PSTATE_FAILED) { + cportinfo->cport_state = SATA_PSTATE_FAILED; + rv = EIO; + } else { + cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN; + } + mutex_exit(&cportinfo->cport_mutex); + + return (rv); + } + + /* + * Process non-port-multiplier device - it could be a drive connected + * to a port multiplier port or a controller port. + */ + if (qual == SATA_ADDR_PMPORT) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS || + (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { + SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = SATA_PSTATE_FAILED; SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst, "sata_hba_ioctl: connect: failed to deactivate " "SATA port %d:%d", cport, pmport); - } - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - return (EIO); - } - /* - * Set port's dev_state to not ready - this will disable - * an access to a potentially attached device. - */ - cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); - if (qual == SATA_ADDR_PMPORT) { - pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + mutex_exit(&pmportinfo->pmport_mutex); + return (EIO); + } + if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE) { sdinfo = pmportinfo->pmport_sata_drive; ASSERT(sdinfo != NULL); } + + /* + * Set port's dev_state to not ready - this will disable + * an access to a potentially attached device. + */ pmportinfo->pmport_state &= ~SATA_STATE_READY; - } else { - /* Assuming cport */ - - if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) { - if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) { - pmultinfo = - cportinfo->cport_devp.cport_sata_pmult; - ASSERT(pmultinfo != NULL); - } else { - sdinfo = cportinfo->cport_devp.cport_sata_drive; - } + + /* Remove and release sata_drive info structure. */ + if (sdinfo != NULL) { + if ((sdinfo->satadrv_type & + SATA_VALID_DEV_TYPE) != 0) { + /* + * If a target node exists, try to offline + * a device and remove target node. + */ + mutex_exit(&pmportinfo->pmport_mutex); + (void) sata_offline_device(sata_hba_inst, + sata_device, sdinfo); + mutex_enter(&pmportinfo->pmport_mutex); + } + + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + (void) kmem_free((void *)sdinfo, + sizeof (sata_drive_info_t)); + } + mutex_exit(&pmportinfo->pmport_mutex); + + } else if (qual == SATA_ADDR_CPORT) { + mutex_enter(&cportinfo->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS || + (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { + /* + * Device port status is unknown or it is in failed + * state + */ + SATA_CPORT_STATE(sata_hba_inst, cport) = + SATA_PSTATE_FAILED; + SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst, + "sata_hba_ioctl: connect: failed to deactivate " + "SATA port %d", cport); + mutex_exit(&cportinfo->cport_mutex); + return (EIO); + } + + if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) { + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + ASSERT(pmultinfo != NULL); + } else if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) { + sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); + ASSERT(sdinfo != NULL); } cportinfo->cport_state &= ~SATA_STATE_READY; - } - if (sdinfo != NULL) { - if ((sdinfo->satadrv_type & (SATA_VALID_DEV_TYPE)) != 0) { - /* - * If a target node exists, try to offline - * a device and remove target node. - */ - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - /* We are addressing attached device, not a port */ - sata_device->satadev_addr.qual = - sdinfo->satadrv_addr.qual; - tdip = sata_get_scsi_target_dip(SATA_DIP(sata_hba_inst), - &sata_device->satadev_addr); - if (tdip != NULL && ndi_devi_offline(tdip, - NDI_DEVI_REMOVE) != NDI_SUCCESS) { - /* - * Problem - * The target node remained attached. - * This happens when the device file was open - * or a node was waiting for resources. - * Cannot do anything about it. - */ - if (qual == SATA_ADDR_CPORT) { - SATA_LOG_D((sata_hba_inst, CE_WARN, - "sata_hba_ioctl: disconnect: could " - "not unconfigure device before " - "disconnecting the SATA port %d", - cport)); - } else { - SATA_LOG_D((sata_hba_inst, CE_WARN, - "sata_hba_ioctl: disconnect: could " - "not unconfigure device before " - "disconnecting the SATA port %d:%d", - cport, pmport)); - } - /* - * Set DEVICE REMOVED state in the target - * node. It will prevent access to the device - * even when a new device is attached, until - * the old target node is released, removed and - * recreated for a new device. - */ - sata_set_device_removed(tdip); - - /* - * Instruct event daemon to try the target - * node cleanup later. - */ - sata_set_target_node_cleanup( - sata_hba_inst, &sata_device->satadev_addr); - } - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - } - - /* Remove and release sata_drive info structure. */ - if (pmportinfo != NULL) { - SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport) = - NULL; - pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; - } else { + + if (sdinfo != NULL) { + if ((sdinfo->satadrv_type & + SATA_VALID_DEV_TYPE) != 0) { + /* + * If a target node exists, try to offline + * a device and remove target node. + */ + mutex_exit(&cportinfo->cport_mutex); + (void) sata_offline_device(sata_hba_inst, + sata_device, sdinfo); + mutex_enter(&cportinfo->cport_mutex); + } + SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; cportinfo->cport_dev_type = SATA_DTYPE_NONE; - } - (void) kmem_free((void *)sdinfo, sizeof (sata_drive_info_t)); - } -#if 0 - else if (pmultinfo != NULL) { - /* - * Port Multiplier itself needs special handling. - * All device ports need to be processed here! - */ - } -#endif - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); + (void) kmem_free((void *)sdinfo, + sizeof (sata_drive_info_t)); + } + mutex_exit(&cportinfo->cport_mutex); + } + /* Just ask HBA driver to deactivate port */ - /* sata_device->satadev_addr.qual = SATA_ADDR_DCPORT; */ + sata_device->satadev_addr.qual = qual; rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst)) (SATA_DIP(sata_hba_inst), sata_device); @@ -12014,33 +13211,46 @@ sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr, SE_NO_HINT); - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - sata_update_port_info(sata_hba_inst, sata_device); - - if (rval != SATA_SUCCESS) { - /* - * Port deactivation failure - do not - * change port state unless the state - * returned by HBA indicates a port failure. - * NOTE: device structures were released, so devices now are - * invisible! Port reset is needed to re-enumerate devices. - */ - if (sata_device->satadev_state & SATA_PSTATE_FAILED) { - if (pmportinfo != NULL) - pmportinfo->pmport_state = SATA_PSTATE_FAILED; - else - cportinfo->cport_state = SATA_PSTATE_FAILED; + if (qual == SATA_ADDR_PMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + + if (rval != SATA_SUCCESS && + sata_device->satadev_state & SATA_PSTATE_FAILED) { + /* + * Port deactivation failure - do not change port + * state unless the state returned by HBA indicates a + * port failure. + * + * NOTE: device structures were released, so devices + * now are invisible! Port reset is needed to + * re-enumerate devices. + */ + pmportinfo->pmport_state = SATA_PSTATE_FAILED; rv = EIO; - } - } else { - /* - * Deactivation succeded. From now on the sata framework - * will not care what is happening to the device, until - * the port is activated again. - */ - cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN; - } - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); + } else { + /* + * Deactivation succeded. From now on the sata framework + * will not care what is happening to the device, until + * the port is activated again. + */ + pmportinfo->pmport_state |= SATA_PSTATE_SHUTDOWN; + } + mutex_exit(&pmportinfo->pmport_mutex); + } else if (qual == SATA_ADDR_CPORT) { + mutex_enter(&cportinfo->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + + if (rval != SATA_SUCCESS && + sata_device->satadev_state & SATA_PSTATE_FAILED) { + cportinfo->cport_state = SATA_PSTATE_FAILED; + rv = EIO; + } else { + cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN; + } + mutex_exit(&cportinfo->cport_mutex); + } + return (rv); } @@ -12066,21 +13276,29 @@ * This operation may remove port failed state and will * try to make port active and in good standing. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_connect(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) { - int cport, pmport, qual; + sata_pmport_info_t *pmportinfo = NULL; + uint8_t cport, pmport, qual; int rv = 0; cport = sata_device->satadev_addr.cport; pmport = sata_device->satadev_addr.pmport; qual = sata_device->satadev_addr.qual; - ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT); + ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT); + if (qual == SATA_ADDR_DCPORT) + qual = SATA_ADDR_CPORT; + else + qual = SATA_ADDR_PMPORT; + + if (qual == SATA_ADDR_PMPORT) + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); /* * DEVCTL_AP_CONNECT would invoke sata_hba_inst-> @@ -12098,40 +13316,46 @@ /* * Port activation failure. */ - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - sata_update_port_info(sata_hba_inst, sata_device); - if (sata_device->satadev_state & SATA_PSTATE_FAILED) { - if (qual == SATA_ADDR_DCPORT) { + if (qual == SATA_ADDR_CPORT) { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + if (sata_device->satadev_state & SATA_PSTATE_FAILED) { SATA_CPORT_STATE(sata_hba_inst, cport) = SATA_PSTATE_FAILED; SATADBG1(SATA_DBG_IOCTL_IF, sata_hba_inst, "sata_hba_ioctl: connect: failed to " "activate SATA port %d", cport); - } else { /* port multiplier device port */ + } + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + } else { /* port multiplier device port */ + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + if (sata_device->satadev_state & SATA_PSTATE_FAILED) { SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = SATA_PSTATE_FAILED; SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst, "sata_hba_ioctl: connect: failed to " "activate SATA port %d:%d", cport, pmport); - - } - } - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - SATADBG2(SATA_DBG_IOCTL_IF, sata_hba_inst, - "sata_hba_ioctl: connect: failed to activate SATA " - "port %d:%d", cport, pmport); + } + mutex_exit(&pmportinfo->pmport_mutex); + } return (EIO); } /* Virgin port state - will be updated by the port re-probe. */ - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - if (qual == SATA_ADDR_CPORT) + if (qual == SATA_ADDR_CPORT) { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); SATA_CPORT_STATE(sata_hba_inst, cport) = 0; - else /* port multiplier device port */ + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + } else { /* port multiplier device port */ + mutex_enter(&pmportinfo->pmport_mutex); SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = 0; - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); + mutex_exit(&pmportinfo->pmport_mutex); + } /* * Probe the port to find its state and attached device. @@ -12154,15 +13378,22 @@ if (sata_device->satadev_type != SATA_DTYPE_NONE) { if (qual == SATA_ADDR_CPORT) { - sata_log(sata_hba_inst, CE_WARN, - "SATA device detected at port %d", cport); - if (sata_device->satadev_type == SATA_DTYPE_UNKNOWN) { + if (sata_device->satadev_type == SATA_DTYPE_PMULT) { + sata_log(sata_hba_inst, CE_WARN, + "SATA port multiplier detected " + "at port %d", cport); + } else { + sata_log(sata_hba_inst, CE_WARN, + "SATA device detected at port %d", cport); + if (sata_device->satadev_type == + SATA_DTYPE_UNKNOWN) { /* * A device was not successfully identified */ sata_log(sata_hba_inst, CE_WARN, "Could not identify SATA " "device at port %d", cport); + } } } else { /* port multiplier device port */ sata_log(sata_hba_inst, CE_WARN, @@ -12250,7 +13481,7 @@ * If target node does not exist, a new target node is created. In both cases * an attempt is made to online (configure) the device. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_configure(sata_hba_inst_t *sata_hba_inst, @@ -12271,22 +13502,37 @@ /* Get current port state */ rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) (SATA_DIP(sata_hba_inst), sata_device); - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - sata_update_port_info(sata_hba_inst, sata_device); - - if (rval != SATA_SUCCESS || - (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { - /* - * Obviously, device on a failed port is not visible - */ - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - return (ENXIO); - } cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); - if (qual == SATA_ADDR_PMPORT) + if (qual == SATA_ADDR_DPMPORT) { pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS || + (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { + /* + * Obviously, device on a failed port is not visible + */ + mutex_exit(&pmportinfo->pmport_mutex); + return (ENXIO); + } + mutex_exit(&pmportinfo->pmport_mutex); + } else { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS || + (sata_device->satadev_state & SATA_PSTATE_FAILED) != 0) { + /* + * Obviously, device on a failed port is not visible + */ + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + return (ENXIO); + } + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + } if ((sata_device->satadev_state & SATA_PSTATE_SHUTDOWN) != 0) { /* need to activate port */ @@ -12304,40 +13550,52 @@ * unless the state returned by HBA indicates a port * failure. */ - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - sata_update_port_info(sata_hba_inst, sata_device); - if (sata_device->satadev_state & SATA_PSTATE_FAILED) { - if (qual == SATA_ADDR_PMPORT) + if (qual == SATA_ADDR_DPMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); + sata_update_pmport_info(sata_hba_inst, + sata_device); + if (sata_device->satadev_state & + SATA_PSTATE_FAILED) pmportinfo->pmport_state = SATA_PSTATE_FAILED; - else + mutex_exit(&pmportinfo->pmport_mutex); + } else { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); + sata_update_port_info(sata_hba_inst, + sata_device); + if (sata_device->satadev_state & + SATA_PSTATE_FAILED) cportinfo->cport_state = SATA_PSTATE_FAILED; - } - mutex_exit(&SATA_CPORT_INFO( - sata_hba_inst, cport)->cport_mutex); - SATA_LOG_D((sata_hba_inst, CE_WARN, - "sata_hba_ioctl: configure: " - "failed to activate SATA port %d:%d", - cport, pmport)); - return (EIO); - } - /* - * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE - * without the hint. - */ - sata_gen_sysevent(sata_hba_inst, - &sata_device->satadev_addr, SE_NO_HINT); - - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)-> - cport_mutex); - /* Virgin port state */ - if (qual == SATA_ADDR_PMPORT) - pmportinfo->pmport_state = 0; - else - cportinfo->cport_state = 0; - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); + mutex_exit(&SATA_CPORT_INFO( + sata_hba_inst, cport)->cport_mutex); + } + } + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_ioctl: configure: " + "failed to activate SATA port %d:%d", + cport, pmport)); + return (EIO); + } + /* + * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE + * without the hint. + */ + sata_gen_sysevent(sata_hba_inst, + &sata_device->satadev_addr, SE_NO_HINT); + + /* Virgin port state */ + if (qual == SATA_ADDR_DPMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); + pmportinfo->pmport_state = 0; + mutex_exit(&pmportinfo->pmport_mutex); + } else { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)-> cport_mutex); + cportinfo->cport_state = 0; + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); } /* * Always reprobe port, to get current device info. @@ -12347,7 +13605,7 @@ return (EIO); if (sata_device->satadev_type != SATA_DTYPE_NONE && target == FALSE) { - if (qual == SATA_ADDR_PMPORT) { + if (qual == SATA_ADDR_DPMPORT) { /* * That's the transition from "inactive" port * to active one with device attached. @@ -12379,7 +13637,7 @@ * For now, configure only disks and other valid target devices. */ if (!(sata_device->satadev_type & SATA_VALID_DEV_TYPE)) { - if (qual == SATA_ADDR_CPORT) { + if (qual == SATA_ADDR_DCPORT) { if (sata_device->satadev_type == SATA_DTYPE_UNKNOWN) { /* * A device was not successfully identified @@ -12411,7 +13669,7 @@ * to clear device reset condition. */ mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - if (qual == SATA_ADDR_PMPORT) + if (qual == SATA_ADDR_DPMPORT) sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT; else sata_device->satadev_addr.qual = SATA_ADDR_DCPORT; @@ -12466,29 +13724,34 @@ "%d:%d failed", cport, pmport)); return (EIO); } - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, - cport)->cport_mutex); - - if (qual == SATA_ADDR_DPMPORT) + + if (qual == SATA_ADDR_DPMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_tgtnode_clean = B_TRUE; - else + mutex_exit(&pmportinfo->pmport_mutex); + } else { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + cport)->cport_mutex); cportinfo-> cport_tgtnode_clean = B_TRUE; - - mutex_exit(&SATA_CPORT_INFO( - sata_hba_inst, cport)->cport_mutex); + mutex_exit(&SATA_CPORT_INFO( + sata_hba_inst, cport)->cport_mutex); + } } else { /* * No target node - need to create a new target node. */ - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)-> - cport_mutex); - if (qual == SATA_ADDR_DPMPORT) + if (qual == SATA_ADDR_DPMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_tgtnode_clean = B_TRUE; - else + mutex_exit(&pmportinfo->pmport_mutex); + } else { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)-> + cport_mutex); cportinfo-> cport_tgtnode_clean = B_TRUE; - - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)-> - cport_mutex); + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)-> + cport_mutex); + } + tdip = sata_create_target_node(SATA_DIP(sata_hba_inst), sata_hba_inst, &sata_device->satadev_addr); if (tdip == NULL) { @@ -12510,7 +13773,7 @@ * Even if the unconfigure fails, proceed with the * port deactivation. * - * NOTE: Port Multiplier code is not completed and tested. + * NOTE: Port Multiplier is supported now. */ static int @@ -12519,10 +13782,13 @@ { int cport, pmport, qual; int rval, rv = 0; + int npmport; sata_cport_info_t *cportinfo; - sata_pmport_info_t *pmportinfo = NULL; + sata_pmport_info_t *pmportinfo; + sata_pmult_info_t *pmultinfo; dev_info_t *tdip; sata_drive_info_t *sdinfo = NULL; + sata_device_t subsdevice; /* Sanity check */ if (SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst) == NULL) @@ -12532,33 +13798,101 @@ pmport = sata_device->satadev_addr.pmport; qual = sata_device->satadev_addr.qual; + /* SCSI_TO_SATA_ADDR_QUAL() translate ap_id into a device qualifier */ + ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT); + if (qual == SATA_ADDR_DCPORT) + qual = SATA_ADDR_CPORT; + else + qual = SATA_ADDR_PMPORT; + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + if (qual == SATA_ADDR_PMPORT) + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + + /* + * Processing port multiplier + */ + if (qual == SATA_ADDR_CPORT && + SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == SATA_DTYPE_PMULT) { + mutex_enter(&cportinfo->cport_mutex); + + /* Deactivate all sub-deices */ + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + if (pmultinfo != NULL) { + for (npmport = 0; npmport < SATA_NUM_PMPORTS( + sata_hba_inst, cport); npmport++) { + + subsdevice.satadev_addr.cport = cport; + subsdevice.satadev_addr.pmport = + (uint8_t)npmport; + subsdevice.satadev_addr.qual = + SATA_ADDR_DPMPORT; + + SATADBG2(SATA_DBG_PMULT, sata_hba_inst, + "sata_hba_ioctl: deactivate: trying to " + "deactivate SATA port %d:%d", + cport, npmport); + + mutex_exit(&cportinfo->cport_mutex); + if (sata_ioctl_deactivate(sata_hba_inst, + &subsdevice) == SATA_SUCCESS) { + SATADBG2(SATA_DBG_PMULT, sata_hba_inst, + "[Deactivate] device at port %d:%d " + "successfully.", cport, npmport); + } + mutex_enter(&cportinfo->cport_mutex); + } + } + + /* Deactivate the port multiplier now. */ + cportinfo->cport_state &= ~SATA_STATE_READY; + mutex_exit(&cportinfo->cport_mutex); + + sata_device->satadev_addr.qual = qual; + rval = (*SATA_PORT_DEACTIVATE_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), sata_device); + + sata_gen_sysevent(sata_hba_inst, &sata_device->satadev_addr, + SE_NO_HINT); + + mutex_enter(&cportinfo->cport_mutex); + sata_update_port_info(sata_hba_inst, sata_device); + if (rval != SATA_SUCCESS) { + if (sata_device->satadev_state & SATA_PSTATE_FAILED) { + cportinfo->cport_state = SATA_PSTATE_FAILED; + } + rv = EIO; + } else { + cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN; + } + mutex_exit(&cportinfo->cport_mutex); + + return (rv); + } + + /* + * Process non-port-multiplier device - it could be a drive connected + * to a port multiplier port or a controller port. + */ mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); if (qual == SATA_ADDR_CPORT) { sata_device->satadev_addr.qual = SATA_ADDR_DCPORT; if (cportinfo->cport_dev_type != SATA_DTYPE_NONE) { - /* - * For now, assume that port multiplier is not - * supported, i.e. deal only with valid devices - */ + /* deal only with valid devices */ if ((cportinfo->cport_dev_type & SATA_VALID_DEV_TYPE) != 0) sdinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); - /* - * If attached device is a port multiplier, we will - * have to unconfigure all devices attached to the - * port multiplier. Add this code here. - */ } cportinfo->cport_state &= ~SATA_STATE_READY; } else { /* Port multiplier device port */ - pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + mutex_enter(&pmportinfo->pmport_mutex); sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT; if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE && (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) != 0) sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); pmportinfo->pmport_state &= ~SATA_STATE_READY; + mutex_exit(&pmportinfo->pmport_mutex); } if (sdinfo != NULL) { @@ -12611,17 +13945,22 @@ SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; cportinfo->cport_dev_type = SATA_DTYPE_NONE; } else { /* port multiplier device port */ + mutex_enter(&pmportinfo->pmport_mutex); SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + mutex_exit(&pmportinfo->pmport_mutex); } (void) kmem_free((void *)sdinfo, sizeof (sata_drive_info_t)); } + if (qual == SATA_ADDR_CPORT) { cportinfo->cport_state &= ~(SATA_STATE_PROBED | SATA_STATE_PROBING); - } else { /* port multiplier device port */ + } else if (qual == SATA_ADDR_PMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_state &= ~(SATA_STATE_PROBED | SATA_STATE_PROBING); + mutex_exit(&pmportinfo->pmport_mutex); } mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); @@ -12658,6 +13997,7 @@ cportinfo->cport_state |= SATA_PSTATE_SHUTDOWN; } } else { + mutex_enter(&pmportinfo->pmport_mutex); if (rval != SATA_SUCCESS) { if (sata_device->satadev_state & SATA_PSTATE_FAILED) { SATA_PMPORT_STATE(sata_hba_inst, cport, @@ -12671,6 +14011,7 @@ } else { pmportinfo->pmport_state |= SATA_PSTATE_SHUTDOWN; } + mutex_exit(&pmportinfo->pmport_mutex); } mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); @@ -12681,7 +14022,7 @@ /* * Process ioctl port activate request. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported now. */ static int sata_ioctl_activate(sata_hba_inst_t *sata_hba_inst, @@ -12700,8 +14041,19 @@ pmport = sata_device->satadev_addr.pmport; qual = sata_device->satadev_addr.qual; + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + + /* + * The qual translate from ap_id (by SCSI_TO_SATA_ADDR_QUAL()) + * is a device. But what we are dealing with is port/pmport. + */ + ASSERT(qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_DPMPORT); + if (qual == SATA_ADDR_DCPORT) + sata_device->satadev_addr.qual = qual = SATA_ADDR_CPORT; + else + sata_device->satadev_addr.qual = qual = SATA_ADDR_PMPORT; + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); if (qual == SATA_ADDR_PMPORT) { pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); if (pmportinfo->pmport_state & SATA_PSTATE_SHUTDOWN || @@ -12725,9 +14077,11 @@ cport)->cport_mutex); sata_update_port_info(sata_hba_inst, sata_device); if (sata_device->satadev_state & SATA_PSTATE_FAILED) { - if (qual == SATA_ADDR_PMPORT) + if (qual == SATA_ADDR_PMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_state = SATA_PSTATE_FAILED; - else + mutex_exit(&pmportinfo->pmport_mutex); + } else cportinfo->cport_state = SATA_PSTATE_FAILED; mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, @@ -12740,9 +14094,11 @@ mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); } mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - if (qual == SATA_ADDR_PMPORT) + if (qual == SATA_ADDR_PMPORT) { + mutex_enter(&pmportinfo->pmport_mutex); pmportinfo->pmport_state &= ~SATA_PSTATE_SHUTDOWN; - else + mutex_exit(&pmportinfo->pmport_mutex); + } else cportinfo->cport_state &= ~SATA_PSTATE_SHUTDOWN; mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); @@ -12788,13 +14144,6 @@ sata_log(sata_hba_inst, CE_WARN, "SATA port multiplier detected at port %d", cport); - /* - * Because the detected device is a port - * multiplier, we need to reprobe every device - * port on the port multiplier and show every - * device found attached. - * Add this code here. - */ } } } @@ -12806,7 +14155,7 @@ /* * Process ioctl reset port request. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port-Multiplier is supported. */ static int sata_ioctl_reset_port(sata_hba_inst_t *sata_hba_inst, @@ -12819,6 +14168,16 @@ pmport = sata_device->satadev_addr.pmport; qual = sata_device->satadev_addr.qual; + /* + * The qual translate from ap_id (by SCSI_TO_SATA_ADDR_QUAL()) + * is a device. But what we are dealing with is port/pmport. + */ + if (qual == SATA_ADDR_DCPORT) + sata_device->satadev_addr.qual = qual = SATA_ADDR_CPORT; + else + sata_device->satadev_addr.qual = qual = SATA_ADDR_PMPORT; + ASSERT(qual == SATA_ADDR_CPORT || qual == SATA_ADDR_PMPORT); + /* Sanity check */ if (SATA_RESET_DPORT_FUNC(sata_hba_inst) == NULL) { SATA_LOG_D((sata_hba_inst, CE_WARN, @@ -12839,9 +14198,14 @@ if (qual == SATA_ADDR_CPORT) SATA_CPORT_STATE(sata_hba_inst, cport) = SATA_PSTATE_FAILED; - else + else { + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, + pmport)); SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = SATA_PSTATE_FAILED; + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, + pmport)); + } mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)-> cport_mutex); rv = EIO; @@ -12862,13 +14226,14 @@ /* * Process ioctl reset device request. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_reset_device(sata_hba_inst_t *sata_hba_inst, sata_device_t *sata_device) { - sata_drive_info_t *sdinfo; + sata_drive_info_t *sdinfo = NULL; + sata_pmult_info_t *pmultinfo = NULL; int cport, pmport; int rv = 0; @@ -12884,17 +14249,21 @@ pmport = sata_device->satadev_addr.pmport; mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); - if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) { - sata_device->satadev_addr.qual = SATA_ADDR_DCPORT; - sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst, - sata_device->satadev_addr.cport); + if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) { + if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) == + SATA_DTYPE_PMULT) + pmultinfo = SATA_CPORT_INFO(sata_hba_inst, cport)-> + cport_devp.cport_sata_pmult; + else + sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst, + sata_device->satadev_addr.cport); } else { /* port multiplier */ sata_device->satadev_addr.qual = SATA_ADDR_DPMPORT; sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, sata_device->satadev_addr.cport, sata_device->satadev_addr.pmport); } - if (sdinfo == NULL) { + if (sdinfo == NULL && pmultinfo == NULL) { mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); return (EINVAL); } @@ -12914,15 +14283,23 @@ * or port disconnect/connect and re-probing is * needed to change it's state */ - sdinfo->satadrv_state &= ~SATA_STATE_READY; - sdinfo->satadrv_state |= SATA_DSTATE_FAILED; + if (sdinfo != NULL) { + sdinfo->satadrv_state &= ~SATA_STATE_READY; + sdinfo->satadrv_state |= SATA_DSTATE_FAILED; + } else if (pmultinfo != NULL) { + pmultinfo->pmult_state &= ~SATA_STATE_READY; + pmultinfo->pmult_state |= SATA_DSTATE_FAILED; + } + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)->cport_mutex); rv = EIO; } /* * If attached device was a port multiplier, some extra processing - * may be needed, to bring it back (if port re-probing did not handle - * it). Add such code here. + * may be needed to bring it back. SATA specification requies a + * mandatory software reset on host port to reliably enumerate a port + * multiplier, the HBA driver should handle that after reset + * operation. */ return (rv); } @@ -12930,8 +14307,6 @@ /* * Process ioctl reset all request. - * - * NOTE: Port multiplier code is not completed nor tested. */ static int sata_ioctl_reset_all(sata_hba_inst_t *sata_hba_inst) @@ -12969,12 +14344,12 @@ rv = EBUSY; break; } else { + /* + * It is enough to lock cport in command-based + * switching mode. + */ SATA_CPORT_INFO(sata_hba_inst, tcport)-> cport_event_flags |= SATA_APCTL_LOCK_PORT_BUSY; - /* - * If there is a port multiplier attached, we may need - * to lock its port as well. If so, add such code here. - */ } mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, tcport)-> cport_mutex); @@ -12983,7 +14358,7 @@ if (rv == 0) { /* * All cports were successfully locked. - * Reset main SATA controller only for now - no PMult. + * Reset main SATA controller. * Set the device address to port 0, to have a valid device * address. */ @@ -13001,7 +14376,6 @@ * Because ports were reset, port states are unknown. * They should be re-probed to get their state and * attached devices should be reinitialized. - * Add code here to re-probe port multiplier device ports. */ for (tcport = 0; tcport < SATA_NUM_CPORTS(sata_hba_inst); tcport++) { @@ -13009,6 +14383,12 @@ sata_device.satadev_addr.pmport = tpmport; sata_device.satadev_addr.qual = SATA_ADDR_CPORT; + /* + * The sata_reprobe_port() will mark a + * SATA_EVNT_DEVICE_RESET event on the port + * multiplier, all its sub-ports will be probed by + * sata daemon afterwards. + */ if (sata_reprobe_port(sata_hba_inst, &sata_device, SATA_DEV_IDENTIFY_RETRY) != SATA_SUCCESS) rv = EIO; @@ -13070,9 +14450,14 @@ if (qual == SATA_ADDR_CPORT) SATA_CPORT_STATE(sata_hba_inst, cport) = SATA_PSTATE_FAILED; - else /* port ultiplier device port */ + else { /* port multiplier device port */ + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, + cport, pmport)); SATA_PMPORT_STATE(sata_hba_inst, cport, pmport) = SATA_PSTATE_FAILED; + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, + cport, pmport)); + } mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, cport)-> cport_mutex); @@ -13102,20 +14487,23 @@ * SCSI_TO_SATA_PMPORT extracts pmport number and * SCSI_TO_SATA_ADDR_QUAL extracts port mulitplier qualifier flag. * - * For now, support is for cports only - no port multiplier device ports. + * Port multiplier is supported. */ static void sata_cfgadm_state(sata_hba_inst_t *sata_hba_inst, int32_t port, devctl_ap_state_t *ap_state) { - uint16_t cport; - int port_state; + uint8_t cport, pmport, qual; + uint32_t port_state, pmult_state; + uint32_t dev_type; sata_drive_info_t *sdinfo; - /* Cport only */ cport = SCSI_TO_SATA_CPORT(port); - + pmport = SCSI_TO_SATA_PMPORT(port); + qual = SCSI_TO_SATA_ADDR_QUAL(port); + + /* Check cport state */ port_state = SATA_CPORT_STATE(sata_hba_inst, cport); if (port_state & SATA_PSTATE_SHUTDOWN || port_state & SATA_PSTATE_FAILED) { @@ -13129,11 +14517,34 @@ return; } - /* Need to check pmult device port here as well, when supported */ + /* cport state is okay. Now check pmport state */ + if (qual == SATA_ADDR_DPMPORT || qual == SATA_ADDR_PMPORT) { + /* Sanity check */ + if (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport) != + SATA_DTYPE_PMULT || SATA_PMPORT_INFO(sata_hba_inst, + cport, pmport) == NULL) + return; + port_state = SATA_PMPORT_STATE(sata_hba_inst, cport, pmport); + if (port_state & SATA_PSTATE_SHUTDOWN || + port_state & SATA_PSTATE_FAILED) { + ap_state->ap_rstate = AP_RSTATE_DISCONNECTED; + ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED; + if (port_state & SATA_PSTATE_FAILED) + ap_state->ap_condition = AP_COND_FAILED; + else + ap_state->ap_condition = AP_COND_UNKNOWN; + + return; + } + } /* Port is enabled and ready */ - - switch (SATA_CPORT_DEV_TYPE(sata_hba_inst, cport)) { + if (qual == SATA_ADDR_DCPORT || qual == SATA_ADDR_CPORT) + dev_type = SATA_CPORT_DEV_TYPE(sata_hba_inst, cport); + else + dev_type = SATA_PMPORT_DEV_TYPE(sata_hba_inst, cport, pmport); + + switch (dev_type) { case SATA_DTYPE_NONE: { ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED; @@ -13142,8 +14553,30 @@ ap_state->ap_rstate = AP_RSTATE_EMPTY; break; } - case SATA_DTYPE_UNKNOWN: - case SATA_DTYPE_PMULT: /* Until PMult is supported */ + case SATA_DTYPE_PMULT: + { + /* Need to check port multiplier state */ + ASSERT(qual == SATA_ADDR_DCPORT); + pmult_state = SATA_PMULT_INFO(sata_hba_inst, cport)-> + pmult_state; + if (pmult_state & (SATA_PSTATE_SHUTDOWN|SATA_PSTATE_FAILED)) { + ap_state->ap_rstate = AP_RSTATE_DISCONNECTED; + ap_state->ap_ostate = AP_OSTATE_UNCONFIGURED; + if (pmult_state & SATA_PSTATE_FAILED) + ap_state->ap_condition = AP_COND_FAILED; + else + ap_state->ap_condition = AP_COND_UNKNOWN; + + return; + } + + /* Port multiplier is not configurable */ + ap_state->ap_ostate = AP_OSTATE_CONFIGURED; + ap_state->ap_rstate = AP_RSTATE_CONNECTED; + ap_state->ap_condition = AP_COND_OK; + break; + } + case SATA_DTYPE_ATADISK: case SATA_DTYPE_ATAPICD: case SATA_DTYPE_ATAPITAPE: @@ -13154,7 +14587,7 @@ int circ; dip = SATA_DIP(sata_hba_inst); - tdip = sata_get_target_dip(dip, port); + tdip = sata_get_target_dip(dip, cport, pmport); ap_state->ap_rstate = AP_RSTATE_CONNECTED; if (tdip != NULL) { ndi_devi_enter(dip, &circ); @@ -13225,7 +14658,9 @@ /* * Process ioctl get device path request. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier has no target dip. Devices connected to port + * multiplier have target node attached to the HBA node. The only difference + * between them and the directly-attached device node is a target address. */ static int sata_ioctl_get_device_path(sata_hba_inst_t *sata_hba_inst, @@ -13270,7 +14705,7 @@ /* * Process ioctl get attachment point type request. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_get_ap_type(sata_hba_inst_t *sata_hba_inst, @@ -13280,7 +14715,7 @@ const char *ap_type; int dev_type; - if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) + if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) dev_type = SATA_CPORT_DEV_TYPE(sata_hba_inst, sata_device->satadev_addr.cport); else /* pmport */ @@ -13307,7 +14742,7 @@ break; case SATA_DTYPE_PMULT: - ap_type = "pmult"; + ap_type = "sata-pmult"; break; case SATA_DTYPE_UNKNOWN: @@ -13343,7 +14778,7 @@ * This operation should return to cfgadm the device model * information string * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_get_model_info(sata_hba_inst_t *sata_hba_inst, @@ -13355,7 +14790,7 @@ mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, sata_device->satadev_addr.cport)->cport_mutex); - if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) + if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst, sata_device->satadev_addr.cport); else /* port multiplier */ @@ -13401,7 +14836,7 @@ * This operation should return to cfgadm the device firmware revision * information string * - * NOTE: Port multiplier code is not completed nor tested. + * Port multiplier is supported. */ static int sata_ioctl_get_revfirmware_info(sata_hba_inst_t *sata_hba_inst, @@ -13413,7 +14848,7 @@ mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, sata_device->satadev_addr.cport)->cport_mutex); - if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) + if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst, sata_device->satadev_addr.cport); else /* port multiplier */ @@ -13458,7 +14893,7 @@ * Process ioctl get device serial number info request. * This operation should return to cfgadm the device serial number string. * - * NOTE: Port multiplier code is not completed nor tested. + * NOTE: Port multiplier is supported. */ static int sata_ioctl_get_serialnumber_info(sata_hba_inst_t *sata_hba_inst, @@ -13470,7 +14905,7 @@ mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, sata_device->satadev_addr.cport)->cport_mutex); - if (sata_device->satadev_addr.qual == SATA_ADDR_CPORT) + if (sata_device->satadev_addr.qual == SATA_ADDR_DCPORT) sdinfo = SATA_CPORT_DRV_INFO(sata_hba_inst, sata_device->satadev_addr.cport); else /* port multiplier */ @@ -14719,6 +16154,7 @@ { sata_hba_inst_t *sata_hba_inst = NULL; sata_address_t *saddr; + sata_pmult_info_t *pmultinfo; sata_drive_info_t *sdinfo; sata_port_stats_t *pstats; sata_cport_info_t *cportinfo; @@ -14758,10 +16194,6 @@ saddr = &sata_device->satadev_addr; if (saddr->cport >= SATA_NUM_CPORTS(sata_hba_inst)) return; - if (saddr->qual == SATA_ADDR_PMPORT || - saddr->qual == SATA_ADDR_DPMPORT) - /* Port Multiplier not supported yet */ - return; cport = saddr->cport; pmport = saddr->pmport; @@ -14773,20 +16205,74 @@ * Port has to be initialized, or we cannot accept an event. */ if ((saddr->qual & (SATA_ADDR_CPORT | SATA_ADDR_PMPORT | - SATA_ADDR_DCPORT | SATA_ADDR_DPMPORT)) != 0) { - if ((saddr->qual & (SATA_ADDR_CPORT | SATA_ADDR_DCPORT)) != 0) { - mutex_enter(&sata_hba_inst->satahba_mutex); - cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); - mutex_exit(&sata_hba_inst->satahba_mutex); - if (cportinfo == NULL || cportinfo->cport_state == 0) - return; - } else { - mutex_enter(&sata_hba_inst->satahba_mutex); - pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, - cport, pmport); - mutex_exit(&sata_hba_inst->satahba_mutex); - if (pmportinfo == NULL || pmportinfo->pmport_state == 0) - return; + SATA_ADDR_DCPORT | SATA_ADDR_DPMPORT | SATA_ADDR_PMULT)) != 0) { + mutex_enter(&sata_hba_inst->satahba_mutex); + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + mutex_exit(&sata_hba_inst->satahba_mutex); + if (cportinfo == NULL || cportinfo->cport_state == 0) + return; + } + + if ((saddr->qual & (SATA_ADDR_PMULT | SATA_ADDR_PMPORT | + SATA_ADDR_DPMPORT)) != 0) { + if (cportinfo->cport_dev_type != SATA_DTYPE_PMULT) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_event_notify: Non-pmult device (0x%x)" + "is attached to port %d, ignore pmult/pmport " + "event 0x%x", cportinfo->cport_dev_type, + cport, event)); + return; + } + + mutex_enter(&cportinfo->cport_mutex); + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport); + mutex_exit(&cportinfo->cport_mutex); + + /* + * The daemon might be processing attachment of port + * multiplier, in that case we should ignore events on its + * sub-devices. + * + * NOTE: Only pmult_state is checked in sata_hba_event_notify. + * The pmport_state is checked by sata daemon. + */ + if (pmultinfo == NULL || + pmultinfo->pmult_state == SATA_STATE_UNKNOWN) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_event_notify: pmult is not" + "available at port %d:%d, ignore event 0x%x", + cport, pmport, event)); + return; + } + } + + if ((saddr->qual & + (SATA_ADDR_PMPORT | SATA_ADDR_DPMPORT)) != 0) { + + mutex_enter(&cportinfo->cport_mutex); + if (pmport > SATA_NUM_PMPORTS(sata_hba_inst, cport)) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_event_notify: invalid/" + "un-implemented port %d:%d (%d ports), " + "ignore event 0x%x", cport, pmport, + SATA_NUM_PMPORTS(sata_hba_inst, cport), event)); + mutex_exit(&cportinfo->cport_mutex); + return; + } + mutex_exit(&cportinfo->cport_mutex); + + mutex_enter(&sata_hba_inst->satahba_mutex); + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + cport, pmport); + mutex_exit(&sata_hba_inst->satahba_mutex); + + /* pmport is implemented/valid? */ + if (pmportinfo == NULL) { + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_event_notify: invalid/" + "un-implemented port %d:%d, ignore " + "event 0x%x", cport, pmport, event)); + return; } } @@ -14816,6 +16302,8 @@ &(SATA_CPORT_INFO(sata_hba_inst, cport))-> cport_stats; } else { + mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + mutex_enter(&pmportinfo->pmport_mutex); /* Port multiplier's device port event */ (SATA_PMPORT_INFO(sata_hba_inst, cport, pmport))-> pmport_event_flags |= @@ -14823,6 +16311,8 @@ pstats = &(SATA_PMPORT_INFO(sata_hba_inst, cport, pmport))-> pmport_stats; + mutex_exit(&pmportinfo->pmport_mutex); + mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); } /* @@ -14915,6 +16405,42 @@ event & ~SATA_EVNT_DRIVE_EVENTS); } mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + } else if (saddr->qual == SATA_ADDR_PMULT) { + mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + + /* qualify this event */ + if ((event & (SATA_EVNT_DEVICE_RESET | + SATA_EVNT_PMULT_LINK_CHANGED)) == 0) { + /* Invalid event for a port multiplier */ + (void) sprintf(buf2, err_msg_evnt_2, + event & SATA_EVNT_DEVICE_RESET); + mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + goto event_info; + } + + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport); + + if (event & SATA_EVNT_DEVICE_RESET) { + + SATADBG1(SATA_DBG_PMULT, sata_hba_inst, + "[Reset] port-mult on cport %d", cport); + pmultinfo->pmult_event_flags |= + SATA_EVNT_DEVICE_RESET; + (void) strlcat(buf1, "pmult reset, ", + SATA_EVENT_MAX_MSG_LENGTH); + } + + if (event & SATA_EVNT_PMULT_LINK_CHANGED) { + + SATADBG1(SATA_DBG_PMULT, sata_hba_inst, + "pmult link changed on cport %d", cport); + pmultinfo->pmult_event_flags |= + SATA_EVNT_PMULT_LINK_CHANGED; + (void) strlcat(buf1, "pmult link changed, ", + SATA_EVENT_MAX_MSG_LENGTH); + } + mutex_exit(&(SATA_CPORT_MUTEX(sata_hba_inst, cport))); + } else { if (saddr->qual != SATA_ADDR_NULL) { /* Wrong address qualifier */ @@ -15113,7 +16639,7 @@ * * NOTE: At the moment, device event processing is limited to hard disks * only. - * cports only are supported - no pmports. + * Port multiplier is supported now. */ static void sata_process_controller_events(sata_hba_inst_t *sata_hba_inst) @@ -15122,6 +16648,7 @@ uint32_t event_flags; sata_address_t *saddr; sata_cport_info_t *cportinfo; + sata_pmult_info_t *pmultinfo; SATADBG1(SATA_DBG_EVENTS_CNTRL, sata_hba_inst, "Processing controller %d event(s)", @@ -15236,7 +16763,38 @@ sata_hba_inst, saddr); } } + + + /* + * Scan port multiplier and all its sub-ports event flags. + * The events are marked by + * (1) sata_pmult_info.pmult_event_flags + * (2) sata_pmport_info.pmport_event_flags + */ mutex_enter(&(SATA_CPORT_MUTEX(sata_hba_inst, ncport))); + if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) { + /* + * There should be another extra check: this + * port multiplier still exists? + */ + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, + ncport); + + if (pmultinfo != NULL) { + mutex_exit(&(SATA_CPORT_MUTEX( + sata_hba_inst, ncport))); + sata_process_pmult_events( + sata_hba_inst, ncport); + mutex_enter(&(SATA_CPORT_MUTEX( + sata_hba_inst, ncport))); + } else { + SATADBG1(SATA_DBG_PMULT, sata_hba_inst, + "Port-multiplier is gone. " + "Ignore all sub-device events " + "at port %d.", ncport); + } + } + if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, ncport) != SATA_DTYPE_NONE) && (SATA_CPORT_DRV_INFO(sata_hba_inst, ncport) != NULL)) { @@ -15258,6 +16816,190 @@ } /* + * Specific port multiplier instance event processing. At the moment, device + * event processing is limited to link/attach event only. + * + * NOTE: power management event is not supported yet. + */ +static void +sata_process_pmult_events(sata_hba_inst_t *sata_hba_inst, uint8_t cport) +{ + sata_cport_info_t *cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + sata_pmult_info_t *pmultinfo; + sata_pmport_info_t *pmportinfo; + sata_address_t *saddr; + sata_device_t sata_device; + uint32_t event_flags; + int npmport; + int rval; + + SATADBG2(SATA_DBG_EVENTS_CNTRL|SATA_DBG_PMULT, sata_hba_inst, + "Processing pmult event(s) on cport %d of controller %d", + cport, ddi_get_instance(SATA_DIP(sata_hba_inst))); + + /* First process events on port multiplier */ + mutex_enter(&cportinfo->cport_mutex); + pmultinfo = SATA_PMULT_INFO(sata_hba_inst, cport); + event_flags = pmultinfo->pmult_event_flags; + + /* + * Reset event (of port multiplier) has higher priority because the + * port multiplier itself might be failed or removed after reset. + */ + if (event_flags & SATA_EVNT_DEVICE_RESET) { + /* + * The status of the sub-links are uncertain, + * so mark all sub-ports as RESET + */ + for (npmport = 0; npmport < SATA_NUM_PMPORTS( + sata_hba_inst, cport); npmport ++) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + cport, npmport); + if (pmportinfo == NULL) { + /* That's weird. */ + SATA_LOG_D((sata_hba_inst, CE_WARN, + "sata_hba_event_notify: " + "invalid/un-implemented " + "port %d:%d (%d ports), ", + cport, npmport, SATA_NUM_PMPORTS( + sata_hba_inst, cport))); + continue; + } + + mutex_enter(&pmportinfo->pmport_mutex); + + /* Mark all pmport to unknow state. */ + pmportinfo->pmport_state = SATA_STATE_UNKNOWN; + /* Mark all pmports with link events. */ + pmportinfo->pmport_event_flags = + (SATA_EVNT_LINK_ESTABLISHED|SATA_EVNT_LINK_LOST); + mutex_exit(&pmportinfo->pmport_mutex); + } + + } else if (event_flags & SATA_EVNT_PMULT_LINK_CHANGED) { + /* + * We need probe the port multiplier to know what has + * happened. + */ + bzero(&sata_device, sizeof (sata_device_t)); + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr.cport = cport; + sata_device.satadev_addr.pmport = SATA_PMULT_HOSTPORT; + sata_device.satadev_addr.qual = SATA_ADDR_PMULT; + + mutex_exit(&cportinfo->cport_mutex); + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&cportinfo->cport_mutex); + if (rval != SATA_SUCCESS) { + /* Something went wrong? Fail the port */ + cportinfo->cport_state = SATA_PSTATE_FAILED; + mutex_exit(&cportinfo->cport_mutex); + SATA_LOG_D((sata_hba_inst, CE_WARN, + "SATA port %d probing failed", cport)); + + /* PMult structure must be released. */ + sata_free_pmult(sata_hba_inst, &sata_device); + return; + } + + sata_update_port_info(sata_hba_inst, &sata_device); + + /* + * Sanity check - Port is active? Is the link active? + * The device is still a port multiplier? + */ + if ((cportinfo->cport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) || + ((cportinfo->cport_scr.sstatus & + SATA_PORT_DEVLINK_UP_MASK) != SATA_PORT_DEVLINK_UP) || + (cportinfo->cport_dev_type != SATA_DTYPE_PMULT)) { + mutex_exit(&cportinfo->cport_mutex); + + /* PMult structure must be released. */ + sata_free_pmult(sata_hba_inst, &sata_device); + return; + } + + /* Probed succeed, set port ready. */ + cportinfo->cport_state |= + SATA_STATE_PROBED | SATA_STATE_READY; + } + + /* Release port multiplier event flags. */ + pmultinfo->pmult_event_flags &= + ~(SATA_EVNT_DEVICE_RESET|SATA_EVNT_PMULT_LINK_CHANGED); + mutex_exit(&cportinfo->cport_mutex); + + /* + * Check all sub-links. + */ + for (npmport = 0; npmport < SATA_NUM_PMPORTS(sata_hba_inst, cport); + npmport ++) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, npmport); + mutex_enter(&pmportinfo->pmport_mutex); + event_flags = pmportinfo->pmport_event_flags; + mutex_exit(&pmportinfo->pmport_mutex); + saddr = &pmportinfo->pmport_addr; + + if ((event_flags & + (SATA_EVNT_PORT_EVENTS | SATA_EVNT_DRIVE_EVENTS)) != 0) { + /* + * Got port multiplier port event. + * We need some hierarchy of event processing as they + * are affecting each other: + * 1. device detached/attached + * 2. link events - link events may trigger device + * detached or device attached events in some + * circumstances. + */ + if (event_flags & SATA_EVNT_DEVICE_DETACHED) { + sata_process_pmdevice_detached(sata_hba_inst, + saddr); + } + if (event_flags & SATA_EVNT_DEVICE_ATTACHED) { + sata_process_pmdevice_attached(sata_hba_inst, + saddr); + } + if (event_flags & SATA_EVNT_LINK_ESTABLISHED || + event_flags & SATA_EVNT_LINK_LOST) { + sata_process_pmport_link_events(sata_hba_inst, + saddr); + } + if (event_flags & SATA_EVNT_TARGET_NODE_CLEANUP) { + sata_process_target_node_cleanup( + sata_hba_inst, saddr); + } + } + + /* Checking drive event(s). */ + mutex_enter(&pmportinfo->pmport_mutex); + if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE && + pmportinfo->pmport_sata_drive != NULL) { + event_flags = pmportinfo->pmport_sata_drive-> + satadrv_event_flags; + if (event_flags & (SATA_EVNT_DEVICE_RESET | + SATA_EVNT_INPROC_DEVICE_RESET)) { + + /* Have device event */ + sata_process_pmdevice_reset(sata_hba_inst, + saddr); + } + } + mutex_exit(&pmportinfo->pmport_mutex); + + /* Release PORT_BUSY flag */ + mutex_enter(&cportinfo->cport_mutex); + cportinfo->cport_event_flags &= ~SATA_EVNT_LOCK_PORT_BUSY; + mutex_exit(&cportinfo->cport_mutex); + } + + SATADBG2(SATA_DBG_EVENTS_CNTRL|SATA_DBG_PMULT, sata_hba_inst, + "[DONE] pmult event(s) on cport %d of controller %d", + cport, ddi_get_instance(SATA_DIP(sata_hba_inst))); +} + +/* * Process HBA power level change reported by HBA driver. * Not implemented at this time - event is ignored. */ @@ -15355,6 +17097,16 @@ return; } + if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) == + SATA_DTYPE_PMULT)) { + /* + * Should not happened: this is already handled in + * sata_hba_event_notify() + */ + mutex_exit(&cportinfo->cport_mutex); + goto done; + } + if ((SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) & SATA_VALID_DEV_TYPE) == 0) { /* @@ -15570,6 +17322,243 @@ /* + * Port Multiplier Port Device Reset Event processing. + * + * NOTE: This function has to be entered with pmport mutex held. It exits with + * mutex held as well, but can release mutex during the processing. + */ +static void +sata_process_pmdevice_reset(sata_hba_inst_t *sata_hba_inst, + sata_address_t *saddr) +{ + sata_drive_info_t old_sdinfo; /* local copy of the drive info */ + sata_drive_info_t *sdinfo = NULL; + sata_cport_info_t *cportinfo = NULL; + sata_pmport_info_t *pmportinfo = NULL; + sata_pmult_info_t *pminfo = NULL; + sata_device_t sata_device; + uint8_t cport = saddr->cport; + uint8_t pmport = saddr->pmport; + int rval; + + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing drive reset at port %d:%d", cport, pmport); + + cportinfo = SATA_CPORT_INFO(sata_hba_inst, cport); + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, cport, pmport); + + /* + * If the port is in SHUTDOWN or FAILED state, or device is in FAILED + * state, ignore reset event. + */ + if (((cportinfo->cport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) || + (sdinfo->satadrv_state & SATA_DSTATE_FAILED) != 0) { + sdinfo->satadrv_event_flags &= + ~(SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET); + return; + } + + if ((pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) == 0) { + /* + * This should not happen - coding error. + * But we can recover, so do not panic, just clean up + * and if in debug mode, log the message. + */ +#ifdef SATA_DEBUG + sata_log(sata_hba_inst, CE_WARN, + "sata_process_pmdevice_reset: " + "Invalid device type with sdinfo!", NULL); +#endif + sdinfo->satadrv_event_flags = 0; + return; + } + +#ifdef SATA_DEBUG + if ((sdinfo->satadrv_event_flags & + (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) == 0) { + /* Nothing to do */ + /* Something is weird - why we are processing dev reset? */ + SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "No device reset event!!!!", NULL); + + return; + } + if ((sdinfo->satadrv_event_flags & + (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) == + (SATA_EVNT_DEVICE_RESET | SATA_EVNT_INPROC_DEVICE_RESET)) { + /* Something is weird - new device reset event */ + SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Overlapping device reset events!", NULL); + } +#endif + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d device reset", cport, pmport); + + /* Clear event flag */ + sdinfo->satadrv_event_flags &= ~SATA_EVNT_DEVICE_RESET; + + /* It seems that we always need to check the port state first */ + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr = *saddr; + /* + * We have to exit mutex, because the HBA probe port function may + * block on its own mutex. + */ + mutex_exit(&pmportinfo->pmport_mutex); + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&pmportinfo->pmport_mutex); + + sata_update_pmport_info(sata_hba_inst, &sata_device); + if (rval != SATA_SUCCESS) { + /* Something went wrong? Fail the port */ + pmportinfo->pmport_state = SATA_PSTATE_FAILED; + sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport, + saddr->pmport); + if (sdinfo != NULL) + sdinfo->satadrv_event_flags = 0; + mutex_exit(&pmportinfo->pmport_mutex); + SATA_LOG_D((sata_hba_inst, CE_WARN, + "SATA port %d:%d probing failed", + saddr->cport, saddr->pmport)); + mutex_enter(&pmportinfo->pmport_mutex); + return; + } + if ((sata_device.satadev_scr.sstatus & + SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP || + sata_device.satadev_type == SATA_DTYPE_NONE) { + /* + * No device to process, anymore. Some other event processing + * would or have already performed port info cleanup. + * To be safe (HBA may need it), request clearing device + * reset condition. + */ + sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport, + saddr->pmport); + if (sdinfo != NULL) { + sdinfo->satadrv_event_flags &= + ~SATA_EVNT_INPROC_DEVICE_RESET; + /* must clear flags on cport */ + pminfo = SATA_PMULT_INFO(sata_hba_inst, + saddr->cport); + pminfo->pmult_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } + return; + } + + sdinfo = SATA_PMPORT_DRV_INFO(sata_hba_inst, saddr->cport, + saddr->pmport); + if (sdinfo == NULL) { + return; + } + if ((sdinfo->satadrv_event_flags & + SATA_EVNT_INPROC_DEVICE_RESET) == 0) { + /* + * Start tracking time for device feature restoration and + * identification. Save current time (lbolt value). + */ + sdinfo->satadrv_reset_time = ddi_get_lbolt(); + } + /* Mark device reset processing as active */ + sdinfo->satadrv_event_flags |= SATA_EVNT_INPROC_DEVICE_RESET; + + old_sdinfo = *sdinfo; /* local copy of the drive info */ + mutex_exit(&pmportinfo->pmport_mutex); + + if (sata_set_drive_features(sata_hba_inst, &old_sdinfo, 1) == + SATA_FAILURE) { + /* + * Restoring drive setting failed. + * Probe the port first, to check if the port state has changed + */ + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr = *saddr; + sata_device.satadev_addr.qual = SATA_ADDR_PMPORT; + + /* probe port */ + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&pmportinfo->pmport_mutex); + if (rval == SATA_SUCCESS && + (sata_device.satadev_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) == 0 && + (sata_device.satadev_scr.sstatus & + SATA_PORT_DEVLINK_UP_MASK) == SATA_PORT_DEVLINK_UP && + sata_device.satadev_type != SATA_DTYPE_NONE) { + /* + * We may retry this a bit later - in-process reset + * condition should be already set. + * Track retry time for device identification. + */ + if ((pmportinfo->pmport_dev_type & + SATA_VALID_DEV_TYPE) != 0 && + SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL && + sdinfo->satadrv_reset_time != 0) { + clock_t cur_time = ddi_get_lbolt(); + /* + * If the retry time limit was not + * exceeded, retry. + */ + if ((cur_time - sdinfo->satadrv_reset_time) < + drv_usectohz(SATA_DEV_REPROBE_TIMEOUT)) { + mutex_enter( + &sata_hba_inst->satahba_mutex); + sata_hba_inst->satahba_event_flags |= + SATA_EVNT_MAIN; + mutex_exit( + &sata_hba_inst->satahba_mutex); + mutex_enter(&sata_mutex); + sata_event_pending |= SATA_EVNT_MAIN; + mutex_exit(&sata_mutex); + return; + } + } + /* Fail the drive */ + sdinfo->satadrv_state = SATA_DSTATE_FAILED; + + sata_log(sata_hba_inst, CE_WARN, + "SATA device at port %d:%d - device failed", + saddr->cport, saddr->pmport); + } else { + /* + * No point of retrying - some other event processing + * would or already did port info cleanup. + * To be safe (HBA may need it), + * request clearing device reset condition. + */ + sdinfo->satadrv_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } + sdinfo->satadrv_event_flags &= ~SATA_EVNT_INPROC_DEVICE_RESET; + sdinfo->satadrv_reset_time = 0; + return; + } + /* + * Raise the flag indicating that the next sata command could + * be sent with SATA_CLEAR_DEV_RESET_STATE flag, if no new device + * reset is reported. + */ + mutex_enter(&pmportinfo->pmport_mutex); + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) { + sdinfo->satadrv_reset_time = 0; + if (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) { + sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + sdinfo->satadrv_event_flags &= + ~SATA_EVNT_INPROC_DEVICE_RESET; + /* must clear flags on cport */ + pminfo = SATA_PMULT_INFO(sata_hba_inst, + saddr->cport); + pminfo->pmult_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } + } +} + +/* * Port Link Events processing. * Every link established event may involve device reset (due to * COMRESET signal, equivalent of the hard reset) so arbitrarily @@ -15590,7 +17579,8 @@ * device detached event processing after link lost timeout. * Else, the event is ignored. * - * NOTE: Only cports are processed for now, i.e. no port multiplier ports + * NOTE: Port multiplier ports events are handled by + * sata_process_pmport_link_events(); */ static void sata_process_port_link_events(sata_hba_inst_t *sata_hba_inst, @@ -15674,8 +17664,7 @@ * For the sanity sake check if a device is attached - check * return state of a port probing. */ - if (sata_device.satadev_type != SATA_DTYPE_NONE && - sata_device.satadev_type != SATA_DTYPE_PMULT) { + if (sata_device.satadev_type != SATA_DTYPE_NONE) { /* * HBA port probe indicated that there is a device * attached. Check if the framework had device info @@ -15795,22 +17784,242 @@ } /* + * Port Multiplier Port Link Events processing. + */ +static void +sata_process_pmport_link_events(sata_hba_inst_t *sata_hba_inst, + sata_address_t *saddr) +{ + sata_device_t sata_device; + sata_pmport_info_t *pmportinfo = NULL; + sata_drive_info_t *sdinfo = NULL; + uint32_t event_flags; + uint8_t cport = saddr->cport; + uint8_t pmport = saddr->pmport; + int rval; + + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d link event(s)", + cport, pmport); + + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + mutex_enter(&pmportinfo->pmport_mutex); + event_flags = pmportinfo->pmport_event_flags; + + /* Reset event flags first */ + pmportinfo->pmport_event_flags &= + ~(SATA_EVNT_LINK_ESTABLISHED | SATA_EVNT_LINK_LOST); + + /* If the port is in SHUTDOWN or FAILED state, ignore link events. */ + if ((pmportinfo->pmport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) { + mutex_exit(&pmportinfo->pmport_mutex); + return; + } + + /* + * For the sanity sake get current port state. + * Set device address only. Other sata_device fields should be + * set by HBA driver. + */ + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr = *saddr; + /* + * We have to exit mutex, because the HBA probe port function may + * block on its own mutex. + */ + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport, + saddr->pmport)); + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport, + saddr->pmport)); + sata_update_pmport_info(sata_hba_inst, &sata_device); + if (rval != SATA_SUCCESS) { + /* Something went wrong? Fail the port */ + pmportinfo->pmport_state = SATA_PSTATE_FAILED; + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport, + saddr->pmport)); + SATA_LOG_D((sata_hba_inst, CE_WARN, + "SATA port %d:%d probing failed", + saddr->cport, saddr->pmport)); + /* + * We may want to release device info structure, but + * it is not necessary. + */ + return; + } else { + /* port probed successfully */ + pmportinfo->pmport_state |= + SATA_STATE_PROBED | SATA_STATE_READY; + } + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, + saddr->cport, saddr->pmport)); + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, + saddr->cport, saddr->pmport)); + if (event_flags & SATA_EVNT_LINK_ESTABLISHED) { + + if ((sata_device.satadev_scr.sstatus & + SATA_PORT_DEVLINK_UP_MASK) != SATA_PORT_DEVLINK_UP) { + /* Ignore event */ + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Ignoring port %d:%d link established event - " + "link down", + saddr->cport, saddr->pmport); + goto linklost; + } + + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d link established event", + cport, pmport); + + /* + * For the sanity sake check if a device is attached - check + * return state of a port probing. + */ + if (sata_device.satadev_type != SATA_DTYPE_NONE && + sata_device.satadev_type != SATA_DTYPE_PMULT) { + /* + * HBA port probe indicated that there is a device + * attached. Check if the framework had device info + * structure attached for this device. + */ + if (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE) { + ASSERT(SATA_PMPORTINFO_DRV_INFO(pmportinfo) != + NULL); + + sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + if ((sdinfo->satadrv_type & + SATA_VALID_DEV_TYPE) != 0) { + /* + * Dev info structure is present. + * If dev_type is set to known type in + * the framework's drive info struct + * then the device existed before and + * the link was probably lost + * momentarily - in such case + * we may want to check device + * identity. + * Identity check is not supported now. + * + * Link established event + * triggers device reset event. + */ + (SATA_PMPORTINFO_DRV_INFO(pmportinfo))-> + satadrv_event_flags |= + SATA_EVNT_DEVICE_RESET; + } + } else if (pmportinfo->pmport_dev_type == + SATA_DTYPE_NONE) { + /* + * We got new device attached! If HBA does not + * generate device attached events, trigger it + * here. + */ + if (!(SATA_FEATURES(sata_hba_inst) & + SATA_CTLF_HOTPLUG)) { + pmportinfo->pmport_event_flags |= + SATA_EVNT_DEVICE_ATTACHED; + } + } + /* Reset link lost timeout */ + pmportinfo->pmport_link_lost_time = 0; + } + } +linklost: + if (event_flags & SATA_EVNT_LINK_LOST) { +#ifdef SATA_DEBUG + if (pmportinfo->pmport_link_lost_time == 0) { + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d link lost event", + saddr->cport, saddr->pmport); + } +#endif + if ((sata_device.satadev_scr.sstatus & + SATA_PORT_DEVLINK_UP_MASK) == SATA_PORT_DEVLINK_UP) { + /* Ignore event */ + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Ignoring port %d:%d link lost event - link is up", + saddr->cport, saddr->pmport); + goto done; + } + /* + * When HBA cannot generate device attached/detached events, + * we need to track link lost time and eventually generate + * device detach event. + */ + if (!(SATA_FEATURES(sata_hba_inst) & SATA_CTLF_HOTPLUG)) { + /* We are tracking link lost time */ + if (pmportinfo->pmport_link_lost_time == 0) { + /* save current time (lbolt value) */ + pmportinfo->pmport_link_lost_time = + ddi_get_lbolt(); + /* just keep link lost event */ + pmportinfo->pmport_event_flags |= + SATA_EVNT_LINK_LOST; + } else { + clock_t cur_time = ddi_get_lbolt(); + if ((cur_time - + pmportinfo->pmport_link_lost_time) >= + drv_usectohz( + SATA_EVNT_LINK_LOST_TIMEOUT)) { + /* trigger device detach event */ + pmportinfo->pmport_event_flags |= + SATA_EVNT_DEVICE_DETACHED; + pmportinfo->pmport_link_lost_time = 0; + SATADBG2(SATA_DBG_EVENTS, + sata_hba_inst, + "Triggering port %d:%d " + "device detached event", + saddr->cport, saddr->pmport); + } else { + /* keep link lost event */ + pmportinfo->pmport_event_flags |= + SATA_EVNT_LINK_LOST; + } + } + } + /* + * We could change port state to disable/delay access to + * the attached device until the link is recovered. + */ + } +done: + event_flags = pmportinfo->pmport_event_flags; + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, saddr->cport, + saddr->pmport)); + if (event_flags != 0) { + mutex_enter(&sata_hba_inst->satahba_mutex); + sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN; + mutex_exit(&sata_hba_inst->satahba_mutex); + mutex_enter(&sata_mutex); + sata_event_pending |= SATA_EVNT_MAIN; + mutex_exit(&sata_mutex); + } +} + +/* * Device Detached Event processing. * Port is probed to find if a device is really gone. If so, * the device info structure is detached from the SATA port info structure * and released. * Port status is updated. * - * NOTE: Process cports event only, no port multiplier ports. + * NOTE: Port multiplier ports events are handled by + * sata_process_pmdevice_detached() */ static void sata_process_device_detached(sata_hba_inst_t *sata_hba_inst, sata_address_t *saddr) { sata_cport_info_t *cportinfo; + sata_pmport_info_t *pmportinfo; sata_drive_info_t *sdevinfo; sata_device_t sata_device; - dev_info_t *tdip; + sata_address_t pmport_addr; + char name[16]; + uint8_t cport = saddr->cport; + int npmport; int rval; SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, @@ -15878,58 +18087,205 @@ /* * We need to detach and release device info structure here */ - if (SATA_CPORTINFO_DRV_INFO(cportinfo) != NULL) { - sdevinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); - SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; + if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) { + /* + * A port-multiplier is removed. + * + * Calling sata_process_pmdevice_detached() does not work + * here. The port multiplier is gone, so we cannot probe + * sub-port any more and all pmult-related data structure must + * be de-allocated immediately. Following structure of every + * implemented sub-port behind the pmult are required to + * released. + * + * - attachment point + * - target node + * - sata_drive_info + * - sata_pmport_info + */ + for (npmport = 0; npmport < SATA_NUM_PMPORTS(sata_hba_inst, + cport); npmport ++) { + SATADBG2(SATA_DBG_PMULT|SATA_DBG_EVENTS_PROC, + sata_hba_inst, + "Detaching target node at port %d:%d", + cport, npmport); + + mutex_exit(&SATA_CPORT_MUTEX(sata_hba_inst, cport)); + + /* Remove attachment point. */ + name[0] = '\0'; + (void) sprintf(name, "%d.%d", cport, npmport); + ddi_remove_minor_node(SATA_DIP(sata_hba_inst), name); + sata_log(sata_hba_inst, CE_NOTE, + "Remove attachment point of port %d:%d", + cport, npmport); + + /* Remove target node */ + pmport_addr.cport = cport; + pmport_addr.pmport = (uint8_t)npmport; + pmport_addr.qual = SATA_ADDR_PMPORT; + sata_remove_target_node(sata_hba_inst, &pmport_addr); + + mutex_enter(&SATA_CPORT_MUTEX(sata_hba_inst, cport)); + + /* Release sata_pmport_info & sata_drive_info. */ + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + cport, npmport); + ASSERT(pmportinfo != NULL); + + sdevinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + if (sdevinfo != NULL) { + (void) kmem_free((void *) sdevinfo, + sizeof (sata_drive_info_t)); + } + + /* Release sata_pmport_info at last */ + (void) kmem_free((void *) pmportinfo, + sizeof (sata_pmport_info_t)); + } + + /* Finally, release sata_pmult_info */ + (void) kmem_free((void *) + SATA_CPORTINFO_PMULT_INFO(cportinfo), + sizeof (sata_pmult_info_t)); + SATA_CPORTINFO_PMULT_INFO(cportinfo) = NULL; + + sata_log(sata_hba_inst, CE_WARN, + "SATA port-multiplier detached at port %d", cport); + + cportinfo->cport_dev_type = SATA_DTYPE_NONE; + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + } else { + if (SATA_CPORTINFO_DRV_INFO(cportinfo) != NULL) { + sdevinfo = SATA_CPORTINFO_DRV_INFO(cportinfo); + SATA_CPORTINFO_DRV_INFO(cportinfo) = NULL; + (void) kmem_free((void *)sdevinfo, + sizeof (sata_drive_info_t)); + } + sata_log(sata_hba_inst, CE_WARN, + "SATA device detached at port %d", cport); + + cportinfo->cport_dev_type = SATA_DTYPE_NONE; + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + + /* + * Try to offline a device and remove target node + * if it still exists + */ + sata_remove_target_node(sata_hba_inst, saddr); + } + + + /* + * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE + * with the hint: SE_HINT_REMOVE + */ + sata_gen_sysevent(sata_hba_inst, saddr, SE_HINT_REMOVE); +} + +/* + * Port Multiplier Port Device Deattached Event processing. + * + * NOTE: No Mutex should be hold. + */ +static void +sata_process_pmdevice_detached(sata_hba_inst_t *sata_hba_inst, + sata_address_t *saddr) +{ + sata_pmport_info_t *pmportinfo; + sata_drive_info_t *sdevinfo; + sata_device_t sata_device; + int rval; + uint8_t cport, pmport; + + cport = saddr->cport; + pmport = saddr->pmport; + + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d device detached", + cport, pmport); + + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + + /* Clear event flag */ + pmportinfo->pmport_event_flags &= ~SATA_EVNT_DEVICE_DETACHED; + + /* If the port is in SHUTDOWN or FAILED state, ignore detach event. */ + if ((pmportinfo->pmport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) { + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + return; + } + /* For sanity, re-probe the port */ + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr = *saddr; + + /* + * We have to exit mutex, because the HBA probe port function may + * block on its own mutex. + */ + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + sata_update_pmport_info(sata_hba_inst, &sata_device); + if (rval != SATA_SUCCESS) { + /* Something went wrong? Fail the port */ + pmportinfo->pmport_state = SATA_PSTATE_FAILED; + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + SATA_LOG_D((sata_hba_inst, CE_WARN, + "SATA port %d:%d probing failed", + saddr->pmport)); + /* + * We may want to release device info structure, but + * it is not necessary. + */ + return; + } else { + /* port probed successfully */ + pmportinfo->pmport_state |= + SATA_STATE_PROBED | SATA_STATE_READY; + } + /* + * Check if a device is still attached. For sanity, check also + * link status - if no link, there is no device. + */ + if ((sata_device.satadev_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) == + SATA_PORT_DEVLINK_UP && sata_device.satadev_type != + SATA_DTYPE_NONE) { + /* + * Device is still attached - ignore detach event. + */ + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); + SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Ignoring detach - device still attached to port %d", + sata_device.satadev_addr.pmport); + return; + } + /* + * We need to detach and release device info structure here + */ + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) { + sdevinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; (void) kmem_free((void *)sdevinfo, sizeof (sata_drive_info_t)); } - cportinfo->cport_dev_type = SATA_DTYPE_NONE; + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; /* * Device cannot be reached anymore, even if the target node may be * still present. */ - - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex); - sata_log(sata_hba_inst, CE_WARN, "SATA device detached at port %d", - sata_device.satadev_addr.cport); + mutex_exit(&SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport)); /* * Try to offline a device and remove target node if it still exists */ - tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport); - if (tdip != NULL) { - /* - * Target node exists. Unconfigure device then remove - * the target node (one ndi operation). - */ - if (ndi_devi_offline(tdip, NDI_DEVI_REMOVE) != NDI_SUCCESS) { - /* - * PROBLEM - no device, but target node remained - * This happens when the file was open or node was - * waiting for resources. - */ - SATA_LOG_D((sata_hba_inst, CE_WARN, - "sata_process_device_detached: " - "Failed to remove target node for " - "detached SATA device.")); - /* - * Set target node state to DEVI_DEVICE_REMOVED. - * But re-check first that the node still exists. - */ - tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), - saddr->cport); - if (tdip != NULL) { - sata_set_device_removed(tdip); - /* - * Instruct event daemon to retry the - * cleanup later. - */ - sata_set_target_node_cleanup(sata_hba_inst, - &sata_device.satadev_addr); - } - } - } + sata_remove_target_node(sata_hba_inst, saddr); + /* * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE * with the hint: SE_HINT_REMOVE @@ -15951,18 +18307,22 @@ * * This function cannot be called in interrupt context (it may sleep). * - * NOTE: Process cports event only, no port multiplier ports. + * NOTE: Port multiplier ports events are handled by + * sata_process_pmdevice_attached() */ static void sata_process_device_attached(sata_hba_inst_t *sata_hba_inst, sata_address_t *saddr) { - sata_cport_info_t *cportinfo; - sata_drive_info_t *sdevinfo; + sata_cport_info_t *cportinfo = NULL; + sata_drive_info_t *sdevinfo = NULL; + sata_pmult_info_t *pmultinfo = NULL; + sata_pmport_info_t *pmportinfo = NULL; sata_device_t sata_device; dev_info_t *tdip; - uint32_t event_flags; + uint32_t event_flags = 0, pmult_event_flags = 0; int rval; + int npmport; SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, "Processing port %d device attached", saddr->cport); @@ -16103,6 +18463,40 @@ cportinfo->cport_event_flags |= SATA_EVNT_DEVICE_ATTACHED; } + } else if (cportinfo->cport_dev_type == SATA_DTYPE_PMULT) { + cportinfo->cport_dev_attach_time = 0; + sata_log(sata_hba_inst, CE_NOTE, + "SATA port-multiplier detected at port %d", + saddr->cport); + + if (SATA_CPORTINFO_PMULT_INFO(cportinfo) != NULL) { + /* Log the info of new port multiplier */ + sata_show_pmult_info(sata_hba_inst, + &sata_device); + } + + ASSERT(SATA_CPORTINFO_PMULT_INFO(cportinfo) != NULL); + pmultinfo = SATA_CPORTINFO_PMULT_INFO(cportinfo); + for (npmport = 0; npmport < + pmultinfo->pmult_num_dev_ports; npmport++) { + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, + saddr->cport, npmport); + ASSERT(pmportinfo != NULL); + + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + mutex_enter(&pmportinfo->pmport_mutex); + /* Marked all pmports with link events. */ + pmportinfo->pmport_event_flags = + SATA_EVNT_LINK_ESTABLISHED; + pmult_event_flags |= + pmportinfo->pmport_event_flags; + mutex_exit(&pmportinfo->pmport_mutex); + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + } + /* Auto-online is not available for PMult now. */ + } else { /* * If device was successfully attached, the subsequent @@ -16138,7 +18532,7 @@ * device was detached. */ tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), - saddr->cport); + saddr->cport, saddr->pmport); mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex); if (tdip != NULL) { @@ -16203,6 +18597,269 @@ event_flags = cportinfo->cport_event_flags; mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex); + if (event_flags != 0 || pmult_event_flags != 0) { + mutex_enter(&sata_hba_inst->satahba_mutex); + sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN; + mutex_exit(&sata_hba_inst->satahba_mutex); + mutex_enter(&sata_mutex); + sata_event_pending |= SATA_EVNT_MAIN; + mutex_exit(&sata_mutex); + } +} + +/* + * Port Multiplier Port Device Attached Event processing. + * + * NOTE: No Mutex should be hold. + */ +static void +sata_process_pmdevice_attached(sata_hba_inst_t *sata_hba_inst, + sata_address_t *saddr) +{ + sata_pmport_info_t *pmportinfo; + sata_drive_info_t *sdinfo; + sata_device_t sata_device; + dev_info_t *tdip; + uint32_t event_flags; + uint8_t cport = saddr->cport; + uint8_t pmport = saddr->pmport; + int rval; + + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Processing port %d:%d device attached", cport, pmport); + + pmportinfo = SATA_PMPORT_INFO(sata_hba_inst, cport, pmport); + + mutex_enter(&pmportinfo->pmport_mutex); + + /* Clear attach event flag first */ + pmportinfo->pmport_event_flags &= ~SATA_EVNT_DEVICE_ATTACHED; + + /* If the port is in SHUTDOWN or FAILED state, ignore event. */ + if ((pmportinfo->pmport_state & + (SATA_PSTATE_SHUTDOWN | SATA_PSTATE_FAILED)) != 0) { + pmportinfo->pmport_dev_attach_time = 0; + mutex_exit(&pmportinfo->pmport_mutex); + return; + } + + /* + * If the sata_drive_info structure is found attached to the port info, + * despite the fact the device was removed and now it is re-attached, + * the old drive info structure was not removed. + * Arbitrarily release device info structure. + */ + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) { + sdinfo = SATA_PMPORTINFO_DRV_INFO(pmportinfo); + SATA_PMPORTINFO_DRV_INFO(pmportinfo) = NULL; + (void) kmem_free((void *)sdinfo, + sizeof (sata_drive_info_t)); + SATADBG1(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Arbitrarily detaching old device info.", NULL); + } + pmportinfo->pmport_dev_type = SATA_DTYPE_NONE; + + /* For sanity, re-probe the port */ + sata_device.satadev_rev = SATA_DEVICE_REV; + sata_device.satadev_addr = *saddr; + + /* + * We have to exit mutex, because the HBA probe port function may + * block on its own mutex. + */ + mutex_exit(&pmportinfo->pmport_mutex); + rval = (*SATA_PROBE_PORT_FUNC(sata_hba_inst)) + (SATA_DIP(sata_hba_inst), &sata_device); + mutex_enter(&pmportinfo->pmport_mutex); + + sata_update_pmport_info(sata_hba_inst, &sata_device); + if (rval != SATA_SUCCESS) { + /* Something went wrong? Fail the port */ + pmportinfo->pmport_state = SATA_PSTATE_FAILED; + pmportinfo->pmport_dev_attach_time = 0; + mutex_exit(&pmportinfo->pmport_mutex); + SATA_LOG_D((sata_hba_inst, CE_WARN, + "SATA port %d:%d probing failed", cport, pmport)); + return; + } else { + /* pmport probed successfully */ + pmportinfo->pmport_state |= + SATA_STATE_PROBED | SATA_STATE_READY; + } + /* + * Check if a device is still attached. For sanity, check also + * link status - if no link, there is no device. + */ + if ((sata_device.satadev_scr.sstatus & SATA_PORT_DEVLINK_UP_MASK) != + SATA_PORT_DEVLINK_UP || sata_device.satadev_type == + SATA_DTYPE_NONE) { + /* + * No device - ignore attach event. + */ + pmportinfo->pmport_dev_attach_time = 0; + mutex_exit(&pmportinfo->pmport_mutex); + SATADBG2(SATA_DBG_EVENTS_PROC, sata_hba_inst, + "Ignoring attach - no device connected to port %d:%d", + cport, pmport); + return; + } + + mutex_exit(&pmportinfo->pmport_mutex); + /* + * Generate sysevent - EC_DR / ESC_DR_AP_STATE_CHANGE + * with the hint: SE_HINT_INSERT + */ + sata_gen_sysevent(sata_hba_inst, saddr, SE_HINT_INSERT); + + /* + * Port reprobing will take care of the creation of the device + * info structure and determination of the device type. + */ + sata_device.satadev_addr = *saddr; + (void) sata_reprobe_port(sata_hba_inst, &sata_device, + SATA_DEV_IDENTIFY_NORETRY); + + mutex_enter(&pmportinfo->pmport_mutex); + if ((pmportinfo->pmport_state & SATA_STATE_READY) && + (pmportinfo->pmport_dev_type != SATA_DTYPE_NONE)) { + /* Some device is attached to the port */ + if (pmportinfo->pmport_dev_type == SATA_DTYPE_UNKNOWN) { + /* + * A device was not successfully attached. + * Track retry time for device identification. + */ + if (pmportinfo->pmport_dev_attach_time != 0) { + clock_t cur_time = ddi_get_lbolt(); + /* + * If the retry time limit was not exceeded, + * reinstate attach event. + */ + if ((cur_time - + pmportinfo->pmport_dev_attach_time) < + drv_usectohz( + SATA_DEV_IDENTIFY_TIMEOUT)) { + /* OK, restore attach event */ + pmportinfo->pmport_event_flags |= + SATA_EVNT_DEVICE_ATTACHED; + } else { + /* Timeout - cannot identify device */ + pmportinfo->pmport_dev_attach_time = 0; + sata_log(sata_hba_inst, CE_WARN, + "Could not identify SATA device " + "at port %d:%d", + cport, pmport); + } + } else { + /* + * Start tracking time for device + * identification. + * Save current time (lbolt value). + */ + pmportinfo->pmport_dev_attach_time = + ddi_get_lbolt(); + /* Restore attach event */ + pmportinfo->pmport_event_flags |= + SATA_EVNT_DEVICE_ATTACHED; + } + } else { + /* + * If device was successfully attached, the subsequent + * action depends on a state of the + * sata_auto_online variable. If it is set to zero. + * an explicit 'configure' command will be needed to + * configure it. If its value is non-zero, we will + * attempt to online (configure) the device. + * First, log the message indicating that a device + * was attached. + */ + pmportinfo->pmport_dev_attach_time = 0; + sata_log(sata_hba_inst, CE_WARN, + "SATA device detected at port %d:%d", + cport, pmport); + + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) { + sata_drive_info_t new_sdinfo; + + /* Log device info data */ + new_sdinfo = *(SATA_PMPORTINFO_DRV_INFO( + pmportinfo)); + sata_show_drive_info(sata_hba_inst, + &new_sdinfo); + } + + mutex_exit(&pmportinfo->pmport_mutex); + + /* + * Make sure that there is no target node for that + * device. If so, release it. It should not happen, + * unless we had problem removing the node when + * device was detached. + */ + tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), + saddr->cport, saddr->pmport); + mutex_enter(&pmportinfo->pmport_mutex); + if (tdip != NULL) { + +#ifdef SATA_DEBUG + if ((pmportinfo->pmport_event_flags & + SATA_EVNT_TARGET_NODE_CLEANUP) == 0) + sata_log(sata_hba_inst, CE_WARN, + "sata_process_device_attached: " + "old device target node exists!"); +#endif + /* + * target node exists - try to unconfigure + * device and remove the node. + */ + mutex_exit(&pmportinfo->pmport_mutex); + rval = ndi_devi_offline(tdip, + NDI_DEVI_REMOVE); + mutex_enter(&pmportinfo->pmport_mutex); + + if (rval == NDI_SUCCESS) { + pmportinfo->pmport_event_flags &= + ~SATA_EVNT_TARGET_NODE_CLEANUP; + pmportinfo->pmport_tgtnode_clean = + B_TRUE; + } else { + /* + * PROBLEM - the target node remained + * and it belongs to a previously + * attached device. + * This happens when the file was open + * or the node was waiting for + * resources at the time the + * associated device was removed. + * Instruct event daemon to retry the + * cleanup later. + */ + sata_log(sata_hba_inst, + CE_WARN, + "Application(s) accessing " + "previously attached SATA " + "device have to release " + "it before newly inserted " + "device can be made accessible." + "at port %d:%d", + cport, pmport); + pmportinfo->pmport_event_flags |= + SATA_EVNT_TARGET_NODE_CLEANUP; + pmportinfo->pmport_tgtnode_clean = + B_FALSE; + } + } + if (sata_auto_online != 0) { + pmportinfo->pmport_event_flags |= + SATA_EVNT_AUTOONLINE_DEVICE; + } + + } + } else { + pmportinfo->pmport_dev_attach_time = 0; + } + + event_flags = pmportinfo->pmport_event_flags; + mutex_exit(&pmportinfo->pmport_mutex); if (event_flags != 0) { mutex_enter(&sata_hba_inst->satahba_mutex); sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN; @@ -16211,8 +18868,19 @@ sata_event_pending |= SATA_EVNT_MAIN; mutex_exit(&sata_mutex); } -} - + + /* clear the reset_in_progress events */ + if (SATA_PMPORTINFO_DRV_INFO(pmportinfo) != NULL) { + if (pmportinfo->pmport_dev_type & SATA_VALID_DEV_TYPE) { + /* must clear flags on cport */ + sata_pmult_info_t *pminfo = + SATA_PMULT_INFO(sata_hba_inst, + saddr->cport); + pminfo->pmult_event_flags |= + SATA_EVNT_CLEAR_DEVICE_RESET; + } + } +} /* * Device Target Node Cleanup Event processing. @@ -16241,7 +18909,8 @@ * Check if there is target node for that device and it is in the * DEVI_DEVICE_REMOVED state. If so, release it. */ - tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport); + tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport, + saddr->pmport); if (tdip != NULL) { /* * target node exists - check if it is target node of @@ -16275,12 +18944,32 @@ mutex_exit(&sata_mutex); } } else { - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, - saddr->cport)->cport_mutex); - cportinfo->cport_event_flags &= - ~SATA_EVNT_TARGET_NODE_CLEANUP; - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, - saddr->cport)->cport_mutex); + if (saddr->qual == SATA_ADDR_CPORT || + saddr->qual == SATA_ADDR_DCPORT) { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + cportinfo->cport_event_flags &= + ~SATA_EVNT_TARGET_NODE_CLEANUP; + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + } else { + /* sanity check */ + if (SATA_CPORT_DEV_TYPE(sata_hba_inst, saddr->cport) != + SATA_DTYPE_PMULT || SATA_PMULT_INFO(sata_hba_inst, + saddr->cport) == NULL) + return; + if (SATA_PMPORT_INFO(sata_hba_inst, saddr->cport, + saddr->pmport) == NULL) + return; + + mutex_enter(&SATA_PMPORT_INFO(sata_hba_inst, + saddr->cport, saddr->pmport)->pmport_mutex); + SATA_PMPORT_INFO(sata_hba_inst, saddr->cport, + saddr->pmport)->pmport_event_flags &= + ~SATA_EVNT_TARGET_NODE_CLEANUP; + mutex_exit(&SATA_PMPORT_INFO(sata_hba_inst, + saddr->cport, saddr->pmport)->pmport_mutex); + } } } @@ -16327,7 +19016,8 @@ * DEVI_DEVICE_REMOVED state. If so, abort onlining but keep * the event for later processing. */ - tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport); + tdip = sata_get_target_dip(SATA_DIP(sata_hba_inst), saddr->cport, + saddr->pmport); if (tdip != NULL) { /* * target node exists - check if it is target node of @@ -16490,12 +19180,26 @@ sata_set_target_node_cleanup(sata_hba_inst_t *sata_hba_inst, sata_address_t *saddr) { - mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex); - SATA_CPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport) |= - SATA_EVNT_TARGET_NODE_CLEANUP; - SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_tgtnode_clean = - B_FALSE; - mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, saddr->cport)->cport_mutex); + if (saddr->qual == SATA_ADDR_CPORT || + saddr->qual == SATA_ADDR_DCPORT) { + mutex_enter(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + SATA_CPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport) |= + SATA_EVNT_TARGET_NODE_CLEANUP; + SATA_CPORT_INFO(sata_hba_inst, saddr->cport)-> + cport_tgtnode_clean = B_FALSE; + mutex_exit(&SATA_CPORT_INFO(sata_hba_inst, + saddr->cport)->cport_mutex); + } else { + mutex_enter(&SATA_PMPORT_INFO(sata_hba_inst, + saddr->cport, saddr->pmport)->pmport_mutex); + SATA_PMPORT_EVENT_FLAGS(sata_hba_inst, saddr->cport, + saddr->pmport) |= SATA_EVNT_TARGET_NODE_CLEANUP; + SATA_PMPORT_INFO(sata_hba_inst, saddr->cport, saddr->pmport)-> + pmport_tgtnode_clean = B_FALSE; + mutex_exit(&SATA_PMPORT_INFO(sata_hba_inst, + saddr->cport, saddr->pmport)->pmport_mutex); + } mutex_enter(&sata_hba_inst->satahba_mutex); sata_hba_inst->satahba_event_flags |= SATA_EVNT_MAIN; mutex_exit(&sata_hba_inst->satahba_mutex); @@ -16512,8 +19216,6 @@ * * Returns B_TRUE if the target node is in DEVI_DEVICE_REMOVED state, * B_FALSE otherwise. - * - * NOTE: No port multiplier support. */ static boolean_t sata_check_device_removed(dev_info_t *tdip)
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_conn.c Mon Aug 17 09:14:35 2009 -0400 @@ -478,13 +478,14 @@ /* * This logic assumes that the IDM login-snooping code - * and the initiator login code will agree on whether - * the connection is in FFP. The reason we do this - * is that we don't want to process CN_FFP_DISABLED until - * CN_FFP_ENABLED has been full handled. + * and the initiator login code will agree to go when + * the connection is in FFP or final error received. + * The reason we do this is that we don't want to process + * CN_FFP_DISABLED until CN_FFP_ENABLED has been full handled. */ mutex_enter(&icp->conn_login_mutex); - while (icp->conn_login_state != LOGIN_FFP) { + while ((icp->conn_login_state != LOGIN_FFP) && + (icp->conn_login_state != LOGIN_ERROR)) { cv_wait(&icp->conn_login_cv, &icp->conn_login_mutex); } mutex_exit(&icp->conn_login_mutex); @@ -524,8 +525,13 @@ case FD_CONN_FAIL: default: - iscsi_conn_update_state_locked(icp, - ISCSI_CONN_STATE_FAILED); + if (icp->conn_state == ISCSI_CONN_STATE_IN_LOGIN) { + iscsi_conn_update_state_locked(icp, + ISCSI_CONN_STATE_FREE); + } else { + iscsi_conn_update_state_locked(icp, + ISCSI_CONN_STATE_FAILED); + } break; } @@ -544,7 +550,8 @@ * what CN_CONNECT_LOST means to us. */ in_login = (boolean_t)data; - if (in_login) { + if (in_login || + (icp->conn_prev_state == ISCSI_CONN_STATE_IN_LOGIN)) { mutex_enter(&icp->conn_state_mutex); icp->conn_state_idm_connected = B_FALSE;
--- a/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_login.c Mon Aug 17 09:14:35 2009 -0400 @@ -57,6 +57,14 @@ #define ISCSI_LOGIN_RETRY_DELAY 5 /* seconds */ +#define ISCSI_LOGIN_TRANSIT_FFP(flags) \ + (!(flags & ISCSI_FLAG_LOGIN_CONTINUE) && \ + (flags & ISCSI_FLAG_LOGIN_TRANSIT) && \ + (ISCSI_LOGIN_CURRENT_STAGE(flags) == \ + ISCSI_OP_PARMS_NEGOTIATION_STAGE) && \ + (ISCSI_LOGIN_NEXT_STAGE(flags) == \ + ISCSI_FULL_FEATURE_PHASE)) + /* * +--------------------------------------------------------------------+ * | External Login Interface | @@ -310,7 +318,11 @@ } } break; + case ISCSI_CONN_STATE_FREE: + mutex_exit(&icp->conn_state_mutex); + break; default: + mutex_exit(&icp->conn_state_mutex); ASSERT(0); break; } @@ -535,6 +547,18 @@ icp->conn_login_max_data_length); /* pass back whatever error we discovered */ if (!ISCSI_SUCCESS(rval)) { + if (ISCSI_LOGIN_TRANSIT_FFP(ilrhp->flags)) { + /* + * iSCSI connection transit to next + * FFP stage while iscsi params + * ngeotiate error, LOGIN_ERROR + * marked so CN_FFP_ENABLED can + * be fully handled before + * CN_FFP_DISABLED can be processed. + */ + iscsi_login_update_state(icp, + LOGIN_ERROR); + } goto iscsi_login_done; }
--- a/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs_tape.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/scsi/adapters/scsi_vhci/fops/tpgs_tape.c Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -96,7 +96,11 @@ tpgs_tape_device_probe(struct scsi_device *sd, struct scsi_inquiry *inq, void **ctpriv) { - int mode, state, xlf, preferred = 0; + int mode; + int state; + int xlf; + int preferred = 0; + int support; VHCI_DEBUG(6, (CE_NOTE, NULL, "tpgs_tape_device_probe: vidpid %s\n", inq->inq_vid)); @@ -104,48 +108,58 @@ if (inq->inq_tpgs == TPGS_FAILOVER_NONE) { VHCI_DEBUG(4, (CE_WARN, NULL, "!tpgs_tape_device_probe: not a standard tpgs device")); - return (SFO_DEVICE_PROBE_PHCI); - } - - if (inq->inq_dtype != DTYPE_SEQUENTIAL) { + support = SFO_DEVICE_PROBE_PHCI; + } else if (inq->inq_dtype != DTYPE_SEQUENTIAL) { VHCI_DEBUG(4, (CE_NOTE, NULL, "!tpgs_tape_device_probe: Detected a " "Standard Asymmetric device " "not yet supported\n")); - return (SFO_DEVICE_PROBE_PHCI); - } - - if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) { + support = SFO_DEVICE_PROBE_PHCI; + } else if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, + &preferred)) { VHCI_DEBUG(4, (CE_WARN, NULL, "!unable to fetch fo " "mode: sd(%p)", (void *) sd)); - return (SFO_DEVICE_PROBE_PHCI); - } - - if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) { + support = SFO_DEVICE_PROBE_PHCI; + } else if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) { VHCI_DEBUG(1, (CE_NOTE, NULL, "!tpgs_tape_device_probe: Detected a " "Standard Asymmetric device " "with implicit failover\n")); - return (SFO_DEVICE_PROBE_VHCI); - } - if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) { + support = SFO_DEVICE_PROBE_VHCI; + } else if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) { VHCI_DEBUG(1, (CE_NOTE, NULL, "!tpgs_tape_device_probe: Detected a " "Standard Asymmetric device " "with explicit failover\n")); - return (SFO_DEVICE_PROBE_VHCI); - } - if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) { + support = SFO_DEVICE_PROBE_VHCI; + } else if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) { VHCI_DEBUG(1, (CE_NOTE, NULL, "!tpgs_tape_device_probe: Detected a " "Standard Asymmetric device " "which supports both implicit and explicit failover\n")); - return (SFO_DEVICE_PROBE_VHCI); + support = SFO_DEVICE_PROBE_VHCI; + } else { + VHCI_DEBUG(1, (CE_WARN, NULL, + "!tpgs_tape_device_probe: " + "Unknown tpgs_bits: %x", inq->inq_tpgs)); + support = SFO_DEVICE_PROBE_PHCI; } - VHCI_DEBUG(1, (CE_WARN, NULL, - "!tpgs_tape_device_probe: " - "Unknown tpgs_bits: %x", inq->inq_tpgs)); - return (SFO_DEVICE_PROBE_PHCI); + + if (support == SFO_DEVICE_PROBE_VHCI) { + /* + * Policy only applies to 'client' probe, not + * vhci_is_dev_supported() pHCI probe. Detect difference + * based on ctpriv. + */ + if (ctpriv && + (mdi_set_lb_policy(sd->sd_dev, LOAD_BALANCE_NONE) != + MDI_SUCCESS)) { + VHCI_DEBUG(6, (CE_NOTE, NULL, "!fail load balance none" + ": %s\n", inq->inq_vid)); + support = SFO_DEVICE_PROBE_PHCI; + } + } + return (support); } static void
--- a/usr/src/uts/common/io/scsi/targets/sd.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/scsi/targets/sd.c Mon Aug 17 09:14:35 2009 -0400 @@ -879,6 +879,7 @@ #define sd_blank_cmp ssd_blank_cmp #define sd_chk_vers1_data ssd_chk_vers1_data #define sd_set_vers1_properties ssd_set_vers1_properties +#define sd_check_solid_state ssd_check_solid_state #define sd_get_physical_geometry ssd_get_physical_geometry #define sd_get_virtual_geometry ssd_get_virtual_geometry @@ -1271,6 +1272,7 @@ static int sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled); static void sd_get_nv_sup(sd_ssc_t *ssc); static dev_t sd_make_device(dev_info_t *devi); +static void sd_check_solid_state(sd_ssc_t *ssc); static void sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity); @@ -5667,8 +5669,8 @@ * Description: This routine sends an inquiry command with the EVPD bit set and * a page code of 0x00 to the device. It is used to determine which * vital product pages are available to find the devid. We are - * looking for pages 0x83 or 0x80. If we return a negative 1, the - * device does not support that command. + * looking for pages 0x83 0x80 or 0xB1. If we return a negative 1, + * the device does not support that command. * * Arguments: un - driver soft state (unit) structure * @@ -5724,7 +5726,7 @@ * Pages are returned in ascending order, and 0x83 is what we * are hoping for. */ - while ((page_list[counter] <= 0x86) && + while ((page_list[counter] <= 0xB1) && (counter <= (page_list[VPD_PAGE_LENGTH] + VPD_HEAD_OFFSET))) { /* @@ -5751,6 +5753,9 @@ case 0x86: un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG; break; + case 0xB1: + un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG; + break; } counter++; } @@ -8092,6 +8097,11 @@ un->un_mediastate = DKIO_NONE; + /* + * Check if this is a SSD(Solid State Drive). + */ + sd_check_solid_state(ssc); + cmlb_alloc_handle(&un->un_cmlbhandle); #if defined(__i386) || defined(__amd64) @@ -30828,6 +30838,8 @@ mutex_enter(SD_MUTEX(un)); ((tg_attribute_t *)arg)->media_is_writable = un->un_f_mmc_writable_media; + ((tg_attribute_t *)arg)->media_is_solid_state = + un->un_f_is_solid_state; mutex_exit(SD_MUTEX(un)); return (0); default: @@ -31289,3 +31301,73 @@ xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1); ssc->ssc_uscsi_info->ui_ena = xp->xb_ena; } + + +/* + * Function: sd_check_solid_state + * + * Description: Query the optional INQUIRY VPD page 0xb1. If the device + * supports VPD page 0xb1, sd examines the MEDIUM ROTATION + * RATE. If the MEDIUM ROTATION RATE is 1, sd assumes the + * device is a solid state drive. + * + * Context: Kernel thread or interrupt context. + */ + +static void +sd_check_solid_state(sd_ssc_t *ssc) +{ + int rval = 0; + uchar_t *inqb1 = NULL; + size_t inqb1_len = MAX_INQUIRY_SIZE; + size_t inqb1_resid = 0; + struct sd_lun *un; + + ASSERT(ssc != NULL); + un = ssc->ssc_un; + ASSERT(un != NULL); + ASSERT(!mutex_owned(SD_MUTEX(un))); + + mutex_enter(SD_MUTEX(un)); + un->un_f_is_solid_state = FALSE; + + if (ISCD(un)) { + mutex_exit(SD_MUTEX(un)); + return; + } + + if (sd_check_vpd_page_support(ssc) == 0 && + un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) { + mutex_exit(SD_MUTEX(un)); + /* collect page b1 data */ + inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP); + + rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len, + 0x01, 0xB1, &inqb1_resid); + + if (rval == 0 && (inqb1_len - inqb1_resid > 5)) { + SD_TRACE(SD_LOG_COMMON, un, + "sd_check_solid_state: \ + successfully get VPD page: %x \ + PAGE LENGTH: %x BYTE 4: %x \ + BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4], + inqb1[5]); + + mutex_enter(SD_MUTEX(un)); + /* + * Check the MEDIUM ROTATION RATE. If it is set + * to 1, the device is a solid state drive. + */ + if (inqb1[4] == 0 && inqb1[5] == 1) { + un->un_f_is_solid_state = TRUE; + } + mutex_exit(SD_MUTEX(un)); + } else if (rval != 0) { + sd_ssc_assessment(ssc, SD_FMT_IGNORE); + } + + kmem_free(inqb1, inqb1_len); + } else { + mutex_exit(SD_MUTEX(un)); + } +}
--- a/usr/src/uts/common/io/srn.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/srn.c Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -78,6 +78,7 @@ int srn_type[SRN_MAX_CLONE]; /* type of handshake */ int srn_delivered[SRN_MAX_CLONE]; srn_event_info_t srn_pending[SRN_MAX_CLONE]; + int srn_fault[SRN_MAX_CLONE]; } srn = { NULL, -1}; typedef struct srnstate *srn_state_t; @@ -85,6 +86,9 @@ uint_t srn_poll_cnt[SRN_MAX_CLONE]; /* count of events for poll */ int srn_apm_count; int srn_autosx_count; +/* Number of seconds to wait for clients to ack a poll */ +int srn_timeout = 10; + struct pollhead srn_pollhead[SRN_MAX_CLONE]; static int srn_open(dev_t *, int, int, cred_t *); @@ -251,7 +255,7 @@ srn_chpoll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { - extern struct pollhead srn_pollhead[]; /* common/os/sunpm.c */ + extern struct pollhead srn_pollhead[]; int clone; clone = SRN_MINOR_TO_CLONE(getminor(dev)); @@ -342,6 +346,7 @@ crfree(srn.srn_cred[clone]); srn.srn_cred[clone] = 0; srn_poll_cnt[clone] = 0; + srn.srn_fault[clone] = 0; if (srn.srn_pending[clone].ae_type || srn.srn_delivered[clone]) { srn.srn_pending[clone].ae_type = 0; srn.srn_delivered[clone] = 0; @@ -407,6 +412,7 @@ } ASSERT(srn.srn_type[clone] == SRN_TYPE_APM); srn.srn_type[clone] = SRN_TYPE_AUTOSX; + srn.srn_fault[clone] = 0; srn_apm_count--; ASSERT(srn_apm_count >= 0); ASSERT(srn_autosx_count >= 0); @@ -423,6 +429,11 @@ * then wake up the kernel thread sleeping for the delivery */ PMD(PMD_SX, ("SRN_IOC_NEXTEVENT entered\n")) + if (srn.srn_fault[clone]) { + PMD(PMD_SX, ("SRN_IOC_NEXTEVENT clone %d fault " + "cleared\n", clone)) + srn.srn_fault[clone] = 0; + } mutex_enter(&srn_clone_lock); if (srn_poll_cnt[clone] == 0) { mutex_exit(&srn_clone_lock); @@ -450,6 +461,11 @@ case SRN_IOC_SUSPEND: /* ack suspend */ PMD(PMD_SX, ("SRN_IOC_SUSPEND entered clone %d\n", clone)) + if (srn.srn_fault[clone]) { + PMD(PMD_SX, ("SRN_IOC_SUSPEND clone %d fault " + "cleared\n", clone)) + srn.srn_fault[clone] = 0; + } mutex_enter(&srn_clone_lock); if (srn.srn_delivered[clone] != SRN_SUSPEND_REQ) { mutex_exit(&srn_clone_lock); @@ -467,6 +483,11 @@ case SRN_IOC_RESUME: /* ack resume */ PMD(PMD_SX, ("SRN_IOC_RESUME entered clone %d\n", clone)) + if (srn.srn_fault[clone]) { + PMD(PMD_SX, ("SRN_IOC_RESUME clone %d fault " + "cleared\n", clone)) + srn.srn_fault[clone] = 0; + } mutex_enter(&srn_clone_lock); if (srn.srn_delivered[clone] != SRN_NORMAL_RESUME) { mutex_exit(&srn_clone_lock); @@ -522,11 +543,13 @@ PMD(PMD_SX, ("count %d\n", count)) for (clone = 0; clone < SRN_MAX_CLONE; clone++) { if (srn.srn_type[clone] == type) { +#ifdef DEBUG if (type == SRN_TYPE_APM) { ASSERT(srn.srn_pending[clone].ae_type == 0); ASSERT(srn_poll_cnt[clone] == 0); ASSERT(srn.srn_delivered[clone] == 0); } +#endif srn.srn_pending[clone].ae_type = event; srn_poll_cnt[clone] = 1; PMD(PMD_SX, ("pollwake %d\n", clone)) @@ -544,7 +567,7 @@ /* otherwise wait for acks */ restart: /* - * We wait untill all of the pending events are cleared. + * We wait until all of the pending events are cleared. * We have to start over every time we do a cv_wait because * we give up the mutex and can be re-entered */ @@ -552,10 +575,22 @@ if (srn.srn_clones[clone] == 0 || srn.srn_type[clone] != SRN_TYPE_APM) continue; - if (srn.srn_pending[clone].ae_type) { + if (srn.srn_pending[clone].ae_type && !srn.srn_fault[clone]) { PMD(PMD_SX, ("srn_notify waiting for ack for clone %d, " "event %x\n", clone, event)) - cv_wait(&srn_clones_cv[clone], &srn_clone_lock); + if (cv_timedwait(&srn_clones_cv[clone], + &srn_clone_lock, ddi_get_lbolt() + + drv_usectohz(srn_timeout * 1000000)) == -1) { + /* + * Client didn't respond, mark it as faulted + * and continue as if a regular signal. + */ + PMD(PMD_SX, ("srn_notify: clone %d did not " + "ack event %x\n", clone, event)) + cmn_err(CE_WARN, "srn_notify: clone %d did " + "not ack event %x\n", clone, event); + srn.srn_fault[clone] = 1; + } goto restart; } }
--- a/usr/src/uts/common/io/usb/usba/usba.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/usb/usba/usba.c Mon Aug 17 09:14:35 2009 -0400 @@ -73,8 +73,11 @@ /* * compatible name handling */ -/* allowing for 15 compat names, plus one force bind name */ -#define USBA_MAX_COMPAT_NAMES 16 +/* + * allowing for 15 compat names, plus one force bind name and + * one possible specified client driver name + */ +#define USBA_MAX_COMPAT_NAMES 17 #define USBA_MAX_COMPAT_NAME_LEN 64 /* double linked list for usba_devices */ @@ -1841,7 +1844,7 @@ usb_dev_descr_t *usb_dev_descr; uint_t n_cfgs; /* number of configs */ uint_t n_ifs; /* number of interfaces */ - uint_t port; + uint_t port, bus_num; size_t usb_config_length; uchar_t *usb_config; int reg[1]; @@ -1867,7 +1870,7 @@ usb_dev_descr = usba_device->usb_dev_descr; n_cfgs = usba_device->usb_n_cfgs; n_ifs = usba_device->usb_n_ifs; - + bus_num = usba_device->usb_addr; if (address != ROOT_HUB_ADDR) { size = usb_parse_if_descr( @@ -1974,6 +1977,35 @@ USBA_MAX_COMPAT_NAME_LEN); } + /* + * If the callback function of specified driver is registered, + * it will be called here to check whether to take over the device. + */ + if (usb_cap.usba_dev_driver_cb != NULL) { + char *dev_drv = NULL; + usb_dev_str_t dev_str; + char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + dev_str.usb_mfg = usba_device->usb_mfg_str; + dev_str.usb_product = usba_device->usb_product_str; + dev_str.usb_serialno = usba_device->usb_serialno_str; + + (void) ddi_pathname(child_dip, pathname); + + if ((usb_cap.usba_dev_driver_cb(usb_dev_descr, &dev_str, + pathname, bus_num, port, &dev_drv, NULL) == USB_SUCCESS) && + (dev_drv != NULL)) { + USB_DPRINTF_L3(DPRINT_MASK_USBA, usba_log_handle, + "usba_ready_device_node: dev_driver=%s, port =%d," + "bus =%d, path=%s\n\t", + dev_drv, port, bus_num, pathname); + + (void) strncpy(usba_name[n++], dev_drv, + USBA_MAX_COMPAT_NAME_LEN); + } + kmem_free(pathname, MAXPATHLEN); + } + /* create compatible names */ if (combined_node) {
--- a/usr/src/uts/common/io/usb/usba/usbai.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/usb/usba/usbai.c Mon Aug 17 09:14:35 2009 -0400 @@ -18,7 +18,7 @@ * * CDDL HEADER END * - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -82,7 +82,6 @@ */ int usb_force_enable_pm = 0; - /* USBA framework initializations */ void usba_usbai_initialization() @@ -1288,3 +1287,38 @@ { return (usba_hubdi_reset_device(dip, reset_level)); } + +/* + * usb device driver registration + */ +int +usb_register_dev_driver(dev_info_t *dip, usb_dev_driver_callback_t cb) +{ + USB_DPRINTF_L4(DPRINT_MASK_USBAI, usbai_log_handle, + "usb_register_dev_driver: register the specified driver " + "in usba: dip = 0x%p", (void *)dip); + + if (cb != NULL) { + usb_cap.dip = dip; + usb_cap.usba_dev_driver_cb = cb; + + return (USB_SUCCESS); + } + + return (USB_FAILURE); +} + +/* + * usb device driver unregistration + */ +void +usb_unregister_dev_driver(dev_info_t *dip) +{ + USB_DPRINTF_L4(DPRINT_MASK_USBAI, usbai_log_handle, + "usb_unregister_dev_driver: unregister the registered " + "driver: dip =0x%p", (void *)dip); + + ASSERT(dip == usb_cap.dip); + usb_cap.dip = NULL; + usb_cap.usba_dev_driver_cb = NULL; +}
--- a/usr/src/uts/common/io/warlock/hid_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/hid_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -86,6 +86,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usba_common_power root usba_common_register_events
--- a/usr/src/uts/common/io/warlock/scsa2usb_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/scsa2usb_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -101,6 +101,8 @@ root usb_register_client root usb_ugen_power root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usba_common_power root usba_common_register_events
--- a/usr/src/uts/common/io/warlock/ugen_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/ugen_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -75,6 +75,8 @@ root usb_is_pm_enabled root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usba_common_power root usba_common_register_events
--- a/usr/src/uts/common/io/warlock/usb_ia_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usb_ia_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -128,7 +128,8 @@ root usb_ugen_reconnect_ev_cb root usb_ugen_write root usba_ready_interface_association_node - +root usb_register_dev_driver +root usb_unregister_dev_driver root hubd_root_hub_cleanup_thread root hubd_restore_state_cb
--- a/usr/src/uts/common/io/warlock/usb_mid_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usb_mid_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -120,6 +120,8 @@ root usb_register_client root usb_reset_device root usb_ugen_power +root usb_register_dev_driver +root usb_unregister_dev_driver root hubd_root_hub_cleanup_thread root hubd_restore_state_cb
--- a/usr/src/uts/common/io/warlock/usbftdi_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbftdi_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -95,6 +95,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbprn_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbprn_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -89,6 +89,8 @@ root usb_register_client root usb_reset_device root usb_ugen_power +root usb_register_dev_driver +root usb_unregister_dev_driver root hcdi_autoclearing root hcdi_cb_thread
--- a/usr/src/uts/common/io/warlock/usbsacm_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbsacm_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -17,10 +17,9 @@ # # CDDL HEADER END # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" one ohci_state one ehci_state @@ -99,6 +98,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbser_edge_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbser_edge_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -79,6 +79,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbser_keyspan_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbser_keyspan_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -100,6 +100,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbskel_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbskel_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -84,6 +84,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbsprl_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbsprl_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -94,6 +94,8 @@ root usb_get_current_cfgidx root usb_register_client root usb_reset_device +root usb_register_dev_driver +root usb_unregister_dev_driver root usb_ugen_attach root usb_ugen_close
--- a/usr/src/uts/common/io/warlock/usbvc_with_usba.wlcmd Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/io/warlock/usbvc_with_usba.wlcmd Mon Aug 17 09:14:35 2009 -0400 @@ -100,7 +100,8 @@ root usb_ugen_read root usb_ugen_reconnect_ev_cb root usb_ugen_write - +root usb_register_dev_driver +root usb_unregister_dev_driver root hcdi_autoclearing root hcdi_cb_thread
--- a/usr/src/uts/common/os/sunpm.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/os/sunpm.c Mon Aug 17 09:14:35 2009 -0400 @@ -5222,7 +5222,7 @@ ASSERT(keeper != kept); if (PM_GET_PM_INFO(keeper) == NULL) { cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device " - "%s@%s(%s#%d), but the latter is not power managed", + "%s@%s(%s#%d), but the former is not power managed", PM_DEVICE(keeper), PM_DEVICE(kept)); PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not" "power managed\n", pmf, PM_DEVICE(keeper)))
--- a/usr/src/uts/common/sys/cmlb.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/cmlb.h Mon Aug 17 09:14:35 2009 -0400 @@ -49,6 +49,7 @@ typedef struct tg_attribute { int media_is_writable; + int media_is_solid_state; } tg_attribute_t;
--- a/usr/src/uts/common/sys/fs/zfs.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/fs/zfs.h Mon Aug 17 09:14:35 2009 -0400 @@ -116,6 +116,7 @@ ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, + ZFS_PROP_LOGBIAS, ZFS_NUM_PROPS } zfs_prop_t; @@ -245,6 +246,11 @@ ZFS_CANMOUNT_NOAUTO = 2 } zfs_canmount_type_t; +typedef enum { + ZFS_LOGBIAS_LATENCY = 0, + ZFS_LOGBIAS_THROUGHPUT = 1 +} zfs_logbias_op_t; + typedef enum zfs_share_op { ZFS_SHARE_NFS = 0, ZFS_UNSHARE_NFS = 1,
--- a/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/ib/clients/ibd/ibd.h Mon Aug 17 09:14:35 2009 -0400 @@ -297,7 +297,6 @@ uint16_t id_pkix; uint8_t id_port; ibt_mcg_info_t *id_mcinfo; - boolean_t id_bgroup_created; mac_handle_t id_mh; mac_resource_handle_t id_rh; @@ -363,6 +362,9 @@ int id_hca_res_lkey_capab; ibt_lkey_t id_res_lkey; + boolean_t id_bgroup_created; + kmutex_t id_macst_lock; + kcondvar_t id_macst_cv; uint32_t id_mac_state; } ibd_state_t;
--- a/usr/src/uts/common/sys/mac_client_priv.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/mac_client_priv.h Mon Aug 17 09:14:35 2009 -0400 @@ -120,7 +120,7 @@ extern void mac_rx_client_restart(mac_client_handle_t); extern void mac_srs_perm_quiesce(mac_client_handle_t, boolean_t); extern int mac_hwrings_get(mac_client_handle_t, mac_group_handle_t *, - mac_ring_handle_t *); + mac_ring_handle_t *, mac_ring_type_t); extern void mac_hwring_setup(mac_ring_handle_t, mac_resource_handle_t); extern void mac_hwring_teardown(mac_ring_handle_t); extern int mac_hwring_disable_intr(mac_ring_handle_t);
--- a/usr/src/uts/common/sys/mac_impl.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/mac_impl.h Mon Aug 17 09:14:35 2009 -0400 @@ -262,7 +262,7 @@ #define MAC_RING_TX_DEFAULT(mip, mp) \ ((mip->mi_default_tx_ring == NULL) ? \ mip->mi_tx(mip->mi_driver, mp) : \ - mac_ring_tx(mip->mi_default_tx_ring, mp)) + mac_hwring_tx(mip->mi_default_tx_ring, mp)) #define MAC_TX(mip, ring, mp, mcip) { \ /* \ @@ -275,7 +275,7 @@ (ring == NULL)) \ mp = MAC_RING_TX_DEFAULT(mip, mp); \ else \ - mp = mac_ring_tx(ring, mp); \ + mp = mac_hwring_tx(ring, mp); \ } /* mci_tx_flag */ @@ -585,7 +585,7 @@ extern int mac_group_remmac(mac_group_t *, const uint8_t *); extern int mac_rx_group_add_flow(mac_client_impl_t *, flow_entry_t *, mac_group_t *); -extern mblk_t *mac_ring_tx(mac_ring_handle_t, mblk_t *); +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); extern mac_ring_t *mac_reserve_tx_ring(mac_impl_t *, mac_ring_t *); extern void mac_release_tx_ring(mac_ring_handle_t); extern mac_group_t *mac_reserve_tx_group(mac_impl_t *, mac_share_handle_t);
--- a/usr/src/uts/common/sys/mac_soft_ring.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/mac_soft_ring.h Mon Aug 17 09:14:35 2009 -0400 @@ -131,6 +131,9 @@ void *st_arg1; void *st_arg2; mac_group_t *st_group; /* TX group for share */ + uint32_t st_ring_count; /* no. of tx rings */ + mac_ring_handle_t *st_rings; + boolean_t st_woken_up; /*
--- a/usr/src/uts/common/sys/multiboot.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/multiboot.h Mon Aug 17 09:14:35 2009 -0400 @@ -35,8 +35,13 @@ * Definitions of structures/data for using a multiboot compliant OS loader. */ #define MB_HEADER_MAGIC 0x1BADB002 /* magic */ -#define MB_HEADER_FLAGS 0x00010003 /* flags we use */ -#define MB_HEADER_CHECKSUM -0x1BAEB005 /* -(magic + flag) */ + +/* The 32-bit kernel does not require the use of the AOUT kludge */ +#define MB_HEADER_FLAGS_32 0x00000003 /* flags we use */ +#define MB_HEADER_CHECKSUM_32 -0x1BADB005 /* -(magic + flag) */ + +#define MB_HEADER_FLAGS_64 0x00010003 /* flags we use */ +#define MB_HEADER_CHECKSUM_64 -0x1BAEB005 /* -(magic + flag) */ /* * passed by boot loader to kernel
--- a/usr/src/uts/common/sys/nxge/nxge_hio.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/nxge/nxge_hio.h Mon Aug 17 09:14:35 2009 -0400 @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,9 +35,6 @@ #include <nxge_ipp.h> #include <nxge_fflp.h> #include <sys/mac_provider.h> -#if defined(sun4v) -#include <sys/vnet_res.h> -#endif #define isLDOMservice(nxge) \ (nxge->environs == SOLARIS_SERVICE_DOMAIN) @@ -46,6 +43,9 @@ #define isLDOMs(nxge) \ (isLDOMservice(nxge) || isLDOMguest(nxge)) +#define NXGE_HIO_SHARE_MIN_CHANNELS 2 +#define NXGE_HIO_SHARE_MAX_CHANNELS 2 + /* ------------------------------------------------------------------ */ typedef uint8_t nx_rdc_t; typedef uint8_t nx_tdc_t; @@ -88,37 +88,19 @@ dc_getinfo getinfo; } nxhv_dc_fp_t; -#if defined(sun4v) -typedef struct { - vio_net_resource_reg_t __register; - vio_net_resource_unreg_t unregister; - - vio_net_callbacks_t cb; - -} nx_vio_fp_t; -#endif - typedef struct { boolean_t ldoms; - nxhv_vr_fp_t vr; nxhv_dc_fp_t tx; nxhv_dc_fp_t rx; - -#if defined(sun4v) - nx_vio_fp_t vio; -#endif - } nxhv_fp_t; /* ------------------------------------------------------------------ */ #define NXGE_VR_SR_MAX 8 /* There are 8 subregions (SR). */ typedef enum { - NXGE_HIO_TYPE_SERVICE, /* We are a service domain driver. */ NXGE_HIO_TYPE_GUEST /* We are a guest domain driver. */ - } nxge_hio_type_t; typedef enum { @@ -130,7 +112,6 @@ FUNC2_VIR = 0x5000000, FUNC3_MNT = 0x6000000, FUNC3_VIR = 0x7000000 - } vr_base_address_t; #define VR_STEP 0x2000000 @@ -146,7 +127,6 @@ FUNC3_VIR0, FUNC3_VIR1, FUNC_VIR_MAX - } vr_region_t; typedef enum { @@ -159,13 +139,11 @@ VP_CHANNEL_6, VP_CHANNEL_7, VP_CHANNEL_MAX - } vp_channel_t; typedef enum { VP_BOUND_TX = 1, VP_BOUND_RX - } vpc_type_t; #define VP_VC_OFFSET(channel) (channel << 10) @@ -254,9 +232,6 @@ ether_addr_t altmac; /* The alternate MAC address. */ int slot; /* According to nxge_m_mmac_add(). */ -#if defined(sun4v) - vio_net_handle_t vhp; /* The handle given to us by the vnet. */ -#endif nxge_grp_t rx_group; nxge_grp_t tx_group; @@ -273,7 +248,6 @@ uint64_t map; /* Currently unused */ int vector; /* The DDI vector number (index) */ - } hio_ldg_t; /*
--- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcireg.h Mon Aug 17 09:14:35 2009 -0400 @@ -212,7 +212,17 @@ #define AHCI_TFD_ERR_SHIFT 8 #define AHCI_TFD_ERR_SGS (0x1 << 0) /* DDR1: Send_good_status */ +/* FIS-Based Switching Control Register */ +#define AHCI_FBS_SWE_MASK (0xf << 16) /* Device With Error */ +#define AHCI_FBS_ADO_MASK (0xf << 12) /* Active Device Optimization */ +#define AHCI_FBS_DEV_MASK (0xf << 8) /* Device To Issue */ +#define AHCI_FBS_SDE (0x1 << 2) /* Single Device Error */ +#define AHCI_FBS_DEC (0x1 << 1) /* Device Error Clear */ +#define AHCI_FBS_EN (0x1 << 0) /* Enable */ + +/* Sxxx Registers */ #define AHCI_SERROR_CLEAR_ALL 0xffffffff +#define AHCI_SNOTIF_CLEAR_ALL 0xffffffff /* per port registers offset */ #define AHCI_PORT_OFFSET(ahci_ctlp, port) \ @@ -262,6 +272,9 @@ /* SNotification */ #define AHCI_PORT_PxSNTF(ahci_ctlp, port) \ (AHCI_PORT_OFFSET(ahci_ctlp, port) + 0x3c) + /* FIS-Based Switching Control */ +#define AHCI_PORT_PxFBS(ahci_ctlp, port) \ + (AHCI_PORT_OFFSET(ahci_ctlp, port) + 0x40) #define AHCI_SLOT_MASK(ahci_ctlp) \ ((ahci_ctlp->ahcictl_num_cmd_slots == AHCI_PORT_MAX_CMD_SLOTS) ? \ @@ -269,19 +282,14 @@ #define AHCI_NCQ_SLOT_MASK(ahci_portp) \ ((ahci_portp->ahciport_max_ncq_tags == AHCI_PORT_MAX_CMD_SLOTS) ? \ 0xffffffff : ((0x1 << ahci_portp->ahciport_max_ncq_tags) - 1)) +#define AHCI_PMPORT_MASK(ahci_portp) \ + ((0x1 << ahci_portp->ahciport_pmult_info->ahcipmi_num_dev_ports) - 1) /* Device signatures */ #define AHCI_SIGNATURE_PORT_MULTIPLIER 0x96690101 #define AHCI_SIGNATURE_ATAPI 0xeb140101 #define AHCI_SIGNATURE_DISK 0x00000101 -/* - * The address of the control port for the port multiplier, which is - * used for control and status communication with the port multiplier - * itself. - */ -#define AHCI_PORTMULT_CONTROL_PORT 0x0f - #define AHCI_H2D_REGISTER_FIS_TYPE 0x27 #define AHCI_H2D_REGISTER_FIS_LENGTH 5
--- a/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/sata/adapters/ahci/ahcivar.h Mon Aug 17 09:14:35 2009 -0400 @@ -32,18 +32,105 @@ extern "C" { #endif +/* + * AHCI address qualifier flags (in qual field of ahci_addr struct). + */ +#define AHCI_ADDR_NULL 0x00 +#define AHCI_ADDR_PORT 0x01 +#define AHCI_ADDR_PMPORT 0x02 +#define AHCI_ADDR_PMULT 0x04 +#define AHCI_ADDR_VALID (AHCI_ADDR_PORT | \ + AHCI_ADDR_PMULT | \ + AHCI_ADDR_PMPORT) + +/* + * AHCI address structure. + */ +struct ahci_addr { + + /* HBA port number */ + uint8_t aa_port; + + /* Port multiplier port number */ + uint8_t aa_pmport; + + /* + * AHCI_ADDR_NULL + * AHCI_ADDR_PORT + * AHCI_ADDR_PMPORT + * AHCI_ADDR_PMULT + */ + uint8_t aa_qual; +}; +typedef struct ahci_addr ahci_addr_t; + +_NOTE(SCHEME_PROTECTS_DATA("unshared data", ahci_addr)) + +#define AHCI_ADDR_IS_PORT(addrp) \ + ((addrp)->aa_qual & AHCI_ADDR_PORT) +#define AHCI_ADDR_IS_PMPORT(addrp) \ + ((addrp)->aa_qual & AHCI_ADDR_PMPORT) +#define AHCI_ADDR_IS_PMULT(addrp) \ + ((addrp)->aa_qual & AHCI_ADDR_PMULT) +#define AHCI_ADDR_IS_VALID(addrp) \ + ((addrp)->aa_port < SATA_MAX_CPORTS) && \ + ((addrp)->aa_pmport < SATA_MAX_PMPORTS) && \ + ((addrp)->aa_qual & AHCI_ADDR_VALID) + +#define AHCI_ADDR_SET(addrp, port, pmport, qual) \ + { \ + (addrp)->aa_port = port; \ + (addrp)->aa_pmport = pmport; \ + (addrp)->aa_qual = qual; \ + } +#define AHCI_ADDR_SET_PORT(addrp, port) \ + AHCI_ADDR_SET(addrp, port, 0, AHCI_ADDR_PORT) +#define AHCI_ADDR_SET_PMPORT(addrp, port, pmport) \ + AHCI_ADDR_SET(addrp, port, pmport, AHCI_ADDR_PMPORT) +#define AHCI_ADDR_SET_PMULT(addrp, port) \ + AHCI_ADDR_SET(addrp, port, SATA_PMULT_HOSTPORT, AHCI_ADDR_PMULT) + /* Type for argument of event handler */ typedef struct ahci_event_arg { void *ahciea_ctlp; void *ahciea_portp; + void *ahciea_addrp; uint32_t ahciea_event; } ahci_event_arg_t; /* Warlock annotation */ _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_ctlp)) _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_portp)) +_NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_addrp)) _NOTE(DATA_READABLE_WITHOUT_LOCK(ahci_event_arg_t::ahciea_event)) + +/* + * ahci_pmult_info stores the information of a port multiplier and its + * sub-devices in case a port multiplier is attached to an HBA port. + */ +struct ahci_pmult_info { + + /* Number of the device ports */ + int ahcipmi_num_dev_ports; + + /* Device type of the sub-devices of the port multipler */ + uint8_t ahcipmi_device_type[SATA_MAX_PMPORTS]; + + /* State of port multiplier port */ + uint32_t ahcipmi_port_state[SATA_MAX_PMPORTS]; + + /* + * Port multiplier port on which there is outstanding NCQ + * commands. Only make sense in command based switching mode. + */ + uint8_t ahcipmi_ncq_pmport; + + /* Pending asynchronous notification events tags */ + uint32_t ahcipmi_snotif_tags; +}; +typedef struct ahci_pmult_info ahci_pmult_info_t; + /* * flags for ahciport_flags * @@ -69,14 +156,31 @@ * * AHCI_PORT_FLAG_NODEV: this flag will be set when a device is found gone * during ahci_restart_port_wait_till_ready process. + * + * AHCI_PORT_FLAG_RDWR_PMULT: this flags will be set when a READ/WRITE + * PORTMULT command is being executed. + * + * AHCI_PORT_FLAG_IGNORE_IPMS: this flags will be set when enumerating a port + * multiplier. According AHCI spec, IPMS error should be ignore during + * enumeration of port multiplier. + * + * AHCI_PORT_FLAG_PMULT_SNTF: this flags will be set when the a asynchronous + * notification event on the port multiplier is being handled. + * + * AHCI_PORT_FLAG_HOTPLUG: this flags will be set when a hot plug event is + * being handled. */ -#define AHCI_PORT_FLAG_SPINUP 0x01 -#define AHCI_PORT_FLAG_MOPPING 0x02 -#define AHCI_PORT_FLAG_POLLING 0x04 -#define AHCI_PORT_FLAG_RQSENSE 0x08 -#define AHCI_PORT_FLAG_STARTED 0x10 -#define AHCI_PORT_FLAG_RDLOGEXT 0x20 -#define AHCI_PORT_FLAG_NODEV 0x40 +#define AHCI_PORT_FLAG_SPINUP 0x01 +#define AHCI_PORT_FLAG_MOPPING 0x02 +#define AHCI_PORT_FLAG_POLLING 0x04 +#define AHCI_PORT_FLAG_RQSENSE 0x08 +#define AHCI_PORT_FLAG_STARTED 0x10 +#define AHCI_PORT_FLAG_RDLOGEXT 0x20 +#define AHCI_PORT_FLAG_NODEV 0x40 +#define AHCI_PORT_FLAG_RDWR_PMULT 0x80 +#define AHCI_PORT_FLAG_IGNORE_IPMS 0x100 +#define AHCI_PORT_FLAG_PMULT_SNTF 0x200 +#define AHCI_PORT_FLAG_HOTPLUG 0x400 typedef struct ahci_port { /* The physical port number */ @@ -87,6 +191,9 @@ /* State of the port */ uint32_t ahciport_port_state; + /* Port multiplier struct */ + ahci_pmult_info_t *ahciport_pmult_info; + /* * AHCI_PORT_FLAG_SPINUP * AHCI_PORT_FLAG_MOPPING @@ -95,6 +202,10 @@ * AHCI_PORT_FLAG_STARTED * AHCI_PORT_FLAG_RDLOGEXT * AHCI_PORT_FLAG_NODEV + * AHCI_PORT_FLAG_RDWR_PMULT + * AHCI_PORT_FLAG_IGNORE_IPMS + * AHCI_PORT_FLAG_PMULT_SNTF + * AHCI_PORT_FLAG_HOTPLUG */ int ahciport_flags; @@ -146,6 +257,9 @@ /* Keep the error retrieval sata packet */ sata_pkt_t *ahciport_err_retri_pkt; + /* Keep the read/write port multiplier packet */ + sata_pkt_t *ahciport_rdwr_pmult_pkt; + /* * SATA HBA driver is supposed to remember and maintain device * reset state. While the reset is in progress, it doesn't accept @@ -185,6 +299,50 @@ _NOTE(MUTEX_PROTECTS_DATA(ahci_port_t::ahciport_mutex, ahci_port_t::ahciport_event_taskq)) +#define AHCI_NUM_PORTS(ctlp) \ + (ctlp)->ahcictl_num_ports + +#define AHCIPORT_NUM_PMPORTS(portp) \ + (portp)->ahciport_pmult_info->ahcipmi_num_dev_ports + +#define AHCIPORT_NCQ_PMPORT(ahci_portp) \ + (ahci_portp->ahciport_pmult_info->ahcipmi_ncq_pmport) + +#define AHCIPORT_DEV_TYPE(portp, addrp) \ + (portp)->ahciport_device_type + +#define AHCIPORT_PMDEV_TYPE(portp, addrp) \ + (portp)->ahciport_pmult_info->ahcipmi_device_type \ + [(addrp)->aa_pmport] + +#define AHCIPORT_GET_DEV_TYPE(portp, addrp) \ + (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp) ? \ + AHCIPORT_DEV_TYPE(portp, addrp) : \ + AHCIPORT_PMDEV_TYPE(portp, addrp)) + +#define AHCIPORT_SET_DEV_TYPE(portp, addrp, type) \ + if (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp)) \ + AHCIPORT_DEV_TYPE(portp, addrp) = type; \ + else \ + AHCIPORT_PMDEV_TYPE(portp, addrp) = type; + +#define AHCIPORT_STATE(portp, addrp) \ + (portp)->ahciport_port_state + +#define AHCIPORT_PMSTATE(portp, addrp) \ + (portp)->ahciport_pmult_info->ahcipmi_port_state \ + [(addrp)->aa_pmport] + +#define AHCIPORT_GET_STATE(portp, addrp) \ + (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp) ? \ + AHCIPORT_STATE(portp, addrp) : AHCIPORT_PMSTATE(portp, addrp)) + +#define AHCIPORT_SET_STATE(portp, addrp, state) \ + if (AHCI_ADDR_IS_PORT(addrp) | AHCI_ADDR_IS_PMULT(addrp)) \ + AHCIPORT_STATE(portp, addrp) = state; \ + else \ + AHCIPORT_PMSTATE(portp, addrp) = state; + typedef struct ahci_ctl { dev_info_t *ahcictl_dip; /* To map port number to cport number */ @@ -213,6 +371,10 @@ * AHCI_CAP_PM * AHCI_CAP_32BIT_DMA * AHCI_CAP_SCLO + * AHCI_CAP_INIT_PORT_RESET + * AHCI_CAP_SNTF + * AHCI_CAP_PMULT_CBSS + * AHCI_CAP_PMULT_FBSS */ int ahcictl_cap; @@ -291,7 +453,12 @@ #define AHCI_CAP_COMMU_32BIT_DMA 0x40 /* Port reset is needed for initialization */ #define AHCI_CAP_INIT_PORT_RESET 0x80 - +/* Port Asychronous Notification */ +#define AHCI_CAP_SNTF 0x100 +/* Port Multiplier Command-Based Switching Support (PMULT_CBSS) */ +#define AHCI_CAP_PMULT_CBSS 0x200 +/* Port Multiplier FIS-Based Switching Support (PMULT_FBSS) */ +#define AHCI_CAP_PMULT_FBSS 0x400 /* Flags controlling the restart port behavior */ #define AHCI_PORT_RESET 0x0001 /* Reset the port */ @@ -301,6 +468,10 @@ (ahci_portp->ahciport_flags & \ (AHCI_PORT_FLAG_RQSENSE|AHCI_PORT_FLAG_RDLOGEXT)) +#define RDWR_PMULT_CMD_IN_PROGRESS(ahci_portp) \ + (ahci_portp->ahciport_flags & \ + AHCI_PORT_FLAG_RDWR_PMULT) + #define NON_NCQ_CMD_IN_PROGRESS(ahci_portp) \ (!ERR_RETRI_CMD_IN_PROGRESS(ahci_portp) && \ ahci_portp->ahciport_pending_tags != 0 && \ @@ -314,6 +485,7 @@ #define AHCI_NON_NCQ_CMD 0x0 #define AHCI_NCQ_CMD 0x1 #define AHCI_ERR_RETRI_CMD 0x2 +#define AHCI_RDWR_PMULT_CMD 0x4 /* State values for ahci_attach */ #define AHCI_ATTACH_STATE_NONE (0x1 << 0) @@ -374,6 +546,7 @@ #define AHCIDBG_PM 0x8000 #define AHCIDBG_UNDERFLOW 0x10000 #define AHCIDBG_MSI 0x20000 +#define AHCIDBG_PMULT 0x40000 extern uint32_t ahci_debug_flags;
--- a/usr/src/uts/common/sys/sata/impl/sata.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/sata/impl/sata.h Mon Aug 17 09:14:35 2009 -0400 @@ -244,8 +244,6 @@ /* Port Multiplier & host port info and state */ struct sata_pmult_info { sata_address_t pmult_addr; /* this PMult SATA Address */ - kmutex_t pmult_mutex; /* pmult (host port) mutex */ - /* * PMult state flags * SATA_STATE_UNKNOWN @@ -256,13 +254,17 @@ */ uint32_t pmult_state; uint32_t pmult_event_flags; /* Undefined for now */ - struct sata_port_scr pmult_scr; /* Host port SCR block */ + struct sata_pmult_gscr pmult_gscr; /* PMult GSCR block */ uint32_t pmult_num_dev_ports; /* Number of data ports */ struct sata_pmport_info *pmult_dev_port[SATA_MAX_PMPORTS - 1]; }; typedef struct sata_pmult_info sata_pmult_info_t; +_NOTE(SCHEME_PROTECTS_DATA("unshared data", sata_pmult_info)) +_NOTE(MUTEX_PROTECTS_DATA(sata_cport_info::cport_mutex, \ + sata_pmult_info::pmult_dev_port)) + /* Port Multiplier's device port info & state */ struct sata_pmport_info { sata_address_t pmport_addr; /* this SATA port address */ @@ -414,6 +416,7 @@ SATA_EVNT_DEVICE_DETACHED | \ SATA_EVNT_LINK_LOST | \ SATA_EVNT_LINK_ESTABLISHED | \ + SATA_EVNT_PMULT_LINK_CHANGED | \ SATA_EVNT_PORT_FAILED | \ SATA_EVNT_TARGET_NODE_CLEANUP | \ SATA_EVNT_AUTOONLINE_DEVICE) @@ -618,6 +621,10 @@ sata_hba_inst->satahba_dev_port[cport]->\ cport_devp.cport_sata_pmult->pmult_num_dev_ports +#define SATA_PMPORT_MUTEX(sata_hba_inst, cport, pmport) \ + sata_hba_inst->satahba_dev_port[cport]->\ + cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->pmport_mutex + #define SATA_PMPORT_INFO(sata_hba_inst, cport, pmport) \ sata_hba_inst->satahba_dev_port[cport]->\ cport_devp.cport_sata_pmult->pmult_dev_port[pmport] @@ -639,6 +646,11 @@ sata_hba_inst->satahba_dev_port[cport]->\ cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->pmport_dev_type +#define SATA_PMPORT_EVENT_FLAGS(sata_hba_inst, cport, pmport) \ + sata_hba_inst->satahba_dev_port[cport]->\ + cport_devp.cport_sata_pmult->pmult_dev_port[pmport]->\ + pmport_event_flags + #define SATA_PMPORTINFO_DRV_TYPE(pmportinfo) \ pmportinfo->pmport_dev_type @@ -651,6 +663,12 @@ #define SATA_TXLT_CPORT(spx) \ spx->txlt_sata_pkt->satapkt_device.satadev_addr.cport +#define SATA_TXLT_PMPORT(spx) \ + spx->txlt_sata_pkt->satapkt_device.satadev_addr.pmport + +#define SATA_TXLT_QUAL(spx) \ + spx->txlt_sata_pkt->satapkt_device.satadev_addr.qual + #define SATA_TXLT_CPORT_MUTEX(spx) \ spx->txlt_sata_hba_inst->\ satahba_dev_port[spx->txlt_sata_pkt->\ @@ -785,6 +803,7 @@ #define SATA_DBG_ATAPI 0x1000 #define SATA_DBG_ATAPI_PACKET 0x8000 #define SATA_DBG_INTR_CTX 0x10000 +#define SATA_DBG_PMULT 0x20000 typedef struct sata_atapi_cmd { uint8_t acdb[SATA_ATAPI_MAX_CDB_LEN]; @@ -816,7 +835,7 @@ #endif -/* sata_rev_tag 1.43 */ +/* sata_rev_tag 1.44 */ #ifdef __cplusplus }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/sys/sata/sata_blacklist.h Mon Aug 17 09:14:35 2009 -0400 @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SATA_BLACKLIST_H +#define _SATA_BLACKLIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * SATA port multiplier blacklist + * + * The number of the device ports is indicated by GSCR2[3:0]. These port + * multipliers got faulty values in GSCR2 (w/ pseudo port) by vendor + * configuration. + * + * Following is a list of some black-listed port multipliers with the actual + * number of device ports. + */ +struct sata_pmult_bl { + uint32_t bl_gscr0; + uint32_t bl_gscr1; + uint32_t bl_gscr2; + uint32_t bl_flags; +}; + +typedef struct sata_pmult_bl sata_pmult_bl_t; + +sata_pmult_bl_t sata_pmult_blacklist[] = { + {0x37261095, 0x0, 0x6, 0x5}, /* Silicon Image 3726, 5 ports. */ + {0x47261095, 0x0, 0x7, 0x5}, /* Silicon Image 4726, 5 ports. */ + {0x47231095, 0x0, 0x4, 0x2}, /* Silicon Image 4723, 2 ports. */ + NULL +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* _SATA_BLACKLIST_H */
--- a/usr/src/uts/common/sys/sata/sata_defs.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/sata/sata_defs.h Mon Aug 17 09:14:35 2009 -0400 @@ -91,6 +91,12 @@ #define SATA_LOG_PAGE_10 0x10 /* log page 0x10 - SATA error */ /* + * Port Multiplier Commands + */ +#define SATAC_READ_PORTMULT 0xe4 /* read port multiplier */ +#define SATAC_WRITE_PORTMULT 0xe8 /* write port multiplier */ + +/* * Power Managment Commands (subset) */ #define SATAC_CHECK_POWER_MODE 0xe5 /* check power mode */ @@ -658,6 +664,41 @@ #define MODEPAGE_ACOUSTIC_MANAG 0x30 /* + * Port Multiplier registers' offsets + */ +#define SATA_PMULT_GSCR0 0x0 +#define SATA_PMULT_GSCR1 0x1 +#define SATA_PMULT_GSCR2 0x2 +#define SATA_PMULT_GSCR32 0x20 +#define SATA_PMULT_GSCR33 0x21 +#define SATA_PMULT_GSCR64 0x40 +#define SATA_PMULT_GSCR96 0x60 + +#define SATA_PMULT_PORTNUM_MASK 0xf + +#define SATA_PMULT_PSCR0 0x0 +#define SATA_PMULT_PSCR1 0x1 +#define SATA_PMULT_PSCR2 0x2 +#define SATA_PMULT_PSCR3 0x3 +#define SATA_PMULT_PSCR4 0x4 + +#define SATA_PMULT_REG_SSTS (SATA_PMULT_PSCR0) +#define SATA_PMULT_REG_SERR (SATA_PMULT_PSCR1) +#define SATA_PMULT_REG_SCTL (SATA_PMULT_PSCR2) +#define SATA_PMULT_REG_SACT (SATA_PMULT_PSCR3) +#define SATA_PMULT_REG_SNTF (SATA_PMULT_PSCR4) + +/* + * Port Multiplier capabilities + * (Indicated by GSCR64, and enabled by GSCR96) + */ +#define SATA_PMULT_CAP_BIST (1 << 0) +#define SATA_PMULT_CAP_PMREQ (1 << 1) +#define SATA_PMULT_CAP_SSC (1 << 2) +#define SATA_PMULT_CAP_SNOTIF (1 << 3) +#define SATA_PMULT_CAP_PHYEVENT (1 << 4) + +/* * sstatus field definitions */ #define SSTATUS_DET_SHIFT 0
--- a/usr/src/uts/common/sys/sata/sata_hba.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/sata/sata_hba.h Mon Aug 17 09:14:35 2009 -0400 @@ -52,7 +52,6 @@ /* SATA Framework definitions */ #define SATA_MAX_CPORTS 32 /* Max number of controller ports */ - /* Multiplier (PMult) */ #define SATA_MAX_PMPORTS 16 /* Maximum number of ports on PMult */ #define SATA_PMULT_HOSTPORT 0xf /* Port Multiplier host port number */ @@ -105,6 +104,21 @@ typedef struct sata_port_scr sata_port_scr_t; /* + * SATA Port Multiplier general status and control register block. + * The gscr0, gscr1, gscr2 are the copyies of the register on port multiplier. + * GSCR[0], GSCR[1], GSCR[2] are defined in SATA defined by Port Multiplier + * 1.0/1.1/1.2 spec. + */ +struct sata_pmult_gscr { + uint32_t gscr0; /* Product Identifier register */ + uint32_t gscr1; /* Resrved Information register */ + uint32_t gscr2; /* Port Information register */ + uint32_t gscr64; /* Feature register */ +}; + +typedef struct sata_pmult_gscr sata_pmult_gscr_t; + +/* * SATA Device Structure (rev 1) * Used to request/return state of the controller, port, port multiplier * or an attached drive: @@ -122,7 +136,8 @@ * a value specific for a reported event. */ #define SATA_DEVICE_REV_1 1 -#define SATA_DEVICE_REV SATA_DEVICE_REV_1 +#define SATA_DEVICE_REV_2 2 +#define SATA_DEVICE_REV SATA_DEVICE_REV_2 struct sata_device { @@ -131,6 +146,9 @@ uint32_t satadev_state; /* Port or device state */ uint32_t satadev_type; /* Attached device type */ struct sata_port_scr satadev_scr; /* Port status and ctrl regs */ + struct sata_pmult_gscr satadev_gscr; /* Port multiplier specific */ + /* global status and control */ + /* registers */ uint32_t satadev_add_info; /* additional information, */ /* function specific */ }; @@ -162,6 +180,7 @@ #define SATA_DSTATE_PWR_IDLE 0x000200 #define SATA_DSTATE_PWR_STANDBY 0x000400 #define SATA_DSTATE_RESET 0x001000 +#define SATA_DSTATE_PMULT_INIT 0x002000 #define SATA_DSTATE_FAILED 0x008000 /* Mask for drive power states */ @@ -547,6 +566,12 @@ #define SATA_ERR_RETR_PKT_TYPE_ATAPI 2 /* + * Read/write port multiplier packet types + */ +#define SATA_RDWR_PMULT_PKT_TYPE_READ 1 +#define SATA_RDWR_PMULT_PKT_TYPE_WRITE 2 + +/* * Hoplug functions vector structure (rev 1) */ #define SATA_TRAN_HOTPLUG_OPS_REV_1 1 @@ -602,7 +627,8 @@ */ #define SATA_TRAN_HBA_REV_1 1 #define SATA_TRAN_HBA_REV_2 2 -#define SATA_TRAN_HBA_REV SATA_TRAN_HBA_REV_2 +#define SATA_TRAN_HBA_REV_3 3 +#define SATA_TRAN_HBA_REV SATA_TRAN_HBA_REV_3 struct sata_hba_tran { int sata_tran_hba_rev; /* version */ @@ -643,6 +669,7 @@ #define SATA_CTLF_ASN 0x040 /* Asynchronous Event Support */ #define SATA_CTLF_QCMD 0x080 /* Queued commands support */ #define SATA_CTLF_NCQ 0x100 /* NCQ support */ +#define SATA_CTLF_PMULT_FBS 0x200 /* FIS-based switching support */ /* * sata_tran_start() return values. @@ -693,6 +720,9 @@ * SATA_EVNT_PWR_LEVEL_CHANGED * A port or entire SATA controller power level has changed * + * SATA_EVNT_PMULT_LINK_CHANGED + * Port multiplier detect change on a link of its device port + * */ #define SATA_EVNT_DEVICE_ATTACHED 0x01 #define SATA_EVNT_DEVICE_DETACHED 0x02 @@ -701,6 +731,7 @@ #define SATA_EVNT_PORT_FAILED 0x10 #define SATA_EVNT_DEVICE_RESET 0x20 #define SATA_EVNT_PWR_LEVEL_CHANGED 0x40 +#define SATA_EVNT_PMULT_LINK_CHANGED 0x80 /* * SATA Framework interface entry points @@ -712,6 +743,10 @@ void sata_hba_event_notify(dev_info_t *, sata_device_t *, int); sata_pkt_t *sata_get_error_retrieval_pkt(dev_info_t *, sata_device_t *, int); void sata_free_error_retrieval_pkt(sata_pkt_t *); +sata_pkt_t *sata_get_rdwr_pmult_pkt(dev_info_t *, sata_device_t *, uint8_t, + uint32_t, uint32_t); +void sata_free_rdwr_pmult_pkt(sata_pkt_t *); +int sata_check_pmult_blacklist(sata_device_t *); void sata_free_dma_resources(sata_pkt_t *); /*
--- a/usr/src/uts/common/sys/scsi/targets/sddef.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/scsi/targets/sddef.h Mon Aug 17 09:14:35 2009 -0400 @@ -453,7 +453,8 @@ /* field by hardware */ un_f_pm_log_sense_smart :1, /* log sense support SMART */ /* feature attribute */ - un_f_reserved :7; + un_f_is_solid_state :1, /* has solid state media */ + un_f_reserved :6; /* Ptr to table of strings for ASC/ASCQ error message printing */ struct scsi_asq_key_strings *un_additional_codes; @@ -2357,6 +2358,7 @@ #define SD_VPD_ASCII_OP_PG 0x08 /* 0x82 - ASCII Op Defs */ #define SD_VPD_DEVID_WWN_PG 0x10 /* 0x83 - Device Identification */ #define SD_VPD_EXTENDED_DATA_PG 0x80 /* 0x86 - Extended data about the lun */ +#define SD_VPD_DEV_CHARACTER_PG 0x400 /* 0xB1 - Device Characteristics */ /* * Non-volatile cache support
--- a/usr/src/uts/common/sys/usb/usba/usba_impl.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/usb/usba/usba_impl.h Mon Aug 17 09:14:35 2009 -0400 @@ -435,6 +435,15 @@ /* free request wrappers */ void usba_req_wrapper_free(usba_req_wrapper_t *); +/* usb device capture for the specific client driver */ +typedef struct usb_dev_cap { + dev_info_t *dip; + usb_dev_driver_callback_t usba_dev_driver_cb; +} usb_dev_cap_t; + +usb_dev_cap_t usb_cap; +_NOTE(SCHEME_PROTECTS_DATA("unique device capture data", usb_cap)) + #ifdef __cplusplus } #endif
--- a/usr/src/uts/common/sys/usb/usbai.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/common/sys/usb/usbai.h Mon Aug 17 09:14:35 2009 -0400 @@ -2555,6 +2555,65 @@ /* + * ************************************************************************** + * USB device driver registration and callback functions remaining + * Contracted Project Private (for VirtualBox USB Device Capture) + * ************************************************************************** + */ + +/* + * getting the device strings of manufacturer, product and serial number + */ +typedef struct usb_dev_str { + char *usb_mfg; /* manufacturer string */ + char *usb_product; /* product string */ + char *usb_serialno; /* serial number string */ +} usb_dev_str_t; + +/* + * It is the callback function type for capture driver. + * Arguments: + * dev_descr - pointer to device descriptor + * dev_str - pointer to device strings + * path - pointer to device physical path + * bus - USB bus address + * port - USB port number + * drv - capture driver name. + * It is returned by the callback func. + * Return Values: + * USB_SUCCESS - VirtualBox will capture the device + * USB_FAILURE - VirtualBox will not capture the device + */ +typedef int (*usb_dev_driver_callback_t)( + usb_dev_descr_t *dev_descr, + usb_dev_str_t *dev_str, + char *path, + int bus, + int port, + char **drv, + void *reserved); + +/* + * Register the callback function in the usba. + * Argument: + * dip - client driver's devinfo pointer + * cb - callback function + * + * Return Values: + * USB_SUCCESS - the registeration was successful + * USB_FAILURE - the registeration failed + */ +int usb_register_dev_driver( + dev_info_t *dip, + usb_dev_driver_callback_t cb); + +/* + * Unregister the callback function in the usba. + */ +void usb_unregister_dev_driver(dev_info_t *dip); + + +/* * *************************************************************************** * USB Device and interface class, subclass and protocol codes * ***************************************************************************
--- a/usr/src/uts/i86pc/dboot/dboot_grub.s Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/i86pc/dboot/dboot_grub.s Mon Aug 17 09:14:35 2009 -0400 @@ -21,7 +21,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,13 +57,20 @@ .globl mb_header mb_header: .long MB_HEADER_MAGIC /* magic number */ - .long MB_HEADER_FLAGS /* flags */ - .long MB_HEADER_CHECKSUM /* checksum */ - .long 0x11111111 /* header_addr: patched by elfpatch */ - .long 0x100000 /* load_addr: patched by elfpatch */ +#if defined(_BOOT_TARGET_i386) + .long MB_HEADER_FLAGS_32 /* flags */ + .long MB_HEADER_CHECKSUM_32 /* checksum */ +#elif defined (_BOOT_TARGET_amd64) + .long MB_HEADER_FLAGS_64 /* flags */ + .long MB_HEADER_CHECKSUM_64 /* checksum */ +#else +#error No architecture defined +#endif + .long 0x11111111 /* header_addr: patched by mbh_patch */ + .long 0x100000 /* load_addr: patched by mbh_patch */ .long 0 /* load_end_addr - 0 means entire file */ .long 0 /* bss_end_addr */ - .long 0x2222222 /* entry_addr: patched by elfpatch */ + .long 0x2222222 /* entry_addr: patched by mbh_patch */ .long 0 /* video mode.. */ .long 0 /* width 0 == don't care */ .long 0 /* height 0 == don't care */
--- a/usr/src/uts/i86pc/dboot/dboot_startkern.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/i86pc/dboot/dboot_startkern.c Mon Aug 17 09:14:35 2009 -0400 @@ -799,6 +799,10 @@ DBG_MSG("Entered init_mem_alloc()\n"); DBG((uintptr_t)mb_info); + if (mb_info->mods_count > MAX_MODULES) { + dboot_panic("Too many modules (%d) -- the maximum is %d.", + mb_info->mods_count, MAX_MODULES); + } /* * search the modules to find the last used address * we'll build the module list while we're walking through here @@ -814,7 +818,11 @@ (ulong_t)mod->mod_start, (ulong_t)mod->mod_end); } modules[i].bm_addr = mod->mod_start; - modules[i].bm_size = mod->mod_end; + if (mod->mod_start > mod->mod_end) { + dboot_panic("module[%d]: Invalid module start address " + "(0x%llx)", i, (uint64_t)mod->mod_start); + } + modules[i].bm_size = mod->mod_end - mod->mod_start; check_higher(mod->mod_end); }
--- a/usr/src/uts/i86pc/os/ddi_impl.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/i86pc/os/ddi_impl.c Mon Aug 17 09:14:35 2009 -0400 @@ -2514,6 +2514,10 @@ void impl_setup_ddi(void) { +#if !defined(__xpv) + extern void startup_bios_disk(void); + extern int post_fastreboot; +#endif dev_info_t *xdip, *isa_dip; rd_existing_t rd_mem_prop; int err; @@ -2555,11 +2559,15 @@ */ get_boot_properties(); + /* not framebuffer should be enumerated, if present */ + get_vga_properties(); + +#if !defined(__xpv) + if (!post_fastreboot) + startup_bios_disk(); +#endif /* do bus dependent probes. */ impl_bus_initialprobe(); - - /* not framebuffer should be enumerated, if present */ - get_vga_properties(); } dev_t
--- a/usr/src/uts/i86pc/os/fakebop.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/i86pc/os/fakebop.c Mon Aug 17 09:14:35 2009 -0400 @@ -1087,9 +1087,9 @@ * Fast Reboot. */ static void -save_boot_info(multiboot_info_t *mbi) +save_boot_info(multiboot_info_t *mbi, struct xboot_info *xbi) { - mb_module_t *mbp; + struct boot_modules *modp; int i; bcopy(mbi, &saved_mbi, sizeof (multiboot_info_t)); @@ -1115,12 +1115,14 @@ /* * Current file sizes. Used by fastboot.c to figure out how much * memory to reserve for panic reboot. + * Use the module list from the dboot-constructed xboot_info + * instead of the list referenced by the multiboot structure + * because that structure may not be addressable now. */ saved_file_size[FASTBOOT_NAME_UNIX] = FOUR_MEG - PAGESIZE; - for (i = 0, mbp = (mb_module_t *)(uintptr_t)mbi->mods_addr; - i < mbi->mods_count; i++, mbp += sizeof (mb_module_t)) { - saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] += - mbp->mod_end - mbp->mod_start; + for (i = 0, modp = (struct boot_modules *)(uintptr_t)xbi->bi_modules; + i < xbi->bi_module_cnt; i++, modp++) { + saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] += modp->bm_size; } } @@ -1379,7 +1381,7 @@ /* * Save various boot information for Fast Reboot */ - save_boot_info(mbi); + save_boot_info(mbi, xbootp); if (mbi != NULL && mbi->flags & 0x2) { boot_device = mbi->boot_device >> 24;
--- a/usr/src/uts/i86pc/os/startup.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/i86pc/os/startup.c Mon Aug 17 09:14:35 2009 -0400 @@ -668,7 +668,6 @@ startup(void) { #if !defined(__xpv) - extern void startup_bios_disk(void); extern void startup_pci_bios(void); extern int post_fastreboot; #endif @@ -706,10 +705,7 @@ startup_xen_mca(); #endif startup_modules(); -#if !defined(__xpv) - if (!post_fastreboot) - startup_bios_disk(); -#endif + startup_end(); progressbar_start(); }
--- a/usr/src/uts/intel/io/pci/pci_boot.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/intel/io/pci/pci_boot.c Mon Aug 17 09:14:35 2009 -0400 @@ -600,7 +600,7 @@ int rv, cap_ptr, physhi; dev_info_t *dip; uint16_t cmd_reg; - struct memlist *list; + struct memlist *list, *scratch_list; /* skip root (peer) PCI busses */ if (pci_bus_res[secbus].par_bus == (uchar_t)-1) @@ -785,13 +785,13 @@ io_limit = ((io_limit & 0xf0) << 8) | 0xfff; /* Form list of all resources passed (avail + used) */ - list = memlist_dup(pci_bus_res[secbus].io_avail); - memlist_merge(&pci_bus_res[secbus].io_used, &list); + scratch_list = memlist_dup(pci_bus_res[secbus].io_avail); + memlist_merge(&pci_bus_res[secbus].io_used, &scratch_list); if ((pci_bus_res[parbus].io_reprogram || (io_base > io_limit) || (!(cmd_reg & PCI_COMM_IO))) && - !list_is_vga_only(list, IO)) { + !list_is_vga_only(scratch_list, IO)) { if (pci_bus_res[secbus].io_used) { memlist_subsume(&pci_bus_res[secbus].io_used, &pci_bus_res[secbus].io_avail); @@ -857,6 +857,7 @@ bus, dev, func, io_base, io_limit); } } + memlist_free_all(&scratch_list); /* * Check memory space as we did I/O space. @@ -866,13 +867,13 @@ mem_limit = (uint_t)pci_getw(bus, dev, func, PCI_BCNF_MEM_LIMIT); mem_limit = ((mem_limit & 0xfff0) << 16) | 0xfffff; - list = memlist_dup(pci_bus_res[secbus].mem_avail); - memlist_merge(&pci_bus_res[secbus].mem_used, &list); + scratch_list = memlist_dup(pci_bus_res[secbus].mem_avail); + memlist_merge(&pci_bus_res[secbus].mem_used, &scratch_list); if ((pci_bus_res[parbus].mem_reprogram || (mem_base > mem_limit) || (!(cmd_reg & PCI_COMM_MAE))) && - !list_is_vga_only(list, MEM)) { + !list_is_vga_only(scratch_list, MEM)) { if (pci_bus_res[secbus].mem_used) { memlist_subsume(&pci_bus_res[secbus].mem_used, &pci_bus_res[secbus].mem_avail); @@ -959,6 +960,7 @@ bus, dev, func, mem_base, mem_limit); } } + memlist_free_all(&scratch_list); cmd_enable: if (pci_bus_res[secbus].io_avail)
--- a/usr/src/uts/sun/io/eri/eri.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun/io/eri/eri.c Mon Aug 17 09:14:35 2009 -0400 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -2894,6 +2894,9 @@ ETX_COMPLETION_MASK); macupdate |= eri_reclaim(erip, erip->tx_completion); + if (macupdate) + erip->wantw = B_FALSE; + mutex_exit(&erip->xmitlock); } @@ -2961,8 +2964,6 @@ erip->rx_completion = rmdi; } - erip->wantw = B_FALSE; - mutex_exit(&erip->intrlock); if (head) @@ -3454,7 +3455,6 @@ uint_t start_offset = 0; uint_t stuff_offset = 0; uint_t flags = 0; - boolean_t macupdate = B_FALSE; caddr_t ptr; uint32_t offset; @@ -3514,9 +3514,16 @@ if (i > (ERI_TPENDING - 4)) goto notmds; - if (i >= (ERI_TPENDING >> 1) && !(erip->starts & 0x7)) + if (i >= (ERI_TPENDING >> 1) && !(erip->starts & 0x7)) { int_me = ERI_TMD_INTME; + if (!erip->tx_int_me) { + PUT_GLOBREG(intmask, GET_GLOBREG(intmask) & + ~(ERI_G_MASK_TX_INT_ME)); + erip->tx_int_me = 1; + } + } + i = tmdp - tbasep; /* index */ offset = (i * ERI_BUFSIZE); @@ -3562,36 +3569,18 @@ if (erip->tx_cur_cnt >= tx_interrupt_rate) { erip->tx_completion = (uint32_t)(GET_ETXREG(tx_completion) & ETX_COMPLETION_MASK); - macupdate |= eri_reclaim(erip, erip->tx_completion); + (void) eri_reclaim(erip, erip->tx_completion); } mutex_exit(&erip->xmitlock); - if (macupdate) - mac_tx_update(erip->mh); - return (B_TRUE); notmds: HSTAT(erip, notmds); erip->wantw = B_TRUE; - if (!erip->tx_int_me) { - PUT_GLOBREG(intmask, GET_GLOBREG(intmask) & - ~(ERI_G_MASK_TX_INT_ME)); - erip->tx_int_me = 1; - } - - if (erip->tx_cur_cnt >= tx_interrupt_rate) { - erip->tx_completion = (uint32_t)(GET_ETXREG(tx_completion) & - ETX_COMPLETION_MASK); - macupdate |= eri_reclaim(erip, erip->tx_completion); - } - mutex_exit(&erip->xmitlock); - if (macupdate) - mac_tx_update(erip->mh); - return (B_FALSE); }
--- a/usr/src/uts/sun4v/io/vnet.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/io/vnet.c Mon Aug 17 09:14:35 2009 -0400 @@ -40,6 +40,8 @@ #include <sys/dlpi.h> #include <net/if.h> #include <sys/mac_provider.h> +#include <sys/mac_client.h> +#include <sys/mac_client_priv.h> #include <sys/mac_ether.h> #include <sys/ddi.h> #include <sys/sunddi.h> @@ -75,11 +77,38 @@ #ifdef VNET_IOC_DEBUG static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp); #endif +static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data); +static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, + const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle); +static void vnet_get_group(void *arg, mac_ring_type_t type, const int index, + mac_group_info_t *infop, mac_group_handle_t handle); +static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); +static void vnet_rx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num); +static void vnet_tx_ring_stop(mac_ring_driver_t rdriver); +static int vnet_ring_enable_intr(void *arg); +static int vnet_ring_disable_intr(void *arg); +static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup); +static int vnet_addmac(void *arg, const uint8_t *mac_addr); +static int vnet_remmac(void *arg, const uint8_t *mac_addr); /* vnet internal functions */ static int vnet_unattach(vnet_t *vnetp); +static void vnet_ring_grp_init(vnet_t *vnetp); +static void vnet_ring_grp_uninit(vnet_t *vnetp); static int vnet_mac_register(vnet_t *); static int vnet_read_mac_address(vnet_t *vnetp); +static int vnet_bind_vgenring(vnet_res_t *vresp); +static void vnet_unbind_vgenring(vnet_res_t *vresp); +static int vnet_bind_hwrings(vnet_t *vnetp); +static void vnet_unbind_hwrings(vnet_t *vnetp); +static int vnet_bind_rings(vnet_res_t *vresp); +static void vnet_unbind_rings(vnet_res_t *vresp); +static int vnet_hio_stat(void *, uint_t, uint64_t *); +static int vnet_hio_start(void *); +static void vnet_hio_stop(void *); +static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type); +mblk_t *vnet_hio_tx(void *, mblk_t *); /* Forwarding database (FDB) routines */ static void vnet_fdb_create(vnet_t *vnetp); @@ -98,6 +127,8 @@ static void vnet_dispatch_res_task(vnet_t *vnetp); static void vnet_res_start_task(void *arg); static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err); +static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp); +static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp); /* Exported to vnet_gen */ int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); @@ -112,15 +143,21 @@ /* Exported to to vnet_dds */ int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); +int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); +void vnet_hio_mac_cleanup(vnet_t *vnetp); /* Externs that are imported from vnet_gen */ extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, const uint8_t *macaddr, void **vgenhdl); +extern int vgen_init_mdeg(void *arg); extern void vgen_uninit(void *arg); extern int vgen_dds_tx(void *arg, void *dmsg); extern void vgen_mod_init(void); extern int vgen_mod_cleanup(void); extern void vgen_mod_fini(void); +extern int vgen_enable_intr(void *arg); +extern int vgen_disable_intr(void *arg); +extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup); /* Externs that are imported from vnet_dds */ extern void vdds_mod_init(void); @@ -131,6 +168,9 @@ extern void vdds_cleanup_hybrid_res(void *arg); extern void vdds_cleanup_hio(vnet_t *vnetp); +/* Externs imported from mac_impl */ +extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *); + #define DRV_NAME "vnet" #define VNET_FDBE_REFHOLD(p) \ { \ @@ -145,9 +185,9 @@ } #ifdef VNET_IOC_DEBUG -#define VNET_M_CALLBACK_FLAGS (MC_IOCTL) +#define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB) #else -#define VNET_M_CALLBACK_FLAGS (0) +#define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB) #endif static mac_callbacks_t vnet_m_callbacks = { @@ -157,9 +197,23 @@ vnet_m_stop, vnet_m_promisc, vnet_m_multicst, - vnet_m_unicst, - vnet_m_tx, + NULL, /* m_unicst entry must be NULL while rx rings are exposed */ + NULL, /* m_tx entry must be NULL while tx rings are exposed */ vnet_m_ioctl, + vnet_m_capab, + NULL +}; + +static mac_callbacks_t vnet_hio_res_callbacks = { + 0, + vnet_hio_stat, + vnet_hio_start, + vnet_hio_stop, + NULL, + NULL, + NULL, + vnet_hio_tx, + NULL, NULL, NULL }; @@ -176,6 +230,9 @@ uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT; /* tx timeout in msec */ uint32_t vnet_ldc_mtu = VNET_LDC_MTU; /* ldc mtu */ +/* Configure tx serialization in mac layer for the vnet device */ +boolean_t vnet_mac_tx_serialize = B_TRUE; + /* * Set this to non-zero to enable additional internal receive buffer pools * based on the MTU of the device for better performance at the cost of more @@ -206,6 +263,11 @@ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +/* mac_open() retry delay in usec */ +uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */ + +/* max # of mac_open() retries */ +uint32_t vnet_mac_open_retries = 100; /* * Property names @@ -375,6 +437,9 @@ rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL); attach_progress |= AST_vnet_alloc; + vnet_ring_grp_init(vnetp); + attach_progress |= AST_ring_init; + status = vdds_init(vnetp); if (status != 0) { goto vnet_attach_fail; @@ -419,10 +484,19 @@ attach_progress |= AST_vnet_list; /* - * Initialize the generic vnet plugin which provides - * communication via sun4v LDC (logical domain channel) based - * resources. It will register the LDC resources as and when - * they become available. + * Initialize the generic vnet plugin which provides communication via + * sun4v LDC (logical domain channel) based resources. This involves 2 + * steps; first, vgen_init() is invoked to read the various properties + * of the vnet device from its MD node (including its mtu which is + * needed to mac_register()) and obtain a handle to the vgen layer. + * After mac_register() is done and we have a mac handle, we then + * invoke vgen_init_mdeg() which registers with the the MD event + * generator (mdeg) framework to allow LDC resource notifications. + * Note: this sequence also allows us to report the correct default # + * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked + * in the context of mac_register(); and avoids conflicting with + * dynamic pseudo rx rings which get added/removed as a result of mdeg + * events in vgen. */ status = vgen_init(vnetp, reg, vnetp->dip, (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl); @@ -432,15 +506,19 @@ } attach_progress |= AST_vgen_init; - /* register with MAC layer */ status = vnet_mac_register(vnetp); if (status != DDI_SUCCESS) { goto vnet_attach_fail; } vnetp->link_state = LINK_STATE_UNKNOWN; - attach_progress |= AST_macreg; + status = vgen_init_mdeg(vnetp->vgenhdl); + if (status != DDI_SUCCESS) { + goto vnet_attach_fail; + } + attach_progress |= AST_init_mdeg; + vnetp->attach_progress = attach_progress; DBG1(NULL, "instance(%d) exit\n", instance); @@ -503,21 +581,25 @@ attach_progress = vnetp->attach_progress; /* - * Unregister from the gldv3 subsystem. This can fail, in particular - * if there are still any open references to this mac device; in which - * case we just return failure without continuing to detach further. + * Disable the mac device in the gldv3 subsystem. This can fail, in + * particular if there are still any open references to this mac + * device; in which case we just return failure without continuing to + * detach further. + * If it succeeds, we then invoke vgen_uninit() which should unregister + * any pseudo rings registered with the mac layer. Note we keep the + * AST_macreg flag on, so we can unregister with the mac layer at + * the end of this routine. */ if (attach_progress & AST_macreg) { - if (mac_unregister(vnetp->mh) != 0) { + if (mac_disable(vnetp->mh) != 0) { return (1); } - attach_progress &= ~AST_macreg; } /* - * Now that we have unregistered from gldv3, we must finish all other - * steps and successfully return from this function; otherwise we will - * end up leaving the device in a broken/unusable state. + * Now that we have disabled the device, we must finish all other steps + * and successfully return from this function; otherwise we will end up + * leaving the device in a broken/unusable state. * * First, release any hybrid resources assigned to this vnet device. */ @@ -530,9 +612,10 @@ * Uninit vgen. This stops further mdeg callbacks to this vnet * device and/or its ports; and detaches any existing ports. */ - if (attach_progress & AST_vgen_init) { + if (attach_progress & (AST_vgen_init|AST_init_mdeg)) { vgen_uninit(vnetp->vgenhdl); attach_progress &= ~AST_vgen_init; + attach_progress &= ~AST_init_mdeg; } /* Destroy the taskq. */ @@ -563,6 +646,17 @@ attach_progress &= ~AST_vnet_list; } + if (attach_progress & AST_ring_init) { + vnet_ring_grp_uninit(vnetp); + attach_progress &= ~AST_ring_init; + } + + if (attach_progress & AST_macreg) { + VERIFY(mac_unregister(vnetp->mh) == 0); + vnetp->mh = NULL; + attach_progress &= ~AST_macreg; + } + if (attach_progress & AST_vnet_alloc) { rw_destroy(&vnetp->vrwlock); rw_destroy(&vnetp->vsw_fp_rw); @@ -683,8 +777,9 @@ * external hosts. */ mblk_t * -vnet_m_tx(void *arg, mblk_t *mp) +vnet_tx_ring_send(void *arg, mblk_t *mp) { + vnet_pseudo_tx_ring_t *tx_ringp; vnet_t *vnetp; vnet_res_t *vresp; mblk_t *next; @@ -694,8 +789,10 @@ boolean_t is_unicast; boolean_t is_pvid; /* non-default pvid ? */ boolean_t hres; /* Hybrid resource ? */ - - vnetp = (vnet_t *)arg; + void *tx_arg; + + tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + vnetp = (vnet_t *)tx_ringp->vnetp; DBG1(vnetp, "enter\n"); ASSERT(mp != NULL); @@ -790,10 +887,14 @@ } } + + macp = &vresp->macreg; + tx_arg = tx_ringp; + } else { + macp = &vresp->macreg; + tx_arg = macp->m_driver; } - - macp = &vresp->macreg; - resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp); + resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp); /* tx done; now release ref on fdb entry */ VNET_FDBE_REFRELE(vresp); @@ -848,6 +949,124 @@ return (0); } +static void +vnet_ring_grp_init(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + tx_grp = &vnetp->tx_grp[0]; + tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) * + VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP); + for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) { + tx_ringp[i].state |= VNET_TXRING_SHARED; + } + tx_grp->rings = tx_ringp; + tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS; + + rx_grp = &vnetp->rx_grp[0]; + rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP; + rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL); + rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) * + rx_grp->max_ring_cnt, KM_SLEEP); + + /* + * Setup the first 3 Pseudo RX Rings that are reserved; + * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource. + */ + rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE; + rx_ringp[0].index = 0; + rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; + rx_ringp[1].index = 1; + rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID; + rx_ringp[2].index = 2; + + rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + rx_grp->rings = rx_ringp; + + for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + i < rx_grp->max_ring_cnt; i++) { + rx_ringp = &rx_grp->rings[i]; + rx_ringp->state = VNET_RXRING_FREE; + rx_ringp->index = i; + } +} + +static void +vnet_ring_grp_uninit(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_tx_group_t *tx_grp; + + tx_grp = &vnetp->tx_grp[0]; + if (tx_grp->rings != NULL) { + ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS); + kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) * + tx_grp->ring_cnt); + tx_grp->rings = NULL; + } + + rx_grp = &vnetp->rx_grp[0]; + if (rx_grp->rings != NULL) { + ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP); + ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT); + kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) * + rx_grp->max_ring_cnt); + rx_grp->rings = NULL; + } +} + +static vnet_pseudo_rx_ring_t * +vnet_alloc_pseudo_rx_ring(vnet_t *vnetp) +{ + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + int index; + + rx_grp = &vnetp->rx_grp[0]; + WRITE_ENTER(&rx_grp->lock); + + if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) { + /* no rings available */ + RW_EXIT(&rx_grp->lock); + return (NULL); + } + + for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT; + index < rx_grp->max_ring_cnt; index++) { + rx_ringp = &rx_grp->rings[index]; + if (rx_ringp->state == VNET_RXRING_FREE) { + rx_ringp->state |= VNET_RXRING_INUSE; + rx_grp->ring_cnt++; + break; + } + } + + RW_EXIT(&rx_grp->lock); + return (rx_ringp); +} + +static void +vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp) +{ + vnet_pseudo_rx_group_t *rx_grp; + + ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT); + rx_grp = &vnetp->rx_grp[0]; + WRITE_ENTER(&rx_grp->lock); + + if (ringp->state != VNET_RXRING_FREE) { + ringp->state = VNET_RXRING_FREE; + ringp->handle = NULL; + rx_grp->ring_cnt--; + } + + RW_EXIT(&rx_grp->lock); +} + /* wrapper function for mac_register() */ static int vnet_mac_register(vnet_t *vnetp) @@ -867,6 +1086,15 @@ macp->m_margin = VLAN_TAGSZ; /* + * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to + * workaround tx lock contention issues in nxge. + */ + macp->m_v12n = MAC_VIRT_LEVEL1; + if (vnet_mac_tx_serialize == B_TRUE) { + macp->m_v12n |= MAC_VIRT_SERIALIZE; + } + + /* * Finally, we're ready to register ourselves with the MAC layer * interface; if this succeeds, we're all ready to start() */ @@ -1116,42 +1344,57 @@ static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp) { - vnet_res_t *vresp = (vnet_res_t *)vrh; - vnet_t *vnetp = vresp->vnetp; + vnet_res_t *vresp = (vnet_res_t *)vrh; + vnet_t *vnetp = vresp->vnetp; + vnet_pseudo_rx_ring_t *ringp; if ((vnetp == NULL) || (vnetp->mh == 0)) { freemsgchain(mp); return; } - /* - * Packets received over a hybrid resource need additional processing - * to remove the tag, for the pvid case. The underlying resource is - * not aware of the vnet's pvid and thus packets are received with the - * vlan tag in the header; unlike packets that are received over a ldc - * channel in which case the peer vnet/vsw would have already removed - * the tag. - */ - if (vresp->type == VIO_NET_RES_HYBRID && - vnetp->pvid != vnetp->default_vlan_id) { - - vnet_rx_frames_untag(vnetp->pvid, &mp); - if (mp == NULL) { - return; - } - } - - mac_rx(vnetp->mh, NULL, mp); + ringp = vresp->rx_ringp; + mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); } void vnet_tx_update(vio_net_handle_t vrh) { - vnet_res_t *vresp = (vnet_res_t *)vrh; - vnet_t *vnetp = vresp->vnetp; - - if ((vnetp != NULL) && (vnetp->mh != NULL)) { - mac_tx_update(vnetp->mh); + vnet_res_t *vresp = (vnet_res_t *)vrh; + vnet_t *vnetp = vresp->vnetp; + vnet_pseudo_tx_ring_t *tx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + int i; + + if (vnetp == NULL || vnetp->mh == NULL) { + return; + } + + /* + * Currently, the tx hwring API (used to access rings that belong to + * a Hybrid IO resource) does not provide us a per ring flow ctrl + * update; also the pseudo rings are shared by the ports/ldcs in the + * vgen layer. Thus we can't figure out which pseudo ring is being + * re-enabled for transmits. To work around this, when we get a tx + * restart notification from below, we simply propagate that to all + * the tx pseudo rings registered with the mac layer above. + * + * There are a couple of side effects with this approach, but they are + * not harmful, as outlined below: + * + * A) We might send an invalid ring_update() for a ring that is not + * really flow controlled. This will not have any effect in the mac + * layer and packets will continue to be transmitted on that ring. + * + * B) We might end up clearing the flow control in the mac layer for + * a ring that is still flow controlled in the underlying resource. + * This will result in the mac layer restarting transmit, only to be + * flow controlled again on that ring. + */ + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + mac_tx_ring_update(vnetp->mh, tx_ringp->handle); } } @@ -1233,8 +1476,8 @@ ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp, vio_net_callbacks_t *vcb) { - vnet_t *vnetp; - vnet_res_t *vresp; + vnet_t *vnetp; + vnet_res_t *vresp; vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP); ether_copy(local_macaddr, vresp->local_macaddr); @@ -1260,11 +1503,7 @@ vnetp->instance); } } - - WRITE_ENTER(&vnetp->vrwlock); - vresp->nextp = vnetp->vres_list; - vnetp->vres_list = vresp; - RW_EXIT(&vnetp->vrwlock); + vnet_add_resource(vnetp, vresp); break; } vnetp = vnetp->nextp; @@ -1281,6 +1520,14 @@ vcb->vio_net_tx_update = vnet_tx_update; vcb->vio_net_report_err = vnet_handle_res_err; + /* Bind the resource to pseudo ring(s) */ + if (vnet_bind_rings(vresp) != 0) { + (void) vnet_rem_resource(vnetp, vresp); + vnet_hio_destroy_kstats(vresp->ksp); + KMEM_FREE(vresp); + return (1); + } + /* Dispatch a task to start resources */ vnet_dispatch_res_task(vnetp); return (0); @@ -1294,8 +1541,6 @@ { vnet_res_t *vresp = (vnet_res_t *)vhp; vnet_t *vnetp = vresp->vnetp; - vnet_res_t *vrp; - kstat_t *ksp = NULL; DBG1(NULL, "Resource Registerig hdl=0x%p", vhp); @@ -1306,7 +1551,29 @@ */ vnet_fdbe_del(vnetp, vresp); + vnet_unbind_rings(vresp); + /* Now remove the resource from the list */ + (void) vnet_rem_resource(vnetp, vresp); + + vnet_hio_destroy_kstats(vresp->ksp); + KMEM_FREE(vresp); +} + +static void +vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp) +{ + WRITE_ENTER(&vnetp->vrwlock); + vresp->nextp = vnetp->vres_list; + vnetp->vres_list = vresp; + RW_EXIT(&vnetp->vrwlock); +} + +static vnet_res_t * +vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp) +{ + vnet_res_t *vrp; + WRITE_ENTER(&vnetp->vrwlock); if (vresp == vnetp->vres_list) { vnetp->vres_list = vresp->nextp; @@ -1320,15 +1587,12 @@ vrp = vrp->nextp; } } - - ksp = vresp->ksp; - vresp->ksp = NULL; - vresp->vnetp = NULL; vresp->nextp = NULL; + RW_EXIT(&vnetp->vrwlock); - vnet_hio_destroy_kstats(ksp); - KMEM_FREE(vresp); + + return (vresp); } /* @@ -1710,6 +1974,1024 @@ } } +static boolean_t +vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data) +{ + vnet_t *vnetp = (vnet_t *)arg; + + if (vnetp == NULL) { + return (0); + } + + switch (cap) { + + case MAC_CAPAB_RINGS: { + + mac_capab_rings_t *cap_rings = cap_data; + /* + * Rings Capability Notes: + * We advertise rings to make use of the rings framework in + * gldv3 mac layer, to improve the performance. This is + * specifically needed when a Hybrid resource (with multiple + * tx/rx hardware rings) is assigned to a vnet device. We also + * leverage this for the normal case when no Hybrid resource is + * assigned. + * + * Ring Allocation: + * - TX path: + * We expose a pseudo ring group with 2 pseudo tx rings (as + * currently HybridIO exports only 2 rings) In the normal case, + * transmit traffic that comes down to the driver through the + * mri_tx (vnet_tx_ring_send()) entry point goes through the + * distributed switching algorithm in vnet and gets transmitted + * over a port/LDC in the vgen layer to either the vswitch or a + * peer vnet. If and when a Hybrid resource is assigned to the + * vnet, we obtain the tx ring information of the Hybrid device + * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings. + * Traffic being sent over the Hybrid resource by the mac layer + * gets spread across both hw rings, as they are mapped to the + * 2 pseudo tx rings in vnet. + * + * - RX path: + * We expose a pseudo ring group with 3 pseudo rx rings (static + * rings) initially. The first (default) pseudo rx ring is + * reserved for the resource that connects to the vswitch + * service. The next 2 rings are reserved for a Hybrid resource + * that may be assigned to the vnet device. If and when a + * Hybrid resource is assigned to the vnet, we obtain the rx + * ring information of the Hybrid device (nxge) and map these + * pseudo rings 1:1 to the 2 hw rx rings. For each additional + * resource that connects to a peer vnet, we dynamically + * allocate a pseudo rx ring and map it to that resource, when + * the resource gets added; and the pseudo rx ring is + * dynamically registered with the upper mac layer. We do the + * reverse and unregister the ring with the mac layer when + * the resource gets removed. + * + * Synchronization notes: + * We don't need any lock to protect members of ring structure, + * specifically ringp->hw_rh, in either the TX or the RX ring, + * as explained below. + * - TX ring: + * ring->hw_rh is initialized only when a Hybrid resource is + * associated; and gets referenced only in vnet_hio_tx(). The + * Hybrid resource itself is available in fdb only after tx + * hwrings are found and mapped; i.e, in vio_net_resource_reg() + * we call vnet_bind_rings() first and then call + * vnet_start_resources() which adds an entry to fdb. For + * traffic going over LDC resources, we don't reference + * ring->hw_rh at all. + * - RX ring: + * For rings mapped to Hybrid resource ring->hw_rh is + * initialized and only then do we add the rx callback for + * the underlying Hybrid resource; we disable callbacks before + * we unmap ring->hw_rh. For rings mapped to LDC resources, we + * stop the rx callbacks (in vgen) before we remove ring->hw_rh + * (vio_net_resource_unreg()). + */ + + if (cap_rings->mr_type == MAC_RING_TYPE_RX) { + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + + /* + * The ring_cnt for rx grp is initialized in + * vnet_ring_grp_init(). Later, the ring_cnt gets + * updated dynamically whenever LDC resources are added + * or removed. + */ + cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt; + cap_rings->mr_rget = vnet_get_ring; + + cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS; + cap_rings->mr_gget = vnet_get_group; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } else { + cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; + + /* + * The ring_cnt for tx grp is initialized in + * vnet_ring_grp_init() and remains constant, as we + * do not support dymanic tx rings for now. + */ + cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt; + cap_rings->mr_rget = vnet_get_ring; + + /* + * Transmit rings are not grouped; i.e, the number of + * transmit ring groups advertised should be set to 0. + */ + cap_rings->mr_gnum = 0; + + cap_rings->mr_gget = vnet_get_group; + cap_rings->mr_gaddring = NULL; + cap_rings->mr_gremring = NULL; + } + return (B_TRUE); + + } + + default: + break; + + } + + return (B_FALSE); +} + +/* + * Callback funtion for MAC layer to get ring information. + */ +static void +vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index, + const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle) +{ + vnet_t *vnetp = arg; + + switch (rtype) { + + case MAC_RING_TYPE_RX: { + + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_intr_t *mintr; + + /* We advertised only one RX group */ + ASSERT(g_index == 0); + rx_grp = &vnetp->rx_grp[g_index]; + + /* Check the current # of rings in the rx group */ + ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt)); + + /* Get the ring based on the index */ + rx_ringp = &rx_grp->rings[r_index]; + + rx_ringp->handle = r_handle; + /* + * Note: we don't need to save the incoming r_index in rx_ring, + * as vnet_ring_grp_init() would have initialized the index for + * each ring in the array. + */ + rx_ringp->grp = rx_grp; + rx_ringp->vnetp = vnetp; + + mintr = &infop->mri_intr; + mintr->mi_handle = (mac_intr_handle_t)rx_ringp; + mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr; + mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr; + + infop->mri_driver = (mac_ring_driver_t)rx_ringp; + infop->mri_start = vnet_rx_ring_start; + infop->mri_stop = vnet_rx_ring_stop; + + /* Set the poll function, as this is an rx ring */ + infop->mri_poll = vnet_rx_poll; + + break; + } + + case MAC_RING_TYPE_TX: { + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + + /* + * No need to check grp index; mac layer passes -1 for it. + */ + tx_grp = &vnetp->tx_grp[0]; + + /* Check the # of rings in the tx group */ + ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt)); + + /* Get the ring based on the index */ + tx_ringp = &tx_grp->rings[r_index]; + + tx_ringp->handle = r_handle; + tx_ringp->index = r_index; + tx_ringp->grp = tx_grp; + tx_ringp->vnetp = vnetp; + + infop->mri_driver = (mac_ring_driver_t)tx_ringp; + infop->mri_start = vnet_tx_ring_start; + infop->mri_stop = vnet_tx_ring_stop; + + /* Set the transmit function, as this is a tx ring */ + infop->mri_tx = vnet_tx_ring_send; + + break; + } + + default: + break; + } +} + +/* + * Callback funtion for MAC layer to get group information. + */ +static void +vnet_get_group(void *arg, mac_ring_type_t type, const int index, + mac_group_info_t *infop, mac_group_handle_t handle) +{ + vnet_t *vnetp = (vnet_t *)arg; + + switch (type) { + + case MAC_RING_TYPE_RX: + { + vnet_pseudo_rx_group_t *rx_grp; + + /* We advertised only one RX group */ + ASSERT(index == 0); + + rx_grp = &vnetp->rx_grp[index]; + rx_grp->handle = handle; + rx_grp->index = index; + rx_grp->vnetp = vnetp; + + infop->mgi_driver = (mac_group_driver_t)rx_grp; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = vnet_addmac; + infop->mgi_remmac = vnet_remmac; + infop->mgi_count = rx_grp->ring_cnt; + + break; + } + + case MAC_RING_TYPE_TX: + { + vnet_pseudo_tx_group_t *tx_grp; + + /* We advertised only one TX group */ + ASSERT(index == 0); + + tx_grp = &vnetp->tx_grp[index]; + tx_grp->handle = handle; + tx_grp->index = index; + tx_grp->vnetp = vnetp; + + infop->mgi_driver = (mac_group_driver_t)tx_grp; + infop->mgi_start = NULL; + infop->mgi_stop = NULL; + infop->mgi_addmac = NULL; + infop->mgi_remmac = NULL; + infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS; + + break; + } + + default: + break; + + } +} + +static int +vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + int err; + + /* + * If this ring is mapped to a LDC resource, simply mark the state to + * indicate the ring is started and return. + */ + if ((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + return (0); + } + + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + + /* + * This must be a ring reserved for a hwring. If the hwring is not + * bound yet, simply mark the state to indicate the ring is started and + * return. If and when a hybrid resource is activated for this vnet + * device, we will bind the hwring and start it then. If a hwring is + * already bound, start it now. + */ + if (rx_ringp->hw_rh == NULL) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + return (0); + } + + err = mac_hwring_start(rx_ringp->hw_rh); + if (err == 0) { + rx_ringp->gen_num = mr_gen_num; + rx_ringp->state |= VNET_RXRING_STARTED; + } else { + err = ENXIO; + } + + return (err); +} + +static void +vnet_rx_ring_stop(mac_ring_driver_t arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + + /* + * If this ring is mapped to a LDC resource, simply mark the state to + * indicate the ring is now stopped and return. + */ + if ((rx_ringp->state & + (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) { + rx_ringp->state &= ~VNET_RXRING_STARTED; + } + + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + + /* + * This must be a ring reserved for a hwring. If the hwring is not + * bound yet, simply mark the state to indicate the ring is stopped and + * return. If a hwring is already bound, stop it now. + */ + if (rx_ringp->hw_rh == NULL) { + rx_ringp->state &= ~VNET_RXRING_STARTED; + return; + } + + mac_hwring_stop(rx_ringp->hw_rh); + rx_ringp->state &= ~VNET_RXRING_STARTED; +} + +/* ARGSUSED */ +static int +vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + + tx_ringp->state |= VNET_TXRING_STARTED; + return (0); +} + +static void +vnet_tx_ring_stop(mac_ring_driver_t arg) +{ + vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + + tx_ringp->state &= ~VNET_TXRING_STARTED; +} + +/* + * Disable polling for a ring and enable its interrupt. + */ +static int +vnet_ring_enable_intr(void *arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + vnet_res_t *vresp; + + if (rx_ringp->hw_rh == NULL) { + /* + * Ring enable intr func is being invoked, but the ring is + * not bound to any underlying resource ? This must be a ring + * reserved for Hybrid resource and no such resource has been + * assigned to this vnet device yet. We simply return success. + */ + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + return (0); + } + + /* + * The rx ring has been bound to either a LDC or a Hybrid resource. + * Call the appropriate function to enable interrupts for the ring. + */ + if (rx_ringp->state & VNET_RXRING_HYBRID) { + return (mac_hwring_enable_intr(rx_ringp->hw_rh)); + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + return (vgen_enable_intr(vresp->macreg.m_driver)); + } +} + +/* + * Enable polling for a ring and disable its interrupt. + */ +static int +vnet_ring_disable_intr(void *arg) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + vnet_res_t *vresp; + + if (rx_ringp->hw_rh == NULL) { + /* + * Ring disable intr func is being invoked, but the ring is + * not bound to any underlying resource ? This must be a ring + * reserved for Hybrid resource and no such resource has been + * assigned to this vnet device yet. We simply return success. + */ + ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0); + return (0); + } + + /* + * The rx ring has been bound to either a LDC or a Hybrid resource. + * Call the appropriate function to disable interrupts for the ring. + */ + if (rx_ringp->state & VNET_RXRING_HYBRID) { + return (mac_hwring_disable_intr(rx_ringp->hw_rh)); + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + return (vgen_disable_intr(vresp->macreg.m_driver)); + } +} + +/* + * Poll 'bytes_to_pickup' bytes of message from the rx ring. + */ +static mblk_t * +vnet_rx_poll(void *arg, int bytes_to_pickup) +{ + vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg; + mblk_t *mp = NULL; + vnet_res_t *vresp; + vnet_t *vnetp = rx_ringp->vnetp; + + if (rx_ringp->hw_rh == NULL) { + return (NULL); + } + + if (rx_ringp->state & VNET_RXRING_HYBRID) { + mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup); + /* + * Packets received over a hybrid resource need additional + * processing to remove the tag, for the pvid case. The + * underlying resource is not aware of the vnet's pvid and thus + * packets are received with the vlan tag in the header; unlike + * packets that are received over a ldc channel in which case + * the peer vnet/vsw would have already removed the tag. + */ + if (vnetp->pvid != vnetp->default_vlan_id) { + vnet_rx_frames_untag(vnetp->pvid, &mp); + } + } else { + vresp = (vnet_res_t *)rx_ringp->hw_rh; + mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup); + } + return (mp); +} + +/* ARGSUSED */ +void +vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp, + boolean_t loopback) +{ + vnet_t *vnetp = (vnet_t *)arg; + vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh; + + /* + * Packets received over a hybrid resource need additional processing + * to remove the tag, for the pvid case. The underlying resource is + * not aware of the vnet's pvid and thus packets are received with the + * vlan tag in the header; unlike packets that are received over a ldc + * channel in which case the peer vnet/vsw would have already removed + * the tag. + */ + if (vnetp->pvid != vnetp->default_vlan_id) { + vnet_rx_frames_untag(vnetp->pvid, &mp); + if (mp == NULL) { + return; + } + } + mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num); +} + +static int +vnet_addmac(void *arg, const uint8_t *mac_addr) +{ + vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; + vnet_t *vnetp; + + vnetp = rx_grp->vnetp; + + if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { + return (0); + } + + cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n", + vnetp->instance, __func__); + return (EINVAL); +} + +static int +vnet_remmac(void *arg, const uint8_t *mac_addr) +{ + vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg; + vnet_t *vnetp; + + vnetp = rx_grp->vnetp; + + if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) { + return (0); + } + + cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n", + vnetp->instance, __func__, ether_sprintf((void *)mac_addr)); + return (EINVAL); +} + +int +vnet_hio_mac_init(vnet_t *vnetp, char *ifname) +{ + mac_handle_t mh; + mac_client_handle_t mch = NULL; + mac_unicast_handle_t muh = NULL; + mac_diag_t diag; + mac_register_t *macp; + char client_name[MAXNAMELEN]; + int rv; + uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE | + MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY; + vio_net_callbacks_t vcb; + ether_addr_t rem_addr = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + uint32_t retries = 0; + + if ((macp = mac_alloc(MAC_VERSION)) == NULL) { + return (EAGAIN); + } + + do { + rv = mac_open_by_linkname(ifname, &mh); + if (rv == 0) { + break; + } + if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) { + mac_free(macp); + return (rv); + } + drv_usecwait(vnet_mac_open_delay); + } while (rv == ENOENT); + + vnetp->hio_mh = mh; + + (void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance, + ifname); + rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE); + if (rv != 0) { + goto fail; + } + vnetp->hio_mch = mch; + + rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0, + &diag); + if (rv != 0) { + goto fail; + } + vnetp->hio_muh = muh; + + macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; + macp->m_driver = vnetp; + macp->m_dip = NULL; + macp->m_src_addr = NULL; + macp->m_callbacks = &vnet_hio_res_callbacks; + macp->m_min_sdu = 0; + macp->m_max_sdu = ETHERMTU; + + rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID, + vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb); + if (rv != 0) { + goto fail; + } + mac_free(macp); + + /* add the recv callback */ + mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp); + + /* add the notify callback - only tx updates for now */ + vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb, + vnetp); + + return (0); + +fail: + mac_free(macp); + vnet_hio_mac_cleanup(vnetp); + return (1); +} + +void +vnet_hio_mac_cleanup(vnet_t *vnetp) +{ + if (vnetp->hio_mnh != NULL) { + (void) mac_notify_remove(vnetp->hio_mnh, B_TRUE); + vnetp->hio_mnh = NULL; + } + + if (vnetp->hio_vhp != NULL) { + vio_net_resource_unreg(vnetp->hio_vhp); + vnetp->hio_vhp = NULL; + } + + if (vnetp->hio_muh != NULL) { + mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh); + vnetp->hio_muh = NULL; + } + + if (vnetp->hio_mch != NULL) { + mac_client_close(vnetp->hio_mch, 0); + vnetp->hio_mch = NULL; + } + + if (vnetp->hio_mh != NULL) { + mac_close(vnetp->hio_mh); + vnetp->hio_mh = NULL; + } +} + +/* Bind pseudo rings to hwrings */ +static int +vnet_bind_hwrings(vnet_t *vnetp) +{ + mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS]; + mac_perim_handle_t mph1; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int hw_ring_cnt; + int i; + int rv; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); + + /* Get the list of the underlying RX rings. */ + hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh, + MAC_RING_TYPE_RX); + + /* We expect the the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */ + if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { + cmn_err(CE_WARN, + "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n", + vnetp->instance, hw_ring_cnt); + goto fail; + } + + if (vnetp->rx_hwgh != NULL) { + /* + * Quiesce the HW ring and the mac srs on the ring. Note + * that the HW ring will be restarted when the pseudo ring + * is started. At that time all the packets will be + * directly passed up to the pseudo RX ring and handled + * by mac srs created over the pseudo RX ring. + */ + mac_rx_client_quiesce(vnetp->hio_mch); + mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE); + } + + /* + * Bind the pseudo rings to the hwrings and start the hwrings. + * Note we don't need to register these with the upper mac, as we have + * statically exported these pseudo rxrings which are reserved for + * rxrings of Hybrid resource. + */ + rx_grp = &vnetp->rx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + /* Pick the rxrings reserved for Hybrid resource */ + rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; + + /* Store the hw ring handle */ + rx_ringp->hw_rh = hw_rh[i]; + + /* Bind the pseudo ring to the underlying hwring */ + mac_hwring_setup(rx_ringp->hw_rh, + (mac_resource_handle_t)rx_ringp); + + /* Start the hwring if needed */ + if (rx_ringp->state & VNET_RXRING_STARTED) { + rv = mac_hwring_start(rx_ringp->hw_rh); + if (rv != 0) { + mac_hwring_teardown(rx_ringp->hw_rh); + rx_ringp->hw_rh = NULL; + goto fail; + } + } + } + + /* Get the list of the underlying TX rings. */ + hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh, + MAC_RING_TYPE_TX); + + /* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */ + if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) { + cmn_err(CE_WARN, + "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n", + vnetp->instance, hw_ring_cnt); + goto fail; + } + + /* + * Now map the pseudo txrings to the hw txrings. Note we don't need + * to register these with the upper mac, as we have statically exported + * these rings. Note that these rings will continue to be used for LDC + * resources to peer vnets and vswitch (shared ring). + */ + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < tx_grp->ring_cnt; i++) { + tx_ringp = &tx_grp->rings[i]; + tx_ringp->hw_rh = hw_rh[i]; + tx_ringp->state |= VNET_TXRING_HYBRID; + } + + mac_perim_exit(mph1); + return (0); + +fail: + mac_perim_exit(mph1); + vnet_unbind_hwrings(vnetp); + return (1); +} + +/* Unbind pseudo rings from hwrings */ +static void +vnet_unbind_hwrings(vnet_t *vnetp) +{ + mac_perim_handle_t mph1; + vnet_pseudo_rx_ring_t *rx_ringp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_tx_group_t *tx_grp; + vnet_pseudo_tx_ring_t *tx_ringp; + int i; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph1); + + tx_grp = &vnetp->tx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + tx_ringp = &tx_grp->rings[i]; + if (tx_ringp->state & VNET_TXRING_HYBRID) { + tx_ringp->state &= ~VNET_TXRING_HYBRID; + tx_ringp->hw_rh = NULL; + } + } + + rx_grp = &vnetp->rx_grp[0]; + for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) { + rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX]; + if (rx_ringp->hw_rh != NULL) { + /* Stop the hwring */ + mac_hwring_stop(rx_ringp->hw_rh); + + /* Teardown the hwring */ + mac_hwring_teardown(rx_ringp->hw_rh); + rx_ringp->hw_rh = NULL; + } + } + + if (vnetp->rx_hwgh != NULL) { + vnetp->rx_hwgh = NULL; + /* + * First clear the permanent-quiesced flag of the RX srs then + * restart the HW ring and the mac srs on the ring. + */ + mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE); + mac_rx_client_restart(vnetp->hio_mch); + } + + mac_perim_exit(mph1); +} + +/* Bind pseudo ring to a LDC resource */ +static int +vnet_bind_vgenring(vnet_res_t *vresp) +{ + vnet_t *vnetp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_perim_handle_t mph1; + int rv; + int type; + + vnetp = vresp->vnetp; + type = vresp->type; + rx_grp = &vnetp->rx_grp[0]; + + if (type == VIO_NET_RES_LDC_SERVICE) { + /* + * Ring Index 0 is the default ring in the group and is + * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring + * is allocated statically and is reported to the mac layer + * in vnet_m_capab(). So, all we need to do here, is save a + * reference to the associated vresp. + */ + rx_ringp = &rx_grp->rings[0]; + rx_ringp->hw_rh = (mac_ring_handle_t)vresp; + vresp->rx_ringp = (void *)rx_ringp; + return (0); + } + ASSERT(type == VIO_NET_RES_LDC_GUEST); + + mac_perim_enter_by_mh(vnetp->mh, &mph1); + + rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp); + if (rx_ringp == NULL) { + cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring", + vnetp->instance); + goto fail; + } + + /* Store the LDC resource itself as the ring handle */ + rx_ringp->hw_rh = (mac_ring_handle_t)vresp; + + /* + * Save a reference to the ring in the resource for lookup during + * unbind. Note this is only done for LDC resources. We don't need this + * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its + * rx rings are mapped to reserved pseudo rx rings (index 1 and 2). + */ + vresp->rx_ringp = (void *)rx_ringp; + rx_ringp->state |= VNET_RXRING_LDC_GUEST; + + /* Register the pseudo ring with upper-mac */ + rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index); + if (rv != 0) { + rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; + rx_ringp->hw_rh = NULL; + vnet_free_pseudo_rx_ring(vnetp, rx_ringp); + goto fail; + } + + mac_perim_exit(mph1); + return (0); +fail: + mac_perim_exit(mph1); + return (1); +} + +/* Unbind pseudo ring from a LDC resource */ +static void +vnet_unbind_vgenring(vnet_res_t *vresp) +{ + vnet_t *vnetp; + vnet_pseudo_rx_group_t *rx_grp; + vnet_pseudo_rx_ring_t *rx_ringp; + mac_perim_handle_t mph1; + int type; + + vnetp = vresp->vnetp; + type = vresp->type; + rx_grp = &vnetp->rx_grp[0]; + + if (vresp->rx_ringp == NULL) { + return; + } + + if (type == VIO_NET_RES_LDC_SERVICE) { + /* + * Ring Index 0 is the default ring in the group and is + * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring + * is allocated statically and is reported to the mac layer + * in vnet_m_capab(). So, all we need to do here, is remove its + * reference to the associated vresp. + */ + rx_ringp = &rx_grp->rings[0]; + rx_ringp->hw_rh = NULL; + vresp->rx_ringp = NULL; + return; + } + ASSERT(type == VIO_NET_RES_LDC_GUEST); + + mac_perim_enter_by_mh(vnetp->mh, &mph1); + + rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp; + vresp->rx_ringp = NULL; + + if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) { + /* Unregister the pseudo ring with upper-mac */ + mac_group_rem_ring(rx_grp->handle, rx_ringp->handle); + + rx_ringp->hw_rh = NULL; + rx_ringp->state &= ~VNET_RXRING_LDC_GUEST; + + /* Free the pseudo rx ring */ + vnet_free_pseudo_rx_ring(vnetp, rx_ringp); + } + + mac_perim_exit(mph1); +} + +static void +vnet_unbind_rings(vnet_res_t *vresp) +{ + switch (vresp->type) { + + case VIO_NET_RES_LDC_SERVICE: + case VIO_NET_RES_LDC_GUEST: + vnet_unbind_vgenring(vresp); + break; + + case VIO_NET_RES_HYBRID: + vnet_unbind_hwrings(vresp->vnetp); + break; + + default: + break; + + } +} + +static int +vnet_bind_rings(vnet_res_t *vresp) +{ + int rv; + + switch (vresp->type) { + + case VIO_NET_RES_LDC_SERVICE: + case VIO_NET_RES_LDC_GUEST: + rv = vnet_bind_vgenring(vresp); + break; + + case VIO_NET_RES_HYBRID: + rv = vnet_bind_hwrings(vresp->vnetp); + break; + + default: + rv = 1; + break; + + } + + return (rv); +} + +/* ARGSUSED */ +int +vnet_hio_stat(void *arg, uint_t stat, uint64_t *val) +{ + vnet_t *vnetp = (vnet_t *)arg; + + *val = mac_stat_get(vnetp->hio_mh, stat); + return (0); +} + +/* + * The start() and stop() routines for the Hybrid resource below, are just + * dummy functions. This is provided to avoid resource type specific code in + * vnet_start_resources() and vnet_stop_resources(). The starting and stopping + * of the Hybrid resource happens in the context of the mac_client interfaces + * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup(). + */ +/* ARGSUSED */ +static int +vnet_hio_start(void *arg) +{ + return (0); +} + +/* ARGSUSED */ +static void +vnet_hio_stop(void *arg) +{ +} + +mblk_t * +vnet_hio_tx(void *arg, mblk_t *mp) +{ + vnet_pseudo_tx_ring_t *tx_ringp; + mblk_t *nextp; + mblk_t *ret_mp; + + tx_ringp = (vnet_pseudo_tx_ring_t *)arg; + for (;;) { + nextp = mp->b_next; + mp->b_next = NULL; + + ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp); + if (ret_mp != NULL) { + ret_mp->b_next = nextp; + mp = ret_mp; + break; + } + + if ((mp = nextp) == NULL) + break; + } + return (mp); +} + +static void +vnet_hio_notify_cb(void *arg, mac_notify_type_t type) +{ + vnet_t *vnetp = (vnet_t *)arg; + mac_perim_handle_t mph; + + mac_perim_enter_by_mh(vnetp->hio_mh, &mph); + switch (type) { + case MAC_NOTE_TX: + vnet_tx_update(vnetp->hio_vhp); + break; + + default: + break; + } + mac_perim_exit(mph); +} + #ifdef VNET_IOC_DEBUG /*
--- a/usr/src/uts/sun4v/io/vnet_dds.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/io/vnet_dds.c Mon Aug 17 09:14:35 2009 -0400 @@ -113,6 +113,8 @@ /* Functions imported from other files */ extern int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg); +extern int vnet_hio_mac_init(vnet_t *vnetp, char *ifname); +extern void vnet_hio_mac_cleanup(vnet_t *vnetp); /* HV functions that are used in this file */ extern uint64_t vdds_hv_niu_vr_getinfo(uint32_t hvcookie, @@ -412,7 +414,31 @@ } else { vdds->hio_dip = dip; vdds->hio_cookie = hio_cookie; - (void) vdds_send_dds_resp_msg(vnetp, dmsg, B_TRUE); + sprintf(vdds->hio_ifname, "%s%d", ddi_driver_name(dip), + ddi_get_instance(dip)); + + rv = vnet_hio_mac_init(vnetp, vdds->hio_ifname); + if (rv != 0) { + /* failed - cleanup, send failed DDS message */ + DERR(vdds, "HIO mac init failed, cleaning up"); + rv = vdds_destroy_niu_node(dip, hio_cookie); + if (rv == 0) { + /* use DERR to print by default */ + DERR(vdds, "Successfully destroyed" + " Hybrid node"); + } else { + cmn_err(CE_WARN, "vnet%d:Failed to " + "destroy Hybrid node", + vnetp->instance); + } + vdds->hio_dip = NULL; + vdds->hio_cookie = 0; + (void) vdds_send_dds_resp_msg(vnetp, + dmsg, B_FALSE); + } else { + (void) vdds_send_dds_resp_msg(vnetp, + dmsg, B_TRUE); + } /* DERR used only print by default */ DERR(vdds, "Successfully created HIO node"); } @@ -424,6 +450,7 @@ DBG2(vdds, "NACK: No HIO device destroy"); (void) vdds_send_dds_resp_msg(vnetp, dmsg, B_FALSE); } else { + vnet_hio_mac_cleanup(vnetp); rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip, vdds->hio_cookie); if (rv == 0) { @@ -444,6 +471,7 @@ case VNET_DDS_TASK_REL_SHARE: DBG2(vdds, "REL_SHARE task..."); if (vnetp->vdds_info.hio_dip != NULL) { + vnet_hio_mac_cleanup(vnetp); rv = vdds_destroy_niu_node(vnetp->vdds_info.hio_dip, vdds->hio_cookie); if (rv == 0) {
--- a/usr/src/uts/sun4v/io/vnet_gen.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/io/vnet_gen.c Mon Aug 17 09:14:35 2009 -0400 @@ -73,11 +73,15 @@ /* vgen proxy entry points */ int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, const uint8_t *macaddr, void **vgenhdl); +int vgen_init_mdeg(void *arg); void vgen_uninit(void *arg); int vgen_dds_tx(void *arg, void *dmsg); void vgen_mod_init(void); int vgen_mod_cleanup(void); void vgen_mod_fini(void); +int vgen_enable_intr(void *arg); +int vgen_disable_intr(void *arg); +mblk_t *vgen_poll(void *arg, int bytes_to_pickup); static int vgen_start(void *arg); static void vgen_stop(void *arg); static mblk_t *vgen_tx(void *arg, mblk_t *mp); @@ -151,6 +155,7 @@ static int vgen_tx_dring_full(vgen_ldc_t *ldcp); static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp); static void vgen_ldc_watchdog(void *arg); +static mblk_t *vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup); /* vgen handshake functions */ static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp); @@ -200,7 +205,7 @@ static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp); static void vgen_ldc_rcv_worker(void *arg); static void vgen_handle_evt_read(vgen_ldc_t *ldcp); -static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp); +static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt); static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp); static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp); static void vgen_link_update(vgen_t *vgenp, link_state_t link_state); @@ -536,13 +541,6 @@ if (rv != 0) { goto vgen_init_fail; } - - /* register with MD event generator */ - rv = vgen_mdeg_reg(vgenp); - if (rv != DDI_SUCCESS) { - goto vgen_init_fail; - } - *vgenhdl = (void *)vgenp; DBG1(NULL, NULL, "vnet(%d): exit\n", instance); @@ -562,6 +560,15 @@ return (DDI_FAILURE); } +int +vgen_init_mdeg(void *arg) +{ + vgen_t *vgenp = (vgen_t *)arg; + + /* register with MD event generator */ + return (vgen_mdeg_reg(vgenp)); +} + /* * Called by vnet to undo the initializations done by vgen_init(). * The handle provided by generic transport during vgen_init() is the argument. @@ -2094,13 +2101,21 @@ static void vgen_mdeg_unreg(vgen_t *vgenp) { - (void) mdeg_unregister(vgenp->mdeg_dev_hdl); - (void) mdeg_unregister(vgenp->mdeg_port_hdl); - kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template)); - KMEM_FREE(vgenp->mdeg_parentp); - vgenp->mdeg_parentp = NULL; - vgenp->mdeg_dev_hdl = NULL; - vgenp->mdeg_port_hdl = NULL; + if (vgenp->mdeg_dev_hdl != NULL) { + (void) mdeg_unregister(vgenp->mdeg_dev_hdl); + vgenp->mdeg_dev_hdl = NULL; + } + if (vgenp->mdeg_port_hdl != NULL) { + (void) mdeg_unregister(vgenp->mdeg_port_hdl); + vgenp->mdeg_port_hdl = NULL; + } + + if (vgenp->mdeg_parentp != NULL) { + kmem_free(vgenp->mdeg_parentp->specp, + sizeof (vgen_prop_template)); + KMEM_FREE(vgenp->mdeg_parentp); + vgenp->mdeg_parentp = NULL; + } } /* mdeg callback function for the port node */ @@ -2907,6 +2922,7 @@ mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL); attach_state |= AST_mutex_init; @@ -3032,6 +3048,7 @@ mutex_destroy(&ldcp->cblock); mutex_destroy(&ldcp->wrlock); mutex_destroy(&ldcp->rxlock); + mutex_destroy(&ldcp->pollq_lock); } if (attach_state & AST_ldc_alloc) { KMEM_FREE(ldcp); @@ -3100,6 +3117,7 @@ mutex_destroy(&ldcp->cblock); mutex_destroy(&ldcp->wrlock); mutex_destroy(&ldcp->rxlock); + mutex_destroy(&ldcp->pollq_lock); /* unlink it from the list */ *prev_ldcp = ldcp->nextp; @@ -6278,7 +6296,7 @@ */ if (bp != NULL) { DTRACE_PROBE1(vgen_rcv_msgs, int, count); - vgen_rx(ldcp, bp); + vgen_rx(ldcp, bp, bpt); count = 0; bp = bpt = NULL; } @@ -6459,7 +6477,7 @@ if (count++ > vgen_chain_len) { DTRACE_PROBE1(vgen_rcv_msgs, int, count); - vgen_rx(ldcp, bp); + vgen_rx(ldcp, bp, bpt); count = 0; bp = bpt = NULL; } @@ -6512,7 +6530,7 @@ /* send up packets received so far */ if (bp != NULL) { DTRACE_PROBE1(vgen_rcv_msgs, int, count); - vgen_rx(ldcp, bp); + vgen_rx(ldcp, bp, bpt); bp = bpt = NULL; } DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); @@ -6996,18 +7014,57 @@ * Send received packets up the stack. */ static void -vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp) +vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt) { vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; + vgen_t *vgenp = LDC_TO_VGEN(ldcp); if (ldcp->rcv_thread != NULL) { ASSERT(MUTEX_HELD(&ldcp->rxlock)); - mutex_exit(&ldcp->rxlock); } else { ASSERT(MUTEX_HELD(&ldcp->cblock)); + } + + mutex_enter(&ldcp->pollq_lock); + + if (ldcp->polling_on == B_TRUE) { + /* + * If we are in polling mode, simply queue + * the packets onto the poll queue and return. + */ + if (ldcp->pollq_headp == NULL) { + ldcp->pollq_headp = bp; + ldcp->pollq_tailp = bpt; + } else { + ldcp->pollq_tailp->b_next = bp; + ldcp->pollq_tailp = bpt; + } + + mutex_exit(&ldcp->pollq_lock); + return; + } + + /* + * Prepend any pending mblks in the poll queue, now that we + * are in interrupt mode, before sending up the chain of pkts. + */ + if (ldcp->pollq_headp != NULL) { + DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n", + (uintptr_t)ldcp); + ldcp->pollq_tailp->b_next = bp; + bp = ldcp->pollq_headp; + ldcp->pollq_headp = ldcp->pollq_tailp = NULL; + } + + mutex_exit(&ldcp->pollq_lock); + + if (ldcp->rcv_thread != NULL) { + mutex_exit(&ldcp->rxlock); + } else { mutex_exit(&ldcp->cblock); } + /* Send up the packets */ vrx_cb(ldcp->portp->vhp, bp); if (ldcp->rcv_thread != NULL) { @@ -7233,6 +7290,145 @@ vgen_handshake_retry(ldcp); } +int +vgen_enable_intr(void *arg) +{ + vgen_port_t *portp = (vgen_port_t *)arg; + vgen_ldclist_t *ldclp; + vgen_ldc_t *ldcp; + + ldclp = &portp->ldclist; + READ_ENTER(&ldclp->rwlock); + /* + * NOTE: for now, we will assume we have a single channel. + */ + if (ldclp->headp == NULL) { + RW_EXIT(&ldclp->rwlock); + return (1); + } + ldcp = ldclp->headp; + + mutex_enter(&ldcp->pollq_lock); + ldcp->polling_on = B_FALSE; + mutex_exit(&ldcp->pollq_lock); + + RW_EXIT(&ldclp->rwlock); + + return (0); +} + +int +vgen_disable_intr(void *arg) +{ + vgen_port_t *portp = (vgen_port_t *)arg; + vgen_ldclist_t *ldclp; + vgen_ldc_t *ldcp; + + ldclp = &portp->ldclist; + READ_ENTER(&ldclp->rwlock); + /* + * NOTE: for now, we will assume we have a single channel. + */ + if (ldclp->headp == NULL) { + RW_EXIT(&ldclp->rwlock); + return (1); + } + ldcp = ldclp->headp; + + + mutex_enter(&ldcp->pollq_lock); + ldcp->polling_on = B_TRUE; + mutex_exit(&ldcp->pollq_lock); + + RW_EXIT(&ldclp->rwlock); + + return (0); +} + +mblk_t * +vgen_poll(void *arg, int bytes_to_pickup) +{ + vgen_port_t *portp = (vgen_port_t *)arg; + vgen_ldclist_t *ldclp; + vgen_ldc_t *ldcp; + mblk_t *mp = NULL; + + ldclp = &portp->ldclist; + READ_ENTER(&ldclp->rwlock); + /* + * NOTE: for now, we will assume we have a single channel. + */ + if (ldclp->headp == NULL) { + RW_EXIT(&ldclp->rwlock); + return (NULL); + } + ldcp = ldclp->headp; + + mp = vgen_ldc_poll(ldcp, bytes_to_pickup); + + RW_EXIT(&ldclp->rwlock); + return (mp); +} + +static mblk_t * +vgen_ldc_poll(vgen_ldc_t *ldcp, int bytes_to_pickup) +{ + mblk_t *bp = NULL; + mblk_t *bpt = NULL; + mblk_t *mp = NULL; + size_t mblk_sz = 0; + size_t sz = 0; + uint_t count = 0; + + mutex_enter(&ldcp->pollq_lock); + + bp = ldcp->pollq_headp; + while (bp != NULL) { + /* get the size of this packet */ + mblk_sz = msgdsize(bp); + + /* if adding this pkt, exceeds the size limit, we are done. */ + if (sz + mblk_sz > bytes_to_pickup) { + break; + } + + /* we have room for this packet */ + sz += mblk_sz; + + /* increment the # of packets being sent up */ + count++; + + /* track the last processed pkt */ + bpt = bp; + + /* get the next pkt */ + bp = bp->b_next; + } + + if (count != 0) { + /* + * picked up some packets; save the head of pkts to be sent up. + */ + mp = ldcp->pollq_headp; + + /* move the pollq_headp to skip over the pkts being sent up */ + ldcp->pollq_headp = bp; + + /* picked up all pending pkts in the queue; reset tail also */ + if (ldcp->pollq_headp == NULL) { + ldcp->pollq_tailp = NULL; + } + + /* terminate the tail of pkts to be sent up */ + bpt->b_next = NULL; + } + + mutex_exit(&ldcp->pollq_lock); + + DTRACE_PROBE1(vgen_poll_pkts, uint_t, count); + return (mp); +} + #if DEBUG /*
--- a/usr/src/uts/sun4v/os/fillsysinfo.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/os/fillsysinfo.c Mon Aug 17 09:14:35 2009 -0400 @@ -774,9 +774,6 @@ } } - cmn_err(CE_WARN, "mmu-#ra-bits property not found in MD"); - cmn_err(CE_WARN, "Memory DR disabled"); - memnodes = md_alloc_scan_dag(mdp, md_root_node(mdp), "memory", "fwd", &mem_list);
--- a/usr/src/uts/sun4v/promif/promif_emul.c Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/promif/promif_emul.c Mon Aug 17 09:14:35 2009 -0400 @@ -233,7 +233,7 @@ unmap_prom_mappings(struct translation *transroot, size_t ntransroot) { int i, j, rv; - int npgs, nunmapped, nfreed, nskipped; + int npgs, nunmapped, nfreed, nskipped, nskipped_io; char *p; tte_t tte; pfn_t pfn; @@ -256,6 +256,7 @@ nfreed = 0; nunmapped = 0; nskipped = 0; + nskipped_io = 0; for (i = 0, promt = transroot; i < ntransroot; i++, promt++) { ASSERT(promt->tte_hi != 0); @@ -292,34 +293,41 @@ ASSERT(TTE_IS_8K(&tte)); /* - * Unload the current mapping for the page and - * if it is the last mapping, free the page. + * Unload the current mapping for the pfn and + * if it is the last mapping for a memory page, + * free the page. */ - pp = page_numtopp_nolock(pfn); - PMFREE_DEBUG("unmap vaddr=0x%lx pfn=0x%lx pp=0x%p", - vaddr, pfn, (void *)pp); - ASSERT(pp); - ASSERT(PAGE_EXCL(pp)); - ASSERT(PP_ISNORELOC(pp)); - ASSERT(!PP_ISFREE(pp)); - ASSERT(page_find(&prom_ppages, pfn)); - ASSERT(page_get_pagecnt(pp->p_szc) == 1); + PMFREE_DEBUG("unmap vaddr=0x%lx pfn=0x%lx", vaddr, pfn); hat_unload(kas.a_hat, (caddr_t)vaddr, PAGESIZE, HAT_UNLOAD_UNLOCK); - if (pp->p_mapping) { - PMFREE_DEBUG(" skip\n"); + if (pf_is_memory(pfn)) { + pp = page_numtopp_nolock(pfn); + PMFREE_DEBUG(" pp=0x%p", (void *)pp); + ASSERT(pp); + ASSERT(PAGE_EXCL(pp)); + ASSERT(PP_ISNORELOC(pp)); + ASSERT(!PP_ISFREE(pp)); + ASSERT(page_find(&prom_ppages, pfn)); + ASSERT(page_get_pagecnt(pp->p_szc) == 1); + + if (pp->p_mapping) { + PMFREE_DEBUG(" skip\n"); + } else { + PP_CLRNORELOC(pp); + page_destroy(pp, 0); + memlist_write_lock(); + rv = memlist_add_span(pfn << PAGESHIFT, + PAGESIZE, &phys_avail); + ASSERT(rv == MEML_SPANOP_OK); + memlist_write_unlock(); + PMFREE_DEBUG(" free\n"); + nfreed++; + } } else { - PP_CLRNORELOC(pp); - page_destroy(pp, 0); - memlist_write_lock(); - rv = memlist_add_span(pfn << PAGESHIFT, - PAGESIZE, &phys_avail); - ASSERT(rv == MEML_SPANOP_OK); - memlist_write_unlock(); - PMFREE_DEBUG(" free\n"); - nfreed++; + nskipped_io++; + PMFREE_DEBUG(" skip IO\n"); } nunmapped++; vaddr += PAGESIZE; @@ -327,8 +335,9 @@ } if (transroot) { - PMFREE_DEBUG("nunmapped=%d nfreed=%d nskipped=%d\n", - nunmapped, nfreed, nskipped); + PMFREE_DEBUG( + "nunmapped=%d nfreed=%d nskipped=%d nskipped_io=%d\n", + nunmapped, nfreed, nskipped, nskipped_io); kmem_free(transroot, ntransroot * sizeof (*transroot)); }
--- a/usr/src/uts/sun4v/sys/vnet.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/sys/vnet.h Mon Aug 17 09:14:35 2009 -0400 @@ -34,6 +34,8 @@ #include <sys/vnet_res.h> #include <sys/vnet_mailbox.h> #include <sys/modhash.h> +#include <net/if.h> +#include <sys/mac_client.h> #define VNET_SUCCESS (0) /* successful return */ #define VNET_FAILURE (-1) /* unsuccessful return */ @@ -117,6 +119,7 @@ uint32_t refcnt; /* reference count */ struct vnet *vnetp; /* back pointer to vnet */ kstat_t *ksp; /* hio kstats */ + void *rx_ringp; /* assoc pseudo rx ring */ } vnet_res_t; #define VNET_DDS_TASK_ADD_SHARE 0x01 @@ -131,6 +134,7 @@ vio_dds_msg_t dmsg; /* Pending DDS message */ dev_info_t *hio_dip; /* Hybrid device's dip */ uint64_t hio_cookie; /* Hybrid device's cookie */ + char hio_ifname[LIFNAMSIZ]; /* Hybrid interface name */ ddi_taskq_t *dds_taskqp; /* Taskq's used for DDS */ struct vnet *vnetp; /* Back pointer to vnetp */ } vnet_dds_info_t; @@ -155,12 +159,103 @@ typedef enum { AST_init = 0x0, AST_vnet_alloc = 0x1, - AST_mac_alloc = 0x2, AST_read_macaddr = 0x4, - AST_vgen_init = 0x8, AST_fdbh_alloc = 0x10, - AST_vdds_init = 0x20, AST_taskq_create = 0x40, - AST_vnet_list = 0x80, AST_macreg = 0x100 + AST_ring_init = 0x2, AST_vdds_init = 0x4, + AST_read_macaddr = 0x8, AST_fdbh_alloc = 0x10, + AST_taskq_create = 0x20, AST_vnet_list = 0x40, + AST_vgen_init = 0x80, AST_macreg = 0x100, + AST_init_mdeg = 0x200 } vnet_attach_progress_t; +#define VNET_NUM_PSEUDO_GROUPS 1 /* # of pseudo ring grps */ +#define VNET_NUM_HYBRID_RINGS 2 /* # of Hybrid tx/rx rings */ +#define VNET_HYBRID_RXRING_INDEX 1 /* Hybrid rx ring start index */ + +/* + * # of Pseudo TX Rings is defined based on the possible + * # of TX Hardware Rings from a Hybrid resource. + */ +#define VNET_NUM_PSEUDO_TXRINGS VNET_NUM_HYBRID_RINGS + +/* + * # of Pseudo RX Rings that are reserved and exposed by default. + * 1 for LDC resource to vsw + 2 for RX rings of Hybrid resource. + */ +#define VNET_NUM_PSEUDO_RXRINGS_DEFAULT (VNET_NUM_HYBRID_RINGS + 1) + +/* Pseudo RX Ring States */ +typedef enum { + VNET_RXRING_FREE = 0x0, /* Free */ + VNET_RXRING_INUSE = 0x1, /* In use */ + VNET_RXRING_LDC_SERVICE = 0x2, /* Mapped to vswitch */ + VNET_RXRING_LDC_GUEST = 0x4, /* Mapped to a peer vnet */ + VNET_RXRING_HYBRID = 0x8, /* Mapped to Hybrid resource */ + VNET_RXRING_STARTED = 0x10 /* Started */ +} vnet_rxring_state_t; + +/* Pseudo TX Ring States */ +typedef enum { + VNET_TXRING_FREE = 0x0, /* Free */ + VNET_TXRING_INUSE = 0x1, /* In use */ + VNET_TXRING_SHARED = 0x2, /* Shared among LDCs */ + VNET_TXRING_HYBRID = 0x4, /* Shared among LDCs, Hybrid resource */ + VNET_TXRING_STARTED = 0x8 /* Started */ +} vnet_txring_state_t; + +/* + * Psuedo TX Ring + */ +typedef struct vnet_pseudo_tx_ring { + uint_t index; /* ring index */ + vnet_txring_state_t state; /* ring state */ + void *grp; /* grp associated */ + void *vnetp; /* vnet associated */ + mac_ring_handle_t handle; /* ring handle in mac layer */ + mac_ring_handle_t hw_rh; /* Resource type dependent, internal */ + /* ring handle. Hybrid res: ring hdl */ + /* of hardware rx ring; LDC res: hdl */ + /* to the res itself (vnet_res_t) */ +} vnet_pseudo_tx_ring_t; + +/* + * Psuedo RX Ring + */ +typedef struct vnet_pseudo_rx_ring { + uint_t index; /* ring index */ + vnet_rxring_state_t state; /* ring state */ + void *grp; /* grp associated */ + void *vnetp; /* vnet associated */ + mac_ring_handle_t handle; /* ring handle in mac layer */ + mac_ring_handle_t hw_rh; /* Resource type dependent, internal */ + /* ring handle. Hybrid res: ring hdl */ + /* of hardware tx ring; otherwise */ + /* NULL */ + uint64_t gen_num; /* Mac layer gen_num */ +} vnet_pseudo_rx_ring_t; + +/* + * Psuedo TX Ring Group + */ +typedef struct vnet_pseudo_tx_group { + uint_t index; /* group index */ + void *vnetp; /* vnet associated */ + mac_group_handle_t handle; /* grp handle in mac layer */ + uint_t ring_cnt; /* total # of rings in grp */ + vnet_pseudo_tx_ring_t *rings; /* array of rings */ +} vnet_pseudo_tx_group_t; + +/* + * Psuedo RX Ring Group + */ +typedef struct vnet_pseudo_rx_group { + krwlock_t lock; /* sync rings access in grp */ + int index; /* group index */ + void *vnetp; /* vnet this grp belongs to */ + mac_group_handle_t handle; /* grp handle in mac layer */ + uint_t max_ring_cnt; /* total # of rings in grp */ + uint_t ring_cnt; /* # of rings in use */ + vnet_pseudo_rx_ring_t *rings; /* array of rings */ +} vnet_pseudo_rx_group_t; + /* * vnet instance state information */ @@ -194,6 +289,18 @@ vnet_dds_info_t vdds_info; /* DDS related info */ krwlock_t vrwlock; /* Resource list lock */ ddi_taskq_t *taskqp; /* Resource taskq */ + + /* pseudo ring groups */ + vnet_pseudo_rx_group_t rx_grp[VNET_NUM_PSEUDO_GROUPS]; + vnet_pseudo_tx_group_t tx_grp[VNET_NUM_PSEUDO_GROUPS]; + + vio_net_handle_t hio_vhp; /* HIO resource hdl */ + mac_handle_t hio_mh; /* HIO mac hdl */ + mac_client_handle_t hio_mch; /* HIO mac client hdl */ + mac_unicast_handle_t hio_muh; /* HIO mac unicst hdl */ + mac_notify_handle_t hio_mnh; /* HIO notify cb hdl */ + mac_group_handle_t rx_hwgh; /* HIO rx ring-group hdl */ + mac_group_handle_t tx_hwgh; /* HIO tx ring-group hdl */ } vnet_t; #ifdef DEBUG
--- a/usr/src/uts/sun4v/sys/vnet_gen.h Mon Aug 17 09:11:10 2009 -0400 +++ b/usr/src/uts/sun4v/sys/vnet_gen.h Mon Aug 17 09:14:35 2009 -0400 @@ -180,6 +180,7 @@ kmutex_t tclock; /* tx reclaim lock */ kmutex_t wrlock; /* sync transmits */ kmutex_t rxlock; /* sync reception */ + kmutex_t pollq_lock; /* sync polling and rxworker */ /* channel info from ldc layer */ uint64_t ldc_id; /* channel number */ @@ -248,6 +249,11 @@ kmutex_t rcv_thr_lock; /* lock for receive thread */ kcondvar_t rcv_thr_cv; /* cond.var for recv thread */ + /* receive polling fields */ + boolean_t polling_on; /* polling enabled ? */ + mblk_t *pollq_headp; /* head of pkts in pollq */ + mblk_t *pollq_tailp; /* tail of pkts in pollq */ + /* channel statistics */ vgen_stats_t stats; /* channel statistics */ kstat_t *ksp; /* channel kstats */