view usr/src/uts/i86pc/i86hvm/io/xdf_shell.c @ 10021:a41c569bdaca

PSARC/2006/379 Solaris on Extended partition 6644364 Extended partitions need to be supported on Solaris 6713308 Macro UNUSED in fdisk.h needs to be changed since id 100 is Novell Netware 286's partition ID 6713318 Need to differentiate between solaris old partition and Linux swap 6745175 Partitions can be created using fdisk table with invalid partition line by "fdisk -F" 6745740 Multiple extended partition can be created by "fdisk -A" 6824622 Logical device node can't be created in HVM host
author Sheshadri Vasudevan <Sheshadri.Vasudevan@Sun.COM>
date Thu, 02 Jul 2009 08:59:40 +0530
parents f030be2909ac
children
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <io/xdf_shell.h>
#include <sys/dkio.h>
#include <sys/scsi/scsi_types.h>

/*
 * General Notes
 *
 * We don't support disks with bad block mappings.  We have this
 * limitation because the underlying xdf driver doesn't support
 * bad block remapping.  If there is a need to support this feature
 * it should be added directly to the xdf driver and we should just
 * pass requests straight on through and let it handle the remapping.
 * Also, it's probably worth pointing out that most modern disks do bad
 * block remapping internally in the hardware so there's actually less
 * of a chance of us ever discovering bad blocks.  Also, in most cases
 * this driver (and the xdf driver) will only be used with virtualized
 * devices, so one might wonder why a virtual device would ever actually
 * experience bad blocks.  To wrap this up, you might be wondering how
 * these bad block mappings get created and how they are managed.  Well,
 * there are two tools for managing bad block mappings, format(1M) and
 * addbadsec(1M).  Format(1M) can be used to do a surface scan of a disk
 * to attempt to find bad blocks and create mappings for them.  Format(1M)
 * and addbadsec(1M) can also be used to edit existing mappings that may
 * be saved on the disk.
 *
 * The underlying PV driver that this driver passes on requests to is the
 * xdf driver.  Since in most cases the xdf driver doesn't deal with
 * physical disks it has its own algorithm for assigning a physical
 * geometry to a virtual disk (ie, cylinder count, head count, etc.)
 * The default values chosen by the xdf driver may not match those
 * assigned to a disk by a hardware disk emulator in an HVM environment.
 * This is a problem since these physical geometry attributes affect
 * things like the partition table, backup label location, etc.  So
 * to emulate disk devices correctly we need to know the physical geometry
 * that was assigned to a disk at the time of its initialization.
 * Normally in an HVM environment this information will be passed to
 * the BIOS and operating system from the hardware emulator that is
 * emulating the disk devices.  In the case of a solaris dom0+xvm
 * this would be qemu.  So to work around this issue, this driver will
 * query the emulated hardware to get the assigned physical geometry
 * and then pass this geometry onto the xdf driver so that it can use it.
 * But really, this information is essentially metadata about the disk
 * that should be kept with the disk image itself.  (Assuming of course
 * that a disk image is the actual backingstore for this emulated device.)
 * This metadata should also be made available to PV drivers via a common
 * mechanism, probably the xenstore.  The fact that this metadata isn't
 * available outside of HVM domains means that it's difficult to move
 * disks between HVM and PV domains, since a fully PV domain will have no
 * way of knowing what the correct geometry of the target device is.
 * (Short of reading the disk, looking for things like partition tables
 * and labels, and taking a best guess at what the geometry was when
 * the disk was initialized.  Unsurprisingly, qemu actually does this.)
 *
 * This driver has to map xdf shell device instances into their corresponding
 * xdf device instances.  We have to do this to ensure that when a user
 * accesses an emulated xdf shell device we map those accesses to the proper
 * paravirtualized device.  Basically what we need to know is how multiple
 * 'disk' entries in a domU configuration file get mapped to emulated
 * xdf shell devices and to xdf devices.  The 'disk' entry to xdf instance
 * mappings we know because those are done within the Solaris xvdi code
 * and the xpvd nexus driver.  But the config to emulated devices mappings
 * are handled entirely within the xen management tool chain and the
 * hardware emulator.  Since all the tools that establish these mappings
 * live in dom0, dom0 should really supply us with this information,
 * probably via the xenstore.  Unfortunately it doesn't, so since there's
 * no good way to determine this mapping dynamically, this driver uses
 * a hard coded set of static mappings.  These mappings are hardware
 * emulator specific because each different hardware emulator could have
 * a different device tree with different xdf shell device paths.  This
 * means that if we want to continue to use this static mapping approach
 * to allow Solaris to run on different hardware emulators we'll have
 * to analyze each of those emulators to determine what paths they
 * use and hard code those paths into this driver.  yech.  This metadata
 * really needs to be supplied to us by dom0.
 *
 * This driver accesses underlying xdf nodes.  Unfortunately, devices
 * must create minor nodes during attach, and for disk devices to create
 * minor nodes, they have to look at the label on the disk, so this means
 * that disk drivers must be able to access a disk contents during
 * attach.  That means that this disk driver must be able to access
 * underlying xdf nodes during attach.  Unfortunately, due to device tree
 * locking restrictions, we cannot have an attach operation occurring on
 * this device and then attempt to access another device which may
 * cause another attach to occur in a different device tree branch
 * since this could result in deadlock.  Hence, this driver can only
 * access xdf device nodes that we know are attached, and it can't use
 * any ddi interfaces to access those nodes if those interfaces could
 * trigger an attach of the xdf device.  So this driver works around
 * these restrictions by talking directly to xdf devices via
 * xdf_hvm_hold().  This interface takes a pathname to an xdf device,
 * and if that device is already attached then it returns a held dip
 * pointer for that device node.  This prevents us from getting into
 * deadlock situations, but now we need a mechanism to ensure that all
 * the xdf device nodes this driver might access are attached before
 * this driver tries to access them.  This is accomplished via the
 * hvmboot_rootconf() callback which is invoked just before root is
 * mounted.  hvmboot_rootconf() will attach xpvd and tell it to configure
 * all xdf devices visible to the system.  All these xdf device nodes
 * will also be marked with the "ddi-no-autodetach" property so that
 * once they are configured, they will not be automatically unconfigured.
 * The only way that they could be unconfigured is if the administrator
 * explicitly attempts to unload required modules via rem_drv(1M)
 * or modunload(1M).
 */

/*
 * 16 partitions + fdisk (see xdf.h)
 */
/* Map a dev_t to the XDF_INST()/XDF_PART() fields of its minor number. */
#define	XDFS_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
#define	XDFS_DEV2PART(dev)	XDF_PART((getminor((dev))))

/*
 * True when otyp is one of the block, character or layered open types.
 * The argument is parenthesized at every use so that an expression
 * argument (for example "a & b") is compared as a whole.
 */
#define	OTYP_VALID(otyp)	(((otyp) == OTYP_BLK) ||		\
					((otyp) == OTYP_CHR) ||		\
					((otyp) == OTYP_LYR))

#define	XDFS_NODES		4

/*
 * Accessors for the per-instance HVM state.  That state occupies the last
 * sizeof (xdfs_hvm_state_t) bytes of a soft state allocation of
 * xdfs_ss_size bytes, so it is located by byte offset from the start of
 * the soft state rather than through a structure member.
 */
#define	XDFS_HVM_MODE(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_mode)
#define	XDFS_HVM_DIP(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_dip)
#define	XDFS_HVM_PATH(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_path)
#define	XDFS_HVM_STATE(sp)						\
		((xdfs_hvm_state_t *)(&((char *)(sp))[XDFS_HVM_STATE_OFFSET]))
#define	XDFS_HVM_STATE_OFFSET	(xdfs_ss_size - sizeof (xdfs_hvm_state_t))
/*
 * NOTE: this expands to several statements and already ends in a
 * semicolon, so it must not be used as the body of an unbraced if/else.
 */
#define	XDFS_HVM_SANE(sp)						\
		ASSERT(XDFS_HVM_MODE(sp));				\
		ASSERT(XDFS_HVM_DIP(sp) != NULL);			\
		ASSERT(XDFS_HVM_PATH(sp) != NULL);


/*
 * Per-instance state used when a device is accessed in HVM mode.  It is
 * reached through the XDFS_HVM_*() macros, which locate it at the tail of
 * the instance's soft state allocation.
 */
typedef struct xdfs_hvm_state {
	boolean_t	xdfs_hs_mode;	/* set to B_TRUE by xdfs_hvm_probe() */
	dev_info_t	*xdfs_hs_dip;	/* dip recorded by xdfs_hvm_probe() */
	char		*xdfs_hs_path;	/* path passed to xdfs_hvm_probe() */
} xdfs_hvm_state_t;

/*
 * Local function and structure prototypes.  These are declared up front
 * because they are referenced before their definitions appear.
 */
static int xdfs_iodone(struct buf *);
static boolean_t xdfs_isopen_part(xdfs_state_t *, int);
static boolean_t xdfs_isopen(xdfs_state_t *);
static cmlb_tg_ops_t xdfs_lb_ops;

/*
 * Globals
 */
/* Major number that the entry points compare incoming dev_t values to. */
major_t			xdfs_major;
/* dev_ops/cb_ops vectors that the HVM-mode entry points forward to. */
#define			xdfs_hvm_dev_ops (xdfs_c_hvm_dev_ops)
#define			xdfs_hvm_cb_ops (xdfs_hvm_dev_ops->devo_cb_ops)

/*
 * Private globals
 */
/* When set, xdfs_probe() skips PV access and probes in HVM mode instead. */
volatile boolean_t	xdfs_pv_disable = B_FALSE;
/* Soft state anchor passed to the ddi_soft_state routines. */
static void		*xdfs_ssp;
/* Size of one soft state allocation; the HVM state sits at its tail. */
static size_t		xdfs_ss_size;

/*
 * Private helper functions
 */
/*
 * Take a hold on the underlying target device.
 *
 * Returns B_TRUE with the hold count incremented when the target is
 * currently attached, and B_FALSE (no hold taken) when it is not.  Every
 * successful hold must be paired with a call to xdfs_tgt_release().
 */
static boolean_t
xdfs_tgt_hold(xdfs_state_t *xsp)
{
	boolean_t	held = B_FALSE;

	mutex_enter(&xsp->xdfss_mutex);
	ASSERT(xsp->xdfss_tgt_holds >= 0);
	if (xsp->xdfss_tgt_attached) {
		xsp->xdfss_tgt_holds++;
		held = B_TRUE;
	}
	mutex_exit(&xsp->xdfss_mutex);

	return (held);
}

/*
 * Drop a hold obtained via xdfs_tgt_hold().  When the last hold goes away
 * every thread waiting on xdfss_cv is woken up.
 */
static void
xdfs_tgt_release(xdfs_state_t *xsp)
{
	mutex_enter(&xsp->xdfss_mutex);

	ASSERT(xsp->xdfss_tgt_attached);
	ASSERT(xsp->xdfss_tgt_holds > 0);

	xsp->xdfss_tgt_holds--;
	if (xsp->xdfss_tgt_holds == 0)
		cv_broadcast(&xsp->xdfss_cv);

	mutex_exit(&xsp->xdfss_mutex);
}

/*
 * Geometry/info query on behalf of cmlb.
 *
 * Every command except TG_GETVIRTGEOM is handed to xdf_lb_getinfo() for
 * the target device.  TG_GETVIRTGEOM is synthesized here from the target's
 * physical capacity.  Returns ENXIO when there is no soft state or no
 * attached target, EINVAL when the capacity does not fit the emulated
 * geometry, and otherwise whatever xdf_lb_getinfo() returned.
 */
/*ARGSUSED*/
static int
xdfs_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	xdfs_state_t	*xsp;
	cmlb_geom_t	phys_geom;
	cmlb_geom_t	*virt_geom;
	diskaddr_t	nblocks;
	int		err;

	xsp = ddi_get_soft_state(xdfs_ssp, ddi_get_instance(dip));
	if (xsp == NULL)
		return (ENXIO);

	if (!xdfs_tgt_hold(xsp))
		return (ENXIO);

	if (cmd != TG_GETVIRTGEOM) {
		/* Not ours to emulate; let the xdf driver answer. */
		err = xdf_lb_getinfo(xsp->xdfss_tgt_dip, cmd, arg, tg_cookie);
		xdfs_tgt_release(xsp);
		return (err);
	}

	/*
	 * The native xdf driver has no support for this request, so it
	 * is emulated here in order that the answer matches what a real
	 * xdf shell device would report.  Start from the true size of
	 * the device.
	 */
	err = xdf_lb_getinfo(xsp->xdfss_tgt_dip,
	    TG_GETPHYGEOM, &phys_geom, tg_cookie);
	if (err == 0) {
		nblocks = phys_geom.g_capacity;

		if (nblocks >= (63 * 254 * 1024)) {
			/*
			 * Too large to describe as an Int 13/function 8
			 * geometry result, so fail the request.  See
			 * PSARC 1998/313.
			 */
			err = EINVAL;
		} else {
			virt_geom = (cmlb_geom_t *)arg;
			virt_geom->g_capacity	= nblocks;
			virt_geom->g_nsect	= 63;
			virt_geom->g_nhead	= 254;
			virt_geom->g_ncyl	= nblocks / (63 * 254);
			virt_geom->g_acyl	= 0;
			virt_geom->g_secsize	= 512;
			virt_geom->g_intrlv	= 1;
			virt_geom->g_rpm	= 3600;
		}
	}

	xdfs_tgt_release(xsp);
	return (err);
}

/*
 * Report whether the given partition has an outstanding open of any
 * open type.  The caller must hold xdfss_mutex.
 */
static boolean_t
xdfs_isopen_part(xdfs_state_t *xsp, int part)
{
	int	otyp = 0;

	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));

	while (otyp < OTYPCNT) {
		if (xsp->xdfss_otyp_count[otyp][part] == 0) {
			otyp++;
			continue;
		}

		/* An open partition implies an attached target. */
		ASSERT(xsp->xdfss_tgt_attached);
		ASSERT(xsp->xdfss_tgt_holds >= 0);
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Report whether any of the first XDF_PEXT partitions of this device is
 * open.  The caller must hold xdfss_mutex.
 */
static boolean_t
xdfs_isopen(xdfs_state_t *xsp)
{
	boolean_t	any_open = B_FALSE;
	int		p;

	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));

	for (p = 0; !any_open && (p < XDF_PEXT); p++)
		any_open = xdfs_isopen_part(xsp, p);

	return (any_open);
}

/*
 * Completion callback for the cloned buf created in xdfs_strategy().
 * The residual count and error of the clone are copied to the original
 * buf (found via b_chain), the original is completed, and the clone is
 * freed.  Always returns 0.
 */
static int
xdfs_iodone(struct buf *bp)
{
	struct buf	*parent = bp->b_chain;
	int		io_error = geterror(bp);

	/* Hand the outcome of the I/O back to the original request */
	parent->b_resid = bp->b_resid;
	bioerror(parent, io_error);
	biodone(parent);

	/* The clone has served its purpose */
	freerbuf(bp);

	return (0);
}

/*
 * Attach the cmlb layer to this instance using xdfs_lb_ops as the target
 * ops vector.  CD targets are attached as DTYPE_RODIRECT and everything
 * else as DTYPE_DIRECT.  Returns the result of cmlb_attach().
 */
static int
xdfs_cmlb_attach(xdfs_state_t *xsp)
{
	int	dev_type;

	if (xsp->xdfss_tgt_is_cd)
		dev_type = DTYPE_RODIRECT;
	else
		dev_type = DTYPE_DIRECT;

	return (cmlb_attach(xsp->xdfss_dip, &xdfs_lb_ops, dev_type,
	    xdf_is_rm(xsp->xdfss_tgt_dip), B_TRUE,
	    xdfs_c_cmlb_node_type(xsp), xdfs_c_cmlb_alter_behavior(xsp),
	    xsp->xdfss_cmlbhandle, 0));
}

/*
 * Bind this shell instance to the xdf target device tgt_dip and bring up
 * everything layered on top of it (geometry, backend connection, kstat,
 * cmlb, devid).
 *
 * Must be called with xdfss_mutex held, with no partition open and with
 * the target not yet marked attached (all asserted below).  The mutex is
 * dropped part-way through, once the target has been marked attached, and
 * is re-acquired before every return that follows, so the caller always
 * gets control back with the mutex held.
 *
 * Returns B_TRUE on success.  Returns B_FALSE on failure; failures that
 * occur after the kstat has been created delete it again and clear
 * xdfss_tgt_attached.
 */
static boolean_t
xdfs_tgt_probe(xdfs_state_t *xsp, dev_info_t *tgt_dip)
{
	cmlb_geom_t		pgeom;
	int			tgt_instance = ddi_get_instance(tgt_dip);

	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
	ASSERT(!xdfs_isopen(xsp));
	ASSERT(!xsp->xdfss_tgt_attached);

	/* Record the target; its base dev_t refers to partition 0. */
	xsp->xdfss_tgt_dip = tgt_dip;
	xsp->xdfss_tgt_holds = 0;
	xsp->xdfss_tgt_dev = makedevice(ddi_driver_major(tgt_dip),
	    XDF_MINOR(tgt_instance, 0));
	ASSERT((xsp->xdfss_tgt_dev & XDF_PMASK) == 0);
	xsp->xdfss_tgt_is_cd = xdf_is_cd(tgt_dip);

	/*
	 * GROSS HACK ALERT!  GROSS HACK ALERT!
	 *
	 * Before we can initialize the cmlb layer, we have to tell the
	 * underlying xdf device what its physical geometry should be.
	 * See the block comments at the top of this file for more info.
	 */
	if (!xsp->xdfss_tgt_is_cd &&
	    ((xdfs_c_getpgeom(xsp->xdfss_dip, &pgeom) != 0) ||
	    (xdf_hvm_setpgeom(xsp->xdfss_tgt_dip, &pgeom) != 0)))
		return (B_FALSE);

	/*
	 * Force the xdf front end driver to connect to the backend.  From
	 * the solaris device tree perspective, the xdf driver devinfo node
	 * is already in the ATTACHED state.  (Otherwise xdf_hvm_hold()
	 * would not have returned a dip.)  But this doesn't mean that the
	 * xdf device has actually established a connection to its back
	 * end driver.  For us to be able to access the xdf device it needs
	 * to be connected.
	 */
	if (!xdf_hvm_connect(xsp->xdfss_tgt_dip)) {
		cmn_err(CE_WARN, "pv driver failed to connect: %s",
		    xsp->xdfss_pv);
		return (B_FALSE);
	}

	if (xsp->xdfss_tgt_is_cd && !xdf_media_req_supported(tgt_dip)) {
		/*
		 * Unfortunately, the dom0 backend driver doesn't support
		 * important media request operations like eject, so fail
		 * the probe (this should cause us to fall back to emulated
		 * hvm device access, which does support things like eject).
		 */
		return (B_FALSE);
	}

	/* create kstat for iostat(1M) */
	if (xdf_kstat_create(xsp->xdfss_tgt_dip, (char *)xdfs_c_name,
	    tgt_instance) != 0)
		return (B_FALSE);

	/*
	 * Now we need to mark ourselves as attached and drop xdfss_mutex.
	 * We do this because the final steps in the attach process will
	 * need to access the underlying disk to read the label and
	 * possibly the devid.
	 */
	xsp->xdfss_tgt_attached = B_TRUE;
	mutex_exit(&xsp->xdfss_mutex);

	/* Disks with bad block mappings are rejected; undo and fail. */
	if (!xsp->xdfss_tgt_is_cd && xdfs_c_bb_check(xsp)) {
		cmn_err(CE_WARN, "pv disks with bad blocks are unsupported: %s",
		    xsp->xdfss_hvm);
		mutex_enter(&xsp->xdfss_mutex);
		xdf_kstat_delete(xsp->xdfss_tgt_dip);
		xsp->xdfss_tgt_attached = B_FALSE;
		return (B_FALSE);
	}

	/*
	 * Initialize cmlb.  Note that for partition information cmlb
	 * will access the underlying xdf disk device directly via
	 * xdfs_lb_rdwr() and xdfs_lb_getinfo().  There are no
	 * layered driver handles associated with this access because
	 * it is a direct disk access that doesn't go through
	 * any of the device nodes exported by the xdf device (since
	 * all exported device nodes only reflect the portion of
	 * the device visible via the partition/slice that the node
	 * is associated with.)  So while not observable via the LDI,
	 * this direct disk access is ok since we're actually holding
	 * the target device.
	 */
	if (xdfs_cmlb_attach(xsp) != 0) {
		/* Same unwind as the bad block failure above. */
		mutex_enter(&xsp->xdfss_mutex);
		xdf_kstat_delete(xsp->xdfss_tgt_dip);
		xsp->xdfss_tgt_attached = B_FALSE;
		return (B_FALSE);
	}

	/* setup devid string */
	xsp->xdfss_tgt_devid = NULL;
	if (!xsp->xdfss_tgt_is_cd)
		xdfs_c_devid_setup(xsp);

	/* The result of this validation is deliberately ignored. */
	(void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0);

	/* Have the system report any newly created device nodes */
	ddi_report_dev(xsp->xdfss_dip);

	/* Return to the caller with the mutex held, as on entry. */
	mutex_enter(&xsp->xdfss_mutex);
	return (B_TRUE);
}

/*
 * Tear down the binding to the target device.  The caller must hold
 * xdfss_mutex and the target must currently be attached.
 *
 * Returns B_FALSE, changing nothing, while any partition is open or any
 * target hold is outstanding.  Otherwise the devid is unregistered (and
 * freed if one was set up), the kstat is deleted, the target is marked
 * detached and B_TRUE is returned.
 */
static boolean_t
xdfs_tgt_detach(xdfs_state_t *xsp)
{
	boolean_t	busy;

	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
	ASSERT(xsp->xdfss_tgt_attached);
	ASSERT(xsp->xdfss_tgt_holds >= 0);

	busy = xdfs_isopen(xsp) || (xsp->xdfss_tgt_holds != 0);
	if (busy)
		return (B_FALSE);

	ddi_devid_unregister(xsp->xdfss_dip);
	if (xsp->xdfss_tgt_devid != NULL) {
		ddi_devid_free(xsp->xdfss_tgt_devid);
	}

	xdf_kstat_delete(xsp->xdfss_tgt_dip);
	xsp->xdfss_tgt_attached = B_FALSE;

	return (B_TRUE);
}

/*
 * Xdf_shell interfaces that may be called from outside this file.
 */
/*
 * minphys routine for this driver: simply applies the xdf driver's
 * xdfmin() to the buf.
 */
void
xdfs_minphys(struct buf *bp)
{
	xdfmin(bp);
}

/*
 * Cmlb ops vector, allows the cmlb module to directly access the entire
 * xdf disk device without going through any partitioning layers.
 */
int
xdfs_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
    diskaddr_t start, size_t count, void *tg_cookie)
{
	xdfs_state_t	*xsp;
	int		err;

	xsp = ddi_get_soft_state(xdfs_ssp, ddi_get_instance(dip));

	/* Without soft state or an attached target there is nothing to do */
	if ((xsp == NULL) || !xdfs_tgt_hold(xsp))
		return (ENXIO);

	/* Hand the transfer to the xdf driver while holding the target */
	err = xdf_lb_rdwr(xsp->xdfss_tgt_dip,
	    cmd, bufaddr, start, count, tg_cookie);
	xdfs_tgt_release(xsp);

	return (err);
}

/*
 * Driver PV and HVM cb_ops entry points
 */
/*
 * open(9E) entry point.
 *
 * In HVM mode the request is forwarded to the HVM cb_ops (ENOTSUP if they
 * are missing).  In PV mode the matching partition of the underlying xdf
 * device is opened through the LDI and the per-open-type count for the
 * partition is incremented.
 *
 * Returns EINVAL for an out-of-range otyp, ENXIO when the instance has no
 * soft state, the error from ldi_open_by_dev() if the target open fails,
 * and 0 on success.
 */
/*ARGSUSED*/
static int
xdfs_open(dev_t *dev_p, int flag, int otyp, cred_t *credp)
{
	ldi_ident_t	li;
	dev_t		dev = *dev_p;
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	dev_t		tgt_devt;
	int		err = 0;

	if ((otyp < 0) || (otyp >= OTYPCNT))
		return (EINVAL);

	/*
	 * ddi_get_soft_state() returns NULL for an instance that has no
	 * soft state allocated; fail cleanly instead of dereferencing it.
	 */
	if (xsp == NULL)
		return (ENXIO);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
			return (ENOTSUP);
		return (xdfs_hvm_cb_ops->cb_open(dev_p, flag, otyp, credp));
	}

	/*
	 * The target dev_t is only meaningful in PV mode, so it is
	 * computed here rather than before the mode is known.
	 */
	tgt_devt = xsp->xdfss_tgt_dev | part;

	/* allocate an ldi handle */
	VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);

	mutex_enter(&xsp->xdfss_mutex);

	/*
	 * We translate all device opens (chr, blk, and lyr) into
	 * block device opens.  Why?  Because for all the opens that
	 * come through this driver, we only keep around one LDI handle.
	 * So that handle can only be of one open type.  The reason
	 * that we choose the block interface for this is that to use
	 * the block interfaces for a device the system needs to allocate
	 * buf_ts, which are associated with system memory which can act
	 * as a cache for device data.  So normally when a block device
	 * is closed the system will ensure that all these pages get
	 * flushed out of memory.  But if we were to open the device
	 * as a character device, then when we went to close the underlying
	 * device (even if we had invoked the block interfaces) any data
	 * remaining in memory wouldn't necessarily be flushed out
	 * before the device was closed.
	 */
	if (xsp->xdfss_tgt_lh[part] == NULL) {
		/* First open of this partition: keep the handle. */
		ASSERT(!xdfs_isopen_part(xsp, part));

		err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
		    &xsp->xdfss_tgt_lh[part], li);

		if (err != 0) {
			mutex_exit(&xsp->xdfss_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
	} else {
		ldi_handle_t lh_tmp;

		ASSERT(xdfs_isopen_part(xsp, part));

		/* do ldi open/close to get flags and cred check */
		err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
		    &lh_tmp, li);
		if (err != 0) {
			mutex_exit(&xsp->xdfss_mutex);
			ldi_ident_release(li);
			return (err);
		}

		/* Disk devices really shouldn't clone */
		ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
		(void) ldi_close(lh_tmp, flag, credp);
	}
	ldi_ident_release(li);

	xsp->xdfss_otyp_count[otyp][part]++;

	mutex_exit(&xsp->xdfss_mutex);
	return (0);
}

/*
 * close(9E) entry point.
 *
 * In HVM mode the request is forwarded to the HVM cb_ops.  In PV mode the
 * open count for (otyp, part) is adjusted: layered opens are counted one
 * by one, all other open types are cleared in one go.  Once nothing has
 * the partition open any more, the LDI handle on the target is closed and
 * the result of that close is returned.
 */
/*ARGSUSED*/
static int
xdfs_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	int		err;

	ASSERT((otyp >= 0) && otyp < OTYPCNT);

	/* The dev_t must belong to this driver. */
	ASSERT(getmajor(dev) == xdfs_major);
	if (getmajor(dev) != xdfs_major)
		return (ENXIO);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) && (xdfs_hvm_cb_ops != NULL))
			return (xdfs_hvm_cb_ops->cb_close(dev, flag, otyp,
			    credp));
		return (ENOTSUP);
	}

	mutex_enter(&xsp->xdfss_mutex);

	/*
	 * The partition has to be open for a close to make sense.  Debug
	 * kernels panic on a violation, non-debug kernels fail the call.
	 */
	ASSERT(xdfs_isopen_part(xsp, part));
	if (!xdfs_isopen_part(xsp, part)) {
		mutex_exit(&xsp->xdfss_mutex);
		return (ENXIO);
	}

	ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
	ASSERT(xsp->xdfss_otyp_count[otyp][part] > 0);
	if (otyp != OTYP_LYR) {
		xsp->xdfss_otyp_count[otyp][part] = 0;
	} else {
		xsp->xdfss_otyp_count[otyp][part]--;
	}

	/* Somebody else still has this partition open; we're done. */
	if (xdfs_isopen_part(xsp, part)) {
		mutex_exit(&xsp->xdfss_mutex);
		return (0);
	}

	/* That was the last open, so let go of the target as well. */
	err = ldi_close(xsp->xdfss_tgt_lh[part], flag, credp);
	xsp->xdfss_tgt_lh[part] = NULL;
	mutex_exit(&xsp->xdfss_mutex);

	return (err);
}

int
xdfs_strategy(struct buf *bp)
{
	dev_t		dev = bp->b_edev;
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	dev_t		tgt_devt;
	struct buf	*bp_clone;

	/* Sanity check the dev_t associated with this request. */
	ASSERT(getmajor(dev) == xdfs_major);
	if (getmajor(dev) != xdfs_major)
		goto err;

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
			return (ENOTSUP);
		return (xdfs_hvm_cb_ops->cb_strategy(bp));
	}

	/*
	 * Sanity checks that the dev_t associated with the buf we were
	 * passed corresponds to an open partition.  On debug kernels we'll
	 * panic and on non-debug kernels we'll return failure.
	 */
	mutex_enter(&xsp->xdfss_mutex);
	ASSERT(xdfs_isopen_part(xsp, part));
	if (!xdfs_isopen_part(xsp, part)) {
		mutex_exit(&xsp->xdfss_mutex);
		goto err;
	}
	mutex_exit(&xsp->xdfss_mutex);

	/* clone this buffer */
	tgt_devt = xsp->xdfss_tgt_dev | part;
	bp_clone = bioclone(bp, 0, bp->b_bcount, tgt_devt, bp->b_blkno,
	    xdfs_iodone, NULL, KM_SLEEP);
	bp_clone->b_chain = bp;

	/*
	 * If we're being invoked on behalf of the physio() call in
	 * xdfs_dioctl_rwcmd() then b_private will be set to
	 * XB_SLICE_NONE and we need to propegate this flag into the
	 * cloned buffer so that the xdf driver will see it.
	 */
	if (bp->b_private == (void *)XB_SLICE_NONE)
		bp_clone->b_private = (void *)XB_SLICE_NONE;

	/*
	 * Pass on the cloned buffer.  Note that we don't bother to check
	 * for failure because the xdf strategy routine will have to
	 * invoke biodone() if it wants to return an error, which means
	 * that the xdfs_iodone() callback will get invoked and it
	 * will propegate the error back up the stack and free the cloned
	 * buffer.
	 */
	ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
	return (ldi_strategy(xsp->xdfss_tgt_lh[part], bp_clone));

err:
	bioerror(bp, ENXIO);
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	return (0);
}

/*
 * dump(9E) entry point.  PV mode goes through the partition's LDI handle;
 * HVM mode goes to the HVM cb_dump routine, or fails with ENOTSUP when no
 * HVM ops are available.
 */
static int
xdfs_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) && (xdfs_hvm_cb_ops != NULL))
			return (xdfs_hvm_cb_ops->cb_dump(dev, addr, blkno,
			    nblk));
		return (ENOTSUP);
	}

	return (ldi_dump(xsp->xdfss_tgt_lh[part], addr, blkno, nblk));
}

/*
 * read(9E) entry point.  PV mode goes through the partition's LDI handle;
 * HVM mode goes to the HVM cb_read routine, or fails with ENOTSUP when no
 * HVM ops are available.
 */
/*ARGSUSED*/
static int
xdfs_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) && (xdfs_hvm_cb_ops != NULL))
			return (xdfs_hvm_cb_ops->cb_read(dev, uio, credp));
		return (ENOTSUP);
	}

	return (ldi_read(xsp->xdfss_tgt_lh[part], uio, credp));
}

/*
 * write(9E) entry point.  PV mode goes through the partition's LDI handle;
 * HVM mode goes to the HVM cb_write routine, or fails with ENOTSUP when no
 * HVM ops are available.
 */
/*ARGSUSED*/
static int
xdfs_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) && (xdfs_hvm_cb_ops != NULL))
			return (xdfs_hvm_cb_ops->cb_write(dev, uio, credp));
		return (ENOTSUP);
	}

	return (ldi_write(xsp->xdfss_tgt_lh[part], uio, credp));
}

/*
 * aread(9E) entry point.  PV mode goes through the partition's LDI handle.
 * HVM mode is forwarded to the HVM cb_aread routine, but only when the HVM
 * driver provides both a usable cb_strategy (neither NULL nor nodev) and a
 * cb_aread; otherwise ENOTSUP is returned.
 */
/*ARGSUSED*/
static int
xdfs_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) &&
		    (xdfs_hvm_cb_ops != NULL) &&
		    (xdfs_hvm_cb_ops->cb_strategy != NULL) &&
		    (xdfs_hvm_cb_ops->cb_strategy != nodev) &&
		    (xdfs_hvm_cb_ops->cb_aread != NULL))
			return (xdfs_hvm_cb_ops->cb_aread(dev, aio, credp));
		return (ENOTSUP);
	}

	return (ldi_aread(xsp->xdfss_tgt_lh[part], aio, credp));
}

/*
 * awrite(9E) entry point.  PV mode goes through the partition's LDI
 * handle.  HVM mode is forwarded to the HVM cb_awrite routine, but only
 * when the HVM driver provides both a usable cb_strategy (neither NULL nor
 * nodev) and a cb_awrite; otherwise ENOTSUP is returned.
 */
/*ARGSUSED*/
static int
xdfs_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) &&
		    (xdfs_hvm_cb_ops != NULL) &&
		    (xdfs_hvm_cb_ops->cb_strategy != NULL) &&
		    (xdfs_hvm_cb_ops->cb_strategy != nodev) &&
		    (xdfs_hvm_cb_ops->cb_awrite != NULL))
			return (xdfs_hvm_cb_ops->cb_awrite(dev, aio, credp));
		return (ENOTSUP);
	}

	return (ldi_awrite(xsp->xdfss_tgt_lh[part], aio, credp));
}

/*
 * ioctl(9E) entry point.
 *
 * HVM mode forwards to the HVM cb_ioctl routine (ENOTSUP without HVM ops).
 * In PV mode xdfs_c_ioctl() gets the first look at the request; if it
 * reports the request as done its result is returned.  Anything else is
 * sent to the target through the LDI, and after a successful target ioctl
 * the cmlb label information is invalidated and re-validated.
 */
static int
xdfs_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int		instance = XDFS_DEV2UNIT(dev);
	int		part = XDFS_DEV2PART(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	int		rv;
	boolean_t	done;

	if (XDFS_HVM_MODE(xsp)) {
		if ((xdfs_hvm_dev_ops != NULL) && (xdfs_hvm_cb_ops != NULL))
			return (xdfs_hvm_cb_ops->cb_ioctl(
			    dev, cmd, arg, flag, credp, rvalp));
		return (ENOTSUP);
	}

	rv = xdfs_c_ioctl(xsp, dev, part, cmd, arg, flag, credp, rvalp, &done);
	if (done)
		return (rv);

	rv = ldi_ioctl(xsp->xdfss_tgt_lh[part], cmd, arg, flag, credp, rvalp);
	if (rv != 0)
		return (rv);

	/* The target accepted the request: force geometry validation */
	(void) cmlb_invalidate(xsp->xdfss_cmlbhandle, 0);
	(void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0);

	return (0);
}

/*
 * prop_op handling for an instance in HVM mode.  The request is forwarded
 * to the HVM cb_prop_op routine when the HVM driver supplies a real one
 * (not NULL, nodev or nulldev); otherwise DDI_PROP_NOT_FOUND is returned.
 */
static int
xdfs_hvm_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int flags, char *name, caddr_t valuep, int *lengthp)
{
	int		instance = ddi_get_instance(dip);
	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	ASSERT(XDFS_HVM_MODE(xsp));

	if ((xdfs_hvm_dev_ops != NULL) &&
	    (xdfs_hvm_cb_ops != NULL) &&
	    (xdfs_hvm_cb_ops->cb_prop_op != NULL) &&
	    (xdfs_hvm_cb_ops->cb_prop_op != nodev) &&
	    (xdfs_hvm_cb_ops->cb_prop_op != nulldev)) {
		return (xdfs_hvm_cb_ops->cb_prop_op(dev, dip, prop_op,
		    flags, name, valuep, lengthp));
	}

	return (DDI_PROP_NOT_FOUND);
}

/*
 * prop_op(9E) entry point.
 *
 * Requests for a node without soft state go to ddi_prop_op().  Requests
 * for an instance in HVM mode go to xdfs_hvm_prop_op().  Everything else
 * is turned into a dynamic property lookup on the underlying xdf device.
 * DDI_PROP_NOT_FOUND is returned for a dip/dev_t that does not belong to
 * this driver or when the target cannot be held.
 */
static int
xdfs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int flags, char *name, caddr_t valuep, int *lengthp)
{
	int		instance = ddi_get_instance(dip);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	dev_t		tgt_devt;
	int		rv;

	/*
	 * Both the dip and, if one was given, the dev_t have to belong to
	 * this driver.  Debug kernels panic on a mismatch, non-debug
	 * kernels report the property as not found.
	 */
	ASSERT(ddi_driver_major(dip) == xdfs_major);
	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdfs_major));
	if (ddi_driver_major(dip) != xdfs_major)
		return (DDI_PROP_NOT_FOUND);
	if ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdfs_major))
		return (DDI_PROP_NOT_FOUND);

	/*
	 * A lookup may arrive for a device node that has not been attached
	 * yet.  There is no soft state in that case, so let ddi_prop_op()
	 * deal with it.
	 */
	if (xsp == NULL) {
		return (ddi_prop_op(dev, dip, prop_op, flags,
		    name, valuep, lengthp));
	}

	/* Devices accessed in hvm mode have their own handler */
	if (XDFS_HVM_MODE(xsp)) {
		return (xdfs_hvm_prop_op(dev, dip, prop_op,
		    flags, name, valuep, lengthp));
	}

	/*
	 * Restrict the lookup on the target to dynamic properties.
	 *
	 * Static properties of the underlying xdf driver that we want to
	 * mirror have to be looked up and defined explicitly during
	 * attach instead.  The main reason is typing: most static
	 * properties are typed, whereas dynamic properties are untyped
	 * and rely on the caller knowing how to interpret the value.  The
	 * prop_op driver entry point is designed only for returning
	 * dynamic/untyped properties, so handing back static properties
	 * of the underlying device from here would lose their type
	 * information.  In addition, static properties can be enumerated
	 * in the device tree while dynamic ones cannot.
	 */
	flags |= DDI_PROP_DYNAMIC;

	/*
	 * The ldi is not suitable for reaching the underlying device here
	 * because it really opens the device.  That open could fail if
	 * the device is already open with FEXCL, and while we held it
	 * open for a property query somebody else's FEXCL open could fail
	 * in turn.  So take a hold on the target dip instead.
	 */
	if (!xdfs_tgt_hold(xsp))
		return (DDI_PROP_NOT_FOUND);

	/* Translate our dev_t into the matching dev_t of the target */
	tgt_devt = (dev == DDI_DEV_T_ANY) ? DDI_DEV_T_ANY :
	    (xsp->xdfss_tgt_dev | XDFS_DEV2PART(dev));

	/*
	 * cdev_prop_op() is not a public interface, and a caller would
	 * normally have to verify that the target driver implements it
	 * before invoking it.  The target here is always the xdf driver,
	 * which does define it, so that check is skipped.
	 */
	rv = cdev_prop_op(tgt_devt, xsp->xdfss_tgt_dip,
	    prop_op, flags, name, valuep, lengthp);

	xdfs_tgt_release(xsp);
	return (rv);
}

/*
 * Driver PV and HVM dev_ops entry points
 */
/*
 * getinfo(9E) entry point.
 *
 * DDI_INFO_DEVT2INSTANCE yields the instance number encoded in the dev_t.
 * DDI_INFO_DEVT2DEVINFO yields the dip recorded for the instance (the HVM
 * dip in HVM mode) and fails when the instance has no soft state.  Any
 * other command fails.
 */
/*ARGSUSED*/
static int
xdfs_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	dev_t		dev = (dev_t)arg;
	int		instance = XDFS_DEV2UNIT(dev);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	switch (infocmd) {
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(intptr_t)instance;
		return (DDI_SUCCESS);

	case DDI_INFO_DEVT2DEVINFO:
		if (xsp == NULL)
			return (DDI_FAILURE);
		if (!XDFS_HVM_MODE(xsp))
			*result = (void *)xsp->xdfss_dip;
		else
			*result = XDFS_HVM_DIP(xsp);
		return (DDI_SUCCESS);

	default:
		break;
	}

	return (DDI_FAILURE);
}

/*
 * Probe a device for access in HVM mode instead of PV mode.
 *
 * Soft state for the instance is allocated and the HVM driver's probe
 * routine is invoked.  On success the instance is flagged as being in HVM
 * mode, dip and path are recorded in the HVM state (path is kept, not
 * freed) and the HVM probe result is returned.  If there is no HVM probe
 * routine, or it reports DDI_PROBE_FAILURE, the soft state is released,
 * path (a MAXPATHLEN buffer) is freed and DDI_PROBE_FAILURE is returned.
 */
static int
xdfs_hvm_probe(dev_info_t *dip, char *path)
{
	int		instance = ddi_get_instance(dip);
	int		rv = DDI_PROBE_SUCCESS;
	void		*xsp;

	ASSERT(path != NULL);
	cmn_err(CE_WARN, "PV access to device disabled: %s", path);

	(void) ddi_soft_state_zalloc(xdfs_ssp, instance);
	xsp = ddi_get_soft_state(xdfs_ssp, instance);
	VERIFY(xsp != NULL);

	if ((xdfs_hvm_dev_ops != NULL) &&
	    (xdfs_hvm_dev_ops->devo_probe != NULL)) {
		rv = xdfs_hvm_dev_ops->devo_probe(dip);
		if (rv != DDI_PROBE_FAILURE) {
			XDFS_HVM_MODE(xsp) = B_TRUE;
			XDFS_HVM_DIP(xsp) = dip;
			XDFS_HVM_PATH(xsp) = path;
			return (rv);
		}
	}

	/* No usable HVM probe, or it failed: undo everything */
	ddi_soft_state_free(xdfs_ssp, instance);
	cmn_err(CE_WARN, "HVM probe of device failed: %s", path);
	kmem_free(path, MAXPATHLEN);
	return (DDI_PROBE_FAILURE);
}

/*
 * probe(9E) entry point.
 *
 * Decide whether this instance will be driven in PV mode (layered on top
 * of an xdf device) or in HVM mode, and allocate the matching soft state.
 * We fall back to HVM mode whenever PV access has been disabled, no xdf
 * device is mapped to our path, the xdf device can't be held, or probing
 * the xdf target fails.
 *
 * The pathname buffer allocated here is either freed before returning
 * (PV mode) or handed off to xdfs_hvm_probe(), which takes ownership.
 *
 * Returns DDI_PROBE_PARTIAL if the instance already has soft state,
 * DDI_PROBE_SUCCESS for PV mode, otherwise whatever xdfs_hvm_probe()
 * returns.
 */
static int
xdfs_probe(dev_info_t *dip)
{
	int		instance = ddi_get_instance(dip);
	xdfs_state_t	*xsp;
	dev_info_t	*tgt_dip;
	char		*path;
	int		i, pv_disable;

	/* if we've already probed the device then there's nothing to do */
	if (ddi_get_soft_state(xdfs_ssp, instance))
		return (DDI_PROBE_PARTIAL);

	/* Figure out our pathname */
	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	/* see if we should disable pv access mode */
	pv_disable = ddi_prop_get_int(DDI_DEV_T_ANY,
	    dip, DDI_PROP_NOTPROM, "pv_disable", 0);

	if (xdfs_pv_disable || pv_disable)
		return (xdfs_hvm_probe(dip, path));

	/*
	 * This xdf shell device layers on top of an xdf device.  So the first
	 * thing we need to do is determine which xdf device instance this
	 * xdf shell instance should be layered on top of.
	 */
	for (i = 0; xdfs_c_h2p_map[i].xdfs_h2p_hvm != NULL; i++) {
		if (strcmp(xdfs_c_h2p_map[i].xdfs_h2p_hvm, path) == 0)
			break;
	}

	if ((xdfs_c_h2p_map[i].xdfs_h2p_hvm == NULL) ||
	    ((tgt_dip = xdf_hvm_hold(xdfs_c_h2p_map[i].xdfs_h2p_pv)) == NULL)) {
		/*
		 * Uh oh.  We either don't know what xdf instance this xdf
		 * shell device should be mapped to or the xdf node associated
		 * with this instance isn't attached.  In either case fall
		 * back to hvm access.
		 */
		return (xdfs_hvm_probe(dip, path));
	}

	/* allocate and initialize our state structure */
	(void) ddi_soft_state_zalloc(xdfs_ssp, instance);
	xsp = ddi_get_soft_state(xdfs_ssp, instance);
	/* As in xdfs_hvm_probe(), fail loudly rather than deref NULL. */
	VERIFY(xsp != NULL);
	mutex_init(&xsp->xdfss_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&xsp->xdfss_cv, NULL, CV_DEFAULT, NULL);
	mutex_enter(&xsp->xdfss_mutex);

	xsp->xdfss_dip = dip;
	xsp->xdfss_pv = xdfs_c_h2p_map[i].xdfs_h2p_pv;
	xsp->xdfss_hvm = xdfs_c_h2p_map[i].xdfs_h2p_hvm;
	xsp->xdfss_tgt_attached = B_FALSE;
	cmlb_alloc_handle((cmlb_handle_t *)&xsp->xdfss_cmlbhandle);

	if (!xdfs_tgt_probe(xsp, tgt_dip)) {
		mutex_exit(&xsp->xdfss_mutex);
		cmlb_free_handle(&xsp->xdfss_cmlbhandle);
		/*
		 * Tear down the synchronization objects we initialized
		 * above before the memory that contains them is freed.
		 */
		cv_destroy(&xsp->xdfss_cv);
		mutex_destroy(&xsp->xdfss_mutex);
		ddi_soft_state_free(xdfs_ssp, instance);
		ddi_release_devi(tgt_dip);
		return (xdfs_hvm_probe(dip, path));
	}
	mutex_exit(&xsp->xdfss_mutex);

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);

	kmem_free(path, MAXPATHLEN);
	return (DDI_PROBE_SUCCESS);
}

/*
 * Attach an instance that is operating in HVM mode by forwarding the
 * request (with cmd unchanged) to the HVM driver's attach routine.
 *
 * If there is no HVM attach routine, or it fails, the saved device path
 * and the instance soft state are released and the failure is returned.
 */
static int
xdfs_hvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		unit = ddi_get_instance(dip);
	void		*state = ddi_get_soft_state(xdfs_ssp, unit);
	int		attach_rv = DDI_FAILURE;

	XDFS_HVM_SANE(state);

	/* With no HVM attach routine attach_rv stays DDI_FAILURE. */
	if ((xdfs_hvm_dev_ops != NULL) &&
	    (xdfs_hvm_dev_ops->devo_attach != NULL))
		attach_rv = xdfs_hvm_dev_ops->devo_attach(dip, cmd);

	if (attach_rv == DDI_SUCCESS)
		return (DDI_SUCCESS);

	/* Log first; the path buffer is about to be freed. */
	cmn_err(CE_WARN, "HVM attach of device failed: %s",
	    XDFS_HVM_PATH(state));
	kmem_free(XDFS_HVM_PATH(state), MAXPATHLEN);
	ddi_soft_state_free(xdfs_ssp, unit);
	return (attach_rv);
}

/*
 * Autoconfiguration Routines
 */
/*
 * attach(9E) entry point.
 *
 * An instance without soft state (i.e. one that was never successfully
 * probed) can't be attached.  HVM mode instances are passed through to
 * xdfs_hvm_attach() for every command; PV mode instances only support
 * DDI_ATTACH.
 */
static int
xdfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		unit = ddi_get_instance(dip);
	xdfs_state_t	*state = ddi_get_soft_state(xdfs_ssp, unit);

	if (state == NULL)
		return (DDI_FAILURE);

	if (!XDFS_HVM_MODE(state)) {
		if (cmd == DDI_ATTACH) {
			xdfs_c_attach(state);
			return (DDI_SUCCESS);
		}
		return (DDI_FAILURE);
	}

	return (xdfs_hvm_attach(dip, cmd));
}

/*
 * Detach (or suspend) an instance that is operating in HVM mode by
 * forwarding the request to the HVM driver's detach routine.
 *
 * Returns DDI_FAILURE if there is no HVM detach routine, otherwise the
 * HVM driver's result.  The saved device path and the instance soft
 * state are released only after a successful DDI_DETACH.
 */
static int
xdfs_hvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		instance = ddi_get_instance(dip);
	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);
	int		rv;

	XDFS_HVM_SANE(xsp);

	if ((xdfs_hvm_dev_ops == NULL) ||
	    (xdfs_hvm_dev_ops->devo_detach == NULL))
		return (DDI_FAILURE);

	if ((rv = xdfs_hvm_dev_ops->devo_detach(dip, cmd)) != DDI_SUCCESS)
		return (rv);

	/*
	 * Only a real detach destroys the instance.  For any other
	 * command the HVM driver accepted (e.g. DDI_SUSPEND) the soft
	 * state must survive, since xdfs_attach() rejects every request,
	 * including a later resume, for an instance without soft state.
	 */
	if (cmd != DDI_DETACH)
		return (DDI_SUCCESS);

	kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN);
	ddi_soft_state_free(xdfs_ssp, instance);
	return (DDI_SUCCESS);
}

/*
 * detach(9E) entry point.
 *
 * HVM mode instances are passed through to xdfs_hvm_detach() for every
 * command.  PV mode instances only support DDI_DETACH: the xdf target is
 * detached (with xdfss_mutex held) and then everything set up by
 * xdfs_probe() is undone.
 *
 * Returns DDI_SUCCESS once the instance is torn down, DDI_FAILURE if the
 * instance has no soft state, the command is unsupported, or the target
 * could not be detached.
 */
static int
xdfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		instance = ddi_get_instance(dip);
	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);

	/* Same guard as xdfs_attach(): nothing to detach without state. */
	if (xsp == NULL)
		return (DDI_FAILURE);
	if (XDFS_HVM_MODE(xsp))
		return (xdfs_hvm_detach(dip, cmd));
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&xsp->xdfss_mutex);
	if (!xdfs_tgt_detach(xsp)) {
		mutex_exit(&xsp->xdfss_mutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&xsp->xdfss_mutex);

	cmlb_detach(xsp->xdfss_cmlbhandle, 0);
	cmlb_free_handle(&xsp->xdfss_cmlbhandle);
	ddi_release_devi(xsp->xdfss_tgt_dip);

	/*
	 * Destroy the synchronization objects created in xdfs_probe()
	 * before the soft state that contains them is freed.
	 */
	cv_destroy(&xsp->xdfss_cv);
	mutex_destroy(&xsp->xdfss_mutex);

	ddi_soft_state_free(xdfs_ssp, instance);
	ddi_prop_remove_all(dip);
	return (DDI_SUCCESS);
}

/*
 * Forward a power(9E) request for an HVM mode instance to the HVM
 * driver.  Fails with DDI_FAILURE if the HVM driver has no power routine.
 */
static int
xdfs_hvm_power(dev_info_t *dip, int component, int level)
{
	int		unit = ddi_get_instance(dip);
	void		*state = ddi_get_soft_state(xdfs_ssp, unit);

	XDFS_HVM_SANE(state);

	if ((xdfs_hvm_dev_ops != NULL) &&
	    (xdfs_hvm_dev_ops->devo_power != NULL)) {
		return (xdfs_hvm_dev_ops->devo_power(dip, component, level));
	}
	return (DDI_FAILURE);
}

/*
 * power(9E) entry point.  Power management is only available when the
 * instance is in HVM mode, in which case the request is handed to
 * xdfs_hvm_power(); in PV mode the result of nodev() is returned.
 */
static int
xdfs_power(dev_info_t *dip, int component, int level)
{
	int		unit = ddi_get_instance(dip);
	xdfs_state_t	*state = ddi_get_soft_state(xdfs_ssp, unit);

	if (!XDFS_HVM_MODE(state))
		return (nodev());

	return (xdfs_hvm_power(dip, component, level));
}

/*
 * Cmlb ops vector
 *
 * The callbacks the cmlb (common disk label) module uses to reach our
 * target.  Entries are positional, so their order must not change.
 */
static cmlb_tg_ops_t xdfs_lb_ops = {
	TG_DK_OPS_VERSION_1,	/* ops vector version */
	xdfs_lb_rdwr,		/* label read/write callback */
	xdfs_lb_getinfo		/* target info callback */
};

/*
 * Device driver ops vector
 *
 * Character/block entry points for the xdf shell driver.  Entries are
 * positional, so their order must not change.  Operations we don't
 * provide are filled in with nodev (or nochpoll for poll).
 */
static struct cb_ops xdfs_cb_ops = {
	xdfs_open,		/* open */
	xdfs_close,		/* close */
	xdfs_strategy,		/* strategy */
	nodev,			/* print */
	xdfs_dump,		/* dump */
	xdfs_read,		/* read */
	xdfs_write,		/* write */
	xdfs_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	xdfs_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP | D_NEW,	/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	xdfs_aread,		/* async read */
	xdfs_awrite		/* async write */
};

/*
 * Autoconfiguration (dev_ops) vector for the xdf shell driver.  This is
 * what modldrv points the kernel at.  Each of the getinfo, probe, attach,
 * detach and power routines handles both PV and HVM mode instances.
 * Entries are positional, so their order must not change.
 */
struct dev_ops xdfs_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	xdfs_getinfo,		/* info */
	nulldev,		/* identify */
	xdfs_probe,		/* probe */
	xdfs_attach,		/* attach */
	xdfs_detach,		/* detach */
	nodev,			/* reset */
	&xdfs_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	xdfs_power,		/* power */
	ddi_quiesce_not_supported, /* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 *
 * The module description is left NULL here and filled in from
 * xdfs_c_linkinfo the first time _init() or _info() runs.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver. */
	NULL,			/* Module description.  Set by _init()/_info() */
	&xdfs_ops,		/* Driver ops. */
};

/*
 * Linkage structure handed to mod_install(), mod_info() and mod_remove();
 * it lists our single driver linkage element, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

/*
 * Module load entry point.
 *
 * Looks up our major number, sizes and creates the soft state list that
 * is shared with the hvm client, and installs the module.  Returns 0 on
 * success, EINVAL if our driver name has no major number, or the error
 * from ddi_soft_state_init() / mod_install().
 */
int
_init(void)
{
	int err;

	xdfs_major = ddi_name_to_major((char *)xdfs_c_name);
	if (xdfs_major == (major_t)-1)
		return (EINVAL);

	/*
	 * Work out how big each soft state structure has to be.  Start
	 * with whichever is larger: the hvm client's state structure or
	 * our own shell state structure.  Round that up to a pointer
	 * boundary and then tack an xdfs_hvm_state_t on the end.  Because
	 * the xdfs_hvm_state_t is always there, it can be used to tell
	 * which access mode (hvm or shell) an instance is in.
	 */
	xdfs_ss_size = MAX(xdfs_c_hvm_ss_size, sizeof (xdfs_state_t));
	xdfs_ss_size = P2ROUNDUP(xdfs_ss_size, sizeof (uintptr_t));
	xdfs_ss_size += sizeof (xdfs_hvm_state_t);

	/*
	 * Pre-allocate XDFS_NODES soft state slots.  (Ide, and software
	 * emulating ide, usually supports a small fixed number of disks.)
	 */
	err = ddi_soft_state_init(&xdfs_ssp, xdfs_ss_size, XDFS_NODES);
	if (err != 0)
		return (err);

	/* Let the hvm client use the same soft state list. */
	*xdfs_c_hvm_ss = xdfs_ssp;

	/* Install our module */
	if (modldrv.drv_linkinfo == NULL)
		modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;

	err = mod_install(&modlinkage);
	if (err != 0)
		ddi_soft_state_fini(&xdfs_ssp);

	return (err);
}

/*
 * Module info entry point.  Makes sure the module description has been
 * filled in (it may not be if _init() hasn't run) and then reports the
 * module information via mod_info().
 */
int
_info(struct modinfo *modinfop)
{
	if (!modldrv.drv_linkinfo) {
		modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;
	}

	return (mod_info(&modlinkage, modinfop));
}

/*
 * Module unload entry point.  The soft state list is only destroyed once
 * the module has been successfully removed; otherwise the error from
 * mod_remove() is returned and nothing is torn down.
 */
int
_fini(void)
{
	int	err = mod_remove(&modlinkage);

	if (err == 0)
		ddi_soft_state_fini(&xdfs_ssp);

	return (err);
}