Mercurial > illumos > illumos-gate
changeset 4696:666103281afe
6531557 format(1m) does not work with virtual disks
6573657 vds type-conversion bug prevents raw disk accesses from working
6578918 disk image should have a device id
author | achartre |
---|---|
date | Fri, 20 Jul 2007 17:15:29 -0700 |
parents | 42666b3425e5 |
children | 0dd5086ea219 |
files | usr/src/uts/sun4v/io/vdc.c usr/src/uts/sun4v/io/vds.c usr/src/uts/sun4v/sys/vdsk_common.h |
diffstat | 3 files changed, 1065 insertions(+), 306 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/vdc.c Fri Jul 20 15:17:01 2007 -0700 +++ b/usr/src/uts/sun4v/io/vdc.c Fri Jul 20 17:15:29 2007 -0700 @@ -81,6 +81,7 @@ #include <sys/cdio.h> #include <sys/dktp/fdisk.h> +#include <sys/dktp/dadkio.h> #include <sys/scsi/generic/sense.h> #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ @@ -571,7 +572,7 @@ status = vdc_setup_disk_layout(vdc); if (status != 0) { DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", - vdc->instance, status); + vdc->instance, status); goto return_status; } @@ -582,13 +583,13 @@ status = vdc_create_device_nodes(vdc); if (status) { DMSG(vdc, 0, "[%d] Failed to create device nodes", - instance); + instance); goto return_status; } status = vdc_create_device_nodes_props(vdc); if (status) { DMSG(vdc, 0, "[%d] Failed to create device nodes" - " properties (%d)", instance, status); + " properties (%d)", instance, status); goto return_status; } @@ -642,7 +643,7 @@ if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", - vdc->instance); + vdc->instance); return (EIO); } vdc->ldc_id = ldc_id; @@ -656,7 +657,7 @@ status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); if (status != 0) { DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", - vdc->instance, ldc_id, status); + vdc->instance, ldc_id, status); return (status); } vdc->initialized |= VDC_LDC_INIT; @@ -664,7 +665,7 @@ status = ldc_status(vdc->ldc_handle, &ldc_state); if (status != 0) { DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", - vdc->instance, status); + vdc->instance, status); return (status); } vdc->ldc_state = ldc_state; @@ -674,7 +675,7 @@ (caddr_t)vdc); if (status != 0) { DMSG(vdc, 0, "[%d] LDC callback reg. 
failed (%d)", - vdc->instance, status); + vdc->instance, status); return (status); } vdc->initialized |= VDC_LDC_CB; @@ -690,7 +691,7 @@ status = ldc_open(vdc->ldc_handle); if (status != 0) { DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", - vdc->instance, vdc->ldc_id, status); + vdc->instance, vdc->ldc_id, status); return (status); } vdc->initialized |= VDC_LDC_OPEN; @@ -721,7 +722,7 @@ int status; DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", - vdcp->state); + vdcp->state); status = ldc_down(vdcp->ldc_handle); DMSG(vdcp, 0, "ldc_down() = %d\n", status); @@ -739,8 +740,8 @@ ddi_remove_minor_node(vdc->dip, "h,raw"); if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, - VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), - DDI_NT_BLOCK, 0) != DDI_SUCCESS) { + VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), + DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", vdc->instance); return (EIO); @@ -750,8 +751,8 @@ vdc->initialized |= VDC_MINOR; if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, - VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), - DDI_NT_BLOCK, 0) != DDI_SUCCESS) { + VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), + DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", vdc->instance); return (EIO); @@ -767,8 +768,8 @@ ddi_remove_minor_node(vdc->dip, "wd,raw"); if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK, - VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), - DDI_NT_BLOCK, 0) != DDI_SUCCESS) { + VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), + DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", vdc->instance); return (EIO); @@ -778,8 +779,8 @@ vdc->initialized |= VDC_MINOR; if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, - VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), - DDI_NT_BLOCK, 0) != DDI_SUCCESS) { + VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), + DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add block node 
'h,raw'", vdc->instance); return (EIO); @@ -858,19 +859,19 @@ if (ddi_create_minor_node(dip, name, S_IFBLK, VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", - instance, name); + instance, name); return (EIO); } /* if any device node is created we set this flag */ vdc->initialized |= VDC_MINOR; - (void) snprintf(name, sizeof (name), "%c%s", - 'a' + i, ",raw"); + (void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw"); + if (ddi_create_minor_node(dip, name, S_IFCHR, VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", - instance, name); + instance, name); return (EIO); } } @@ -914,7 +915,7 @@ if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { DMSG(vdc, 0, "![%d] Could not create device node property." - " No VTOC available", instance); + " No VTOC available", instance); return (ENXIO); } @@ -932,25 +933,25 @@ for (i = 0; i < num_slices; i++) { dev = makedevice(ddi_driver_major(dip), - VD_MAKE_DEV(instance, i)); + VD_MAKE_DEV(instance, i)); size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", - instance, size, size / (1024 * 1024), - vdc->vtoc->v_part[i].p_size); + instance, size, size / (1024 * 1024), + vdc->vtoc->v_part[i].p_size); rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); if (rv != DDI_PROP_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", - instance, VDC_SIZE_PROP_NAME, size); + instance, VDC_SIZE_PROP_NAME, size); return (EIO); } rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, - lbtodb(size)); + lbtodb(size)); if (rv != DDI_PROP_SUCCESS) { cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", - instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); + instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size)); return (EIO); } } @@ -978,7 +979,7 @@ } DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", - getminor(*dev), flag, otyp); + 
getminor(*dev), flag, otyp); mutex_enter(&vdc->lock); vdc->open_count++; @@ -1091,6 +1092,7 @@ vdc_t *vdc = NULL; int instance = VDCUNIT(buf->b_edev); int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE; + int slice; if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); @@ -1106,8 +1108,15 @@ bp_mapin(buf); + if ((long)buf->b_private == VD_SLICE_NONE) { + /* I/O using an absolute disk offset */ + slice = VD_SLICE_NONE; + } else { + slice = VDCPART(buf->b_edev); + } + rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, - buf->b_bcount, VDCPART(buf->b_edev), buf->b_lblkno, + buf->b_bcount, slice, buf->b_lblkno, CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : VIO_write_dir); @@ -1239,9 +1248,8 @@ vdc->instance, status); if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " - "id(%lx) rv(%d) size(%ld)", - vdc->instance, vdc->ldc_handle, - status, msglen); + "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, + status, msglen); if (msglen != sizeof (vio_ver_msg_t)) status = ENOMSG; } @@ -1285,7 +1293,7 @@ if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", - vdcp->instance); + vdcp->instance); return (EPROTO); } @@ -1334,9 +1342,8 @@ if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " - "id(%lx) rv(%d) size(%ld)", - vdc->instance, vdc->ldc_handle, - status, msglen); + "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle, + status, msglen); if (msglen != sizeof (vio_ver_msg_t)) status = ENOMSG; } @@ -1380,7 +1387,7 @@ if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", - vdcp->instance); + vdcp->instance); return (EPROTO); } @@ -1421,12 
+1428,12 @@ if (status != 0) { DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", - vdc->instance, status); + vdc->instance, status); return (status); } DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", - vdc->instance, status); + vdc->instance, status); /* fill in tag */ pkt.tag.vio_msgtype = VIO_TYPE_CTRL; @@ -1444,7 +1451,7 @@ status = vdc_send(vdc, (caddr_t)&pkt, &msglen); if (status != 0) { DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", - vdc->instance, status); + vdc->instance, status); } return (status); @@ -1487,12 +1494,12 @@ if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", - vdcp->instance); + vdcp->instance); return (EPROTO); } return (vdc_handle_dring_reg_msg(vdcp, - (vio_dring_reg_msg_t *)&vio_msg)); + (vio_dring_reg_msg_t *)&vio_msg)); } @@ -1586,17 +1593,15 @@ status = vdc_wait_for_response(vdcp, &vio_msg); mutex_enter(&vdcp->lock); if (status) { - DMSG(vdcp, 0, - "[%d] Failed waiting for RDX response," - " rv(%d)", vdcp->instance, status); + DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)", + vdcp->instance, status); return (status); } /* check type and sub_type ... */ if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) { - DMSG(vdcp, 0, "[%d] Invalid RDX response\n", - vdcp->instance); + DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance); return (EPROTO); } @@ -1655,7 +1660,7 @@ case 0: if (len == 0) { DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with " - "no error!\n", vdc->instance); + "no error!\n", vdc->instance); goto loop; } @@ -1867,7 +1872,7 @@ return (ENOENT); } obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, - OBP_REG, -1); + OBP_REG, -1); DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); /* @@ -1896,8 +1901,8 @@ * ones are disk nodes. 
*/ num_vdevs = md_scan_dag(mdp, rootnode, - md_find_name(mdp, VDC_MD_VDEV_NAME), - md_find_name(mdp, "fwd"), listp); + md_find_name(mdp, VDC_MD_VDEV_NAME), + md_find_name(mdp, "fwd"), listp); if (num_vdevs <= 0) { cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); @@ -1910,14 +1915,14 @@ status = md_get_prop_str(mdp, listp[idx], "name", &node_name); if ((status != 0) || (node_name == NULL)) { cmn_err(CE_NOTE, "Unable to get name of node type '%s'" - ": err %d", VDC_MD_VDEV_NAME, status); + ": err %d", VDC_MD_VDEV_NAME, status); continue; } DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { status = md_get_prop_val(mdp, listp[idx], - VDC_MD_CFG_HDL, &md_inst); + VDC_MD_CFG_HDL, &md_inst); DMSGX(1, "[%d] vdc inst in MD=%lx\n", instance, md_inst); if ((status == 0) && (md_inst == obp_inst)) { @@ -1936,20 +1941,20 @@ /* get the channels for this node */ num_chans = md_scan_dag(mdp, listp[idx], - md_find_name(mdp, VDC_MD_CHAN_NAME), - md_find_name(mdp, "fwd"), chanp); + md_find_name(mdp, VDC_MD_CHAN_NAME), + md_find_name(mdp, "fwd"), chanp); /* expecting at least one channel */ if (num_chans <= 0) { cmn_err(CE_NOTE, "No '%s' node for '%s' port", - VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); + VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); status = ENOENT; goto done; } else if (num_chans != 1) { DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n", - instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, - num_chans); + instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, + num_chans); } /* @@ -1958,7 +1963,7 @@ */ if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) { cmn_err(CE_NOTE, "Channel '%s' property not found", - VDC_ID_PROP); + VDC_ID_PROP); status = ENOENT; } @@ -1991,7 +1996,7 @@ switch (status) { case ECONNREFUSED: /* listener not ready at other end */ DMSG(vdc, 0, "[%d] ldc_up(%lx,...) 
return %d\n", - vdc->instance, vdc->ldc_id, status); + vdc->instance, vdc->ldc_id, status); status = 0; break; default: @@ -2099,22 +2104,22 @@ */ if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { DMSG(vdc, 0, "[%d] using minimum DRing size\n", - vdc->instance); + vdc->instance); vdc->dring_max_cookies = maxphys / PAGESIZE; } else { vdc->dring_max_cookies = - (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; + (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; } vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + - (sizeof (ldc_mem_cookie_t) * - (vdc->dring_max_cookies - 1))); + (sizeof (ldc_mem_cookie_t) * + (vdc->dring_max_cookies - 1))); vdc->dring_len = VD_DRING_LEN; status = ldc_mem_dring_create(vdc->dring_len, - vdc->dring_entry_size, &vdc->ldc_dring_hdl); + vdc->dring_entry_size, &vdc->ldc_dring_hdl); if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { DMSG(vdc, 0, "[%d] Descriptor ring creation failed", - vdc->instance); + vdc->instance); return (status); } vdc->initialized |= VDC_DRING_INIT; @@ -2123,17 +2128,17 @@ if ((vdc->initialized & VDC_DRING_BOUND) == 0) { DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); vdc->dring_cookie = - kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); + kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, - LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, - &vdc->dring_cookie[0], - &vdc->dring_cookie_count); + LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, + &vdc->dring_cookie[0], + &vdc->dring_cookie_count); if (status != 0) { DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " - "(%lx) to channel (%lx) status=%d\n", - vdc->instance, vdc->ldc_dring_hdl, - vdc->ldc_handle, status); + "(%lx) to channel (%lx) status=%d\n", + vdc->instance, vdc->ldc_dring_hdl, + vdc->ldc_handle, status); return (status); } ASSERT(vdc->dring_cookie_count == 1); @@ -2153,8 +2158,8 @@ /* Allocate the local copy of this dring */ vdc->local_dring = - kmem_zalloc(vdc->dring_len * sizeof 
(vdc_local_desc_t), - KM_SLEEP); + kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), + KM_SLEEP); vdc->initialized |= VDC_DRING_LOCAL; } @@ -2170,10 +2175,10 @@ dep->hdr.dstate = VIO_DESC_FREE; status = ldc_mem_alloc_handle(vdc->ldc_handle, - &vdc->local_dring[i].desc_mhdl); + &vdc->local_dring[i].desc_mhdl); if (status != 0) { DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" - " descriptor %d", vdc->instance, i); + " descriptor %d", vdc->instance, i); return (status); } vdc->local_dring[i].is_free = B_TRUE; @@ -2250,7 +2255,7 @@ if (vdc->initialized & VDC_DRING_LOCAL) { DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); kmem_free(vdc->local_dring, - vdc->dring_len * sizeof (vdc_local_desc_t)); + vdc->dring_len * sizeof (vdc_local_desc_t)); vdc->initialized &= ~VDC_DRING_LOCAL; } @@ -2261,7 +2266,7 @@ vdc->initialized &= ~VDC_DRING_BOUND; } else { DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", - vdc->instance, status, vdc->ldc_dring_hdl); + vdc->instance, status, vdc->ldc_dring_hdl); } kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); } @@ -2275,7 +2280,7 @@ vdc->initialized &= ~VDC_DRING_INIT; } else { DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", - vdc->instance, status, vdc->ldc_dring_hdl); + vdc->instance, status, vdc->ldc_dring_hdl); } } } @@ -2364,7 +2369,7 @@ void *cb_arg, vio_desc_direction_t dir) { ASSERT(vdcp != NULL); - ASSERT(slice < V_NUMPAR); + ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR); mutex_enter(&vdcp->lock); @@ -2765,7 +2770,7 @@ dep = ldep->dep; ASSERT(dep != NULL); ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || - (dep->payload.status == ECANCELED)); + (dep->payload.status == ECANCELED)); VDC_MARK_DRING_ENTRY_FREE(vdc, idx); @@ -2795,14 +2800,14 @@ bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); kmem_free(ldep->align_addr, - sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); + sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8)); ldep->align_addr = NULL; } rv = ldc_mem_unbind_handle(ldep->desc_mhdl); if 
(rv != 0) { DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", - vdc->instance, ldep->desc_mhdl, idx, rv); + vdc->instance, ldep->desc_mhdl, idx, rv); /* * The error returned by the vDisk server is more informative * and thus has a higher priority but if it isn't set we ensure @@ -2879,8 +2884,8 @@ if (((uint64_t)vaddr & 0x7) != 0) { ASSERT(ldep->align_addr == NULL); ldep->align_addr = - kmem_alloc(sizeof (caddr_t) * - P2ROUNDUP(nbytes, 8), KM_SLEEP); + kmem_alloc(sizeof (caddr_t) * + P2ROUNDUP(nbytes, 8), KM_SLEEP); DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " "(buf=%p nb=%ld op=%d)\n", vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, @@ -2892,17 +2897,16 @@ maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), - maptype, perm, &dep->payload.cookie[0], - &dep->payload.ncookies); + maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies); DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", - vdcp->instance, dep->payload.ncookies); + vdcp->instance, dep->payload.ncookies); if (rv != 0) { DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " "(mhdl=%p, buf=%p, err=%d)\n", vdcp->instance, (void *)mhdl, (void *)vaddr, rv); if (ldep->align_addr) { kmem_free(ldep->align_addr, - sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); + sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); ldep->align_addr = NULL; } return (EAGAIN); @@ -2916,11 +2920,11 @@ if (rv != 0) { (void) ldc_mem_unbind_handle(mhdl); DMSG(vdcp, 0, "?[%d] Failed to get next cookie " - "(mhdl=%lx cnum=%d), err=%d", - vdcp->instance, mhdl, i, rv); + "(mhdl=%lx cnum=%d), err=%d", + vdcp->instance, mhdl, i, rv); if (ldep->align_addr) { kmem_free(ldep->align_addr, - sizeof (caddr_t) * ldep->nbytes); + sizeof (caddr_t) * ldep->nbytes); ldep->align_addr = NULL; } return (EAGAIN); @@ -3032,7 +3036,7 @@ if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", - 
vdc->instance, event); + vdc->instance, event); return (LDC_SUCCESS); } @@ -3064,16 +3068,16 @@ status = vdc_recv(vdcp, msgp, &nbytes); DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", - status, (int)nbytes); + status, (int)nbytes); if (status) { DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", - vdcp->instance, status); + vdcp->instance, status); return (status); } if (nbytes < sizeof (vio_msg_tag_t)) { DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", - vdcp->instance, sizeof (vio_msg_tag_t), nbytes); + vdcp->instance, sizeof (vio_msg_tag_t), nbytes); return (ENOMSG); } @@ -3091,11 +3095,11 @@ if ((msgp->tag.vio_sid != vdcp->session_id) && (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " - "expected 0x%lx [seq num %lx @ %d]", - vdcp->instance, msgp->tag.vio_sid, - vdcp->session_id, - ((vio_dring_msg_t *)msgp)->seq_num, - ((vio_dring_msg_t *)msgp)->start_idx); + "expected 0x%lx [seq num %lx @ %d]", + vdcp->instance, msgp->tag.vio_sid, + vdcp->session_id, + ((vio_dring_msg_t *)msgp)->seq_num, + ((vio_dring_msg_t *)msgp)->start_idx); return (ENOMSG); } return (0); @@ -3188,7 +3192,7 @@ /* all done - now clear up pending dring copy */ dring_size = vdcp->local_dring_backup_len * - sizeof (vdcp->local_dring_backup[0]); + sizeof (vdcp->local_dring_backup[0]); (void) kmem_free(vdcp->local_dring_backup, dring_size); @@ -3278,14 +3282,14 @@ #define Q(_s) (vdcp->state == _s) ? 
#_s : DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state, - Q(VDC_STATE_INIT) - Q(VDC_STATE_INIT_WAITING) - Q(VDC_STATE_NEGOTIATE) - Q(VDC_STATE_HANDLE_PENDING) - Q(VDC_STATE_RUNNING) - Q(VDC_STATE_RESETTING) - Q(VDC_STATE_DETACH) - "UNKNOWN"); + Q(VDC_STATE_INIT) + Q(VDC_STATE_INIT_WAITING) + Q(VDC_STATE_NEGOTIATE) + Q(VDC_STATE_HANDLE_PENDING) + Q(VDC_STATE_RUNNING) + Q(VDC_STATE_RESETTING) + Q(VDC_STATE_DETACH) + "UNKNOWN"); switch (vdcp->state) { case VDC_STATE_INIT: @@ -3412,7 +3416,7 @@ if (status) break; DMSG(vdcp, 1, "[%d] new pkt(s) available\n", - vdcp->instance); + vdcp->instance); status = vdc_process_data_msg(vdcp, &msg); if (status) { DMSG(vdcp, 1, "[%d] process_data_msg " @@ -3492,7 +3496,7 @@ mutex_exit(&vdcp->lock); DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n", - vdcp->instance); + vdcp->instance); thread_exit(); break; } @@ -3547,7 +3551,7 @@ if ((start >= vdcp->dring_len) || (end >= vdcp->dring_len) || (end < -1)) { DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n", - vdcp->instance, start, end); + vdcp->instance, start, end); mutex_exit(&vdcp->lock); return (EINVAL); } @@ -3585,7 +3589,7 @@ ldep = &vdcp->local_dring[idx]; DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n", - ldep->dep->hdr.dstate, ldep->cb_type); + ldep->dep->hdr.dstate, ldep->cb_type); if (ldep->dep->hdr.dstate == VIO_DESC_DONE) { struct buf *bufp; @@ -3604,7 +3608,7 @@ bufp = ldep->cb_arg; ASSERT(bufp != NULL); bufp->b_resid = - bufp->b_bcount - ldep->dep->payload.nbytes; + bufp->b_bcount - ldep->dep->payload.nbytes; status = ldep->dep->payload.status; /* Future:ntoh */ if (status != 0) { DMSG(vdcp, 1, "strategy status=%d\n", status); @@ -3712,12 +3716,12 @@ status = vdc_send(vdc, (caddr_t)ver_msg, &len); DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", - vdc->instance, status); + vdc->instance, status); if (len != sizeof (*ver_msg)) status = EBADMSG; } else { - DMSG(vdc, 0, "[%d] No common version with " - "vDisk server", vdc->instance); + DMSG(vdc, 0, "[%d] 
No common version with vDisk server", + vdc->instance); status = ENOTSUP; } @@ -3772,10 +3776,10 @@ vdc->vdisk_type = attr_msg->vdisk_type; DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", - vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); + vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", - vdc->instance, vdc->block_size, - attr_msg->vdisk_block_size); + vdc->instance, vdc->block_size, + attr_msg->vdisk_block_size); /* * We don't know at compile time what the vDisk server will @@ -3786,19 +3790,19 @@ * was garbage. */ if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= - (PAGESIZE * DEV_BSIZE)) { + (PAGESIZE * DEV_BSIZE)) { vdc->max_xfer_sz = attr_msg->max_xfer_sz; vdc->block_size = attr_msg->vdisk_block_size; } else { DMSG(vdc, 0, "[%d] vds block transfer size too big;" - " using max supported by vdc", vdc->instance); + " using max supported by vdc", vdc->instance); } if ((attr_msg->xfer_mode != VIO_DRING_MODE) || (attr_msg->vdisk_size > INT64_MAX) || (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { DMSG(vdc, 0, "[%d] Invalid attributes from vds", - vdc->instance); + vdc->instance); status = EINVAL; break; } @@ -3859,7 +3863,7 @@ /* save the received dring_ident */ vdc->dring_ident = dring_msg->dring_ident; DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", - vdc->instance, vdc->dring_ident); + vdc->instance, vdc->dring_ident); break; case VIO_SUBTYPE_NACK: @@ -3923,10 +3927,10 @@ if ((dring_msg->seq_num <= vdc->seq_num_reply) || (dring_msg->seq_num > vdc->seq_num)) { DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " - "%lu > expected <= %lu (last proc req %lu sent %lu)\n", - vdc->instance, dring_msg->seq_num, - vdc->seq_num_reply, vdc->seq_num, - vdc->req_id_proc, vdc->req_id); + "%lu > expected <= %lu (last proc req %lu sent %lu)\n", + vdc->instance, dring_msg->seq_num, + vdc->seq_num_reply, vdc->seq_num, + vdc->req_id_proc, vdc->req_id); return (VDC_SEQ_NUM_INVALID); } 
vdc->seq_num_reply = dring_msg->seq_num; @@ -3992,7 +3996,7 @@ ver_msg->ver_major = vdc_version[i].major; ver_msg->ver_minor = vdc_version[i].minor; DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n", - ver_msg->ver_major, ver_msg->ver_minor); + ver_msg->ver_major, ver_msg->ver_minor); return (B_FALSE); } @@ -4057,8 +4061,8 @@ VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir); if (rv != 0) { DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n", - vdc->instance, rv, - ddi_model_convert_from(dk_arg->mode & FMODELS)); + vdc->instance, rv, + ddi_model_convert_from(dk_arg->mode & FMODELS)); } /* @@ -4083,6 +4087,145 @@ } /* + * Function: + * vdc_dkio_get_partition() + * + * Description: + * This function implements the DKIOCGAPART ioctl. + * + * Arguments: + * dev - device + * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure + * flag - ioctl flags + */ +static int +vdc_dkio_get_partition(dev_t dev, caddr_t arg, int flag) +{ + struct dk_geom geom; + struct vtoc vtoc; + union { + struct dk_map map[NDKMAP]; + struct dk_map32 map32[NDKMAP]; + } data; + int i, rv, size; + + rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); + if (rv != 0) + return (rv); + + rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); + if (rv != 0) + return (rv); + + if (vtoc.v_nparts != NDKMAP || + geom.dkg_nhead == 0 || geom.dkg_nsect == 0) + return (EINVAL); + + if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { + + for (i = 0; i < NDKMAP; i++) { + data.map32[i].dkl_cylno = vtoc.v_part[i].p_start / + (geom.dkg_nhead * geom.dkg_nsect); + data.map32[i].dkl_nblk = vtoc.v_part[i].p_size; + } + size = NDKMAP * sizeof (struct dk_map32); + + } else { + + for (i = 0; i < NDKMAP; i++) { + data.map[i].dkl_cylno = vtoc.v_part[i].p_start / + (geom.dkg_nhead * geom.dkg_nsect); + data.map[i].dkl_nblk = vtoc.v_part[i].p_size; + } + size = NDKMAP * sizeof (struct dk_map); + + } + + if (ddi_copyout(&data, arg, size, flag) != 0) + return (EFAULT); + 
+ return (0); +} + +/* + * Function: + * vdc_dioctl_rwcmd() + * + * Description: + * This function implements the DIOCTL_RWCMD ioctl. This ioctl is used + * for DKC_DIRECT disks to read or write at an absolute disk offset. + * + * Arguments: + * dev - device + * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure + * flag - ioctl flags + */ +static int +vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) +{ + struct dadkio_rwcmd32 rwcmd32; + struct dadkio_rwcmd rwcmd; + struct iovec aiov; + struct uio auio; + int rw, status; + struct buf *buf; + + if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { + if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, + sizeof (struct dadkio_rwcmd32), flag)) { + return (EFAULT); + } + rwcmd.cmd = rwcmd32.cmd; + rwcmd.flags = rwcmd32.flags; + rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; + rwcmd.buflen = rwcmd32.buflen; + rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; + } else { + if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, + sizeof (struct dadkio_rwcmd), flag)) { + return (EFAULT); + } + } + + switch (rwcmd.cmd) { + case DADKIO_RWCMD_READ: + rw = B_READ; + break; + case DADKIO_RWCMD_WRITE: + rw = B_WRITE; + break; + default: + return (EINVAL); + } + + bzero((caddr_t)&aiov, sizeof (struct iovec)); + aiov.iov_base = rwcmd.bufaddr; + aiov.iov_len = rwcmd.buflen; + + bzero((caddr_t)&auio, sizeof (struct uio)); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; + auio.uio_resid = rwcmd.buflen; + auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; + + buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); + bioinit(buf); + /* + * We use the private field of buf to specify that this is an + * I/O using an absolute offset. + */ + buf->b_private = (void *)VD_SLICE_NONE; + + status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); + + biofini(buf); + kmem_free(buf, sizeof (buf_t)); + + return (status); +} + +/* * This structure is used in the DKIO(7I) array below. 
*/ typedef struct vdc_dk_ioctl { @@ -4122,6 +4265,9 @@ {VD_OP_SET_EFI, DKIOCSETEFI, 0, vdc_set_efi_convert}, + /* DIOCTL_RWCMD is converted to a read or a write */ + {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, + /* * These particular ioctls are not sent to the server - vdc fakes up * the necessary info. @@ -4129,6 +4275,7 @@ {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, + {0, DKIOCGAPART, 0, vdc_null_copy_func }, {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, {0, CDROMREADOFFSET, 0, vdc_null_copy_func} }; @@ -4176,7 +4323,7 @@ } DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", - instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); + instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); /* * Validate the ioctl operation to be performed. @@ -4220,6 +4367,22 @@ case USCSICMD: return (ENOTTY); + case DIOCTL_RWCMD: + { + if (vdc->cinfo->dki_ctype != DKC_DIRECT) + return (ENOTTY); + + return (vdc_dioctl_rwcmd(dev, arg, mode)); + } + + case DKIOCGAPART: + { + if (vdc->vdisk_label != VD_DISK_LABEL_VTOC) + return (ENOTSUP); + + return (vdc_dkio_get_partition(dev, arg, mode)); + } + case DKIOCINFO: { struct dk_cinfo cinfo; @@ -4230,7 +4393,7 @@ cinfo.dki_partition = VDCPART(dev); rv = ddi_copyout(&cinfo, (void *)arg, - sizeof (struct dk_cinfo), mode); + sizeof (struct dk_cinfo), mode); if (rv != 0) return (EFAULT); @@ -4243,7 +4406,7 @@ return (ENXIO); rv = ddi_copyout(vdc->minfo, (void *)arg, - sizeof (struct dk_minfo), mode); + sizeof (struct dk_minfo), mode); if (rv != 0) return (EFAULT); @@ -4303,7 +4466,7 @@ /* put the request on a task queue */ rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, - (void *)dkarg, DDI_SLEEP); + (void *)dkarg, DDI_SLEEP); if (rv == NULL) { /* clean up if dispatch fails */ mutex_enter(&vdc->lock); @@ -4344,7 +4507,7 @@ rv = (iop->convert)(vdc, arg, mem_p, 
mode, VD_COPYIN); if (rv != 0) { DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", - instance, rv, cmd); + instance, rv, cmd); if (mem_p != NULL) kmem_free(mem_p, alloc_len); return (rv); @@ -4364,7 +4527,7 @@ * that the ioctl is not applicable. */ DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", - instance, rv, cmd); + instance, rv, cmd); if (mem_p != NULL) kmem_free(mem_p, alloc_len); @@ -4429,7 +4592,7 @@ rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); if (rv != 0) { DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", - instance, rv, cmd); + instance, rv, cmd); if (mem_p != NULL) kmem_free(mem_p, alloc_len); return (rv); @@ -4838,7 +5001,12 @@ (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; - vdc->cinfo->dki_ctype = DKC_SCSI_CCS; + /* + * We currently set the controller type to DKC_DIRECT for any disk. + * When SCSI support is implemented, we will eventually change this + * type to DKC_SCSI_CCS for disks supporting the SCSI protocol. + */ + vdc->cinfo->dki_ctype = DKC_DIRECT; vdc->cinfo->dki_flags = DKI_FMTVOL; vdc->cinfo->dki_cnum = 0; vdc->cinfo->dki_addr = 0; @@ -4894,12 +5062,12 @@ vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); dev = makedevice(ddi_driver_major(vdc->dip), - VD_MAKE_DEV(vdc->instance, 0)); + VD_MAKE_DEV(vdc->instance, 0)); rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); if (rv && rv != ENOTSUP) { DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", - vdc->instance, rv); + vdc->instance, rv); return (rv); } @@ -5064,7 +5232,7 @@ /* build an encapsulated devid based on the returned devid */ if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, - vd_devid->id, &vdc->devid) != DDI_SUCCESS) { + vd_devid->id, &vdc->devid) != DDI_SUCCESS) { DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); kmem_free(vd_devid, bufsize); return (1);
--- a/usr/src/uts/sun4v/io/vds.c Fri Jul 20 15:17:01 2007 -0700 +++ b/usr/src/uts/sun4v/io/vds.c Fri Jul 20 17:15:29 2007 -0700 @@ -50,6 +50,7 @@ #include <sys/vtoc.h> #include <sys/vfs.h> #include <sys/stat.h> +#include <sys/scsi/impl/uscsi.h> #include <vm/seg_map.h> /* Virtual disk server initialization flags */ @@ -88,6 +89,12 @@ /* Flags for writing to a vdisk which is a file */ #define VD_FILE_WRITE_FLAGS SM_ASYNC +/* Number of backup labels */ +#define VD_FILE_NUM_BACKUP 5 + +/* Timeout for SCSI I/O */ +#define VD_SCSI_RDWR_TIMEOUT 30 /* 30 secs */ + /* * By Solaris convention, slice/partition 2 represents the entire disk; * unfortunately, this convention does not appear to be codified. @@ -118,17 +125,14 @@ (((vd)->xfer_mode == 0) ? "null client" : \ "unsupported client"))) -/* For IO to raw disk on file */ -#define VD_FILE_SLICE_NONE -1 - /* Read disk label from a disk on file */ #define VD_FILE_LABEL_READ(vd, labelp) \ - vd_file_rw(vd, VD_FILE_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \ + vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \ 0, sizeof (struct dk_label)) /* Write disk label to a disk on file */ #define VD_FILE_LABEL_WRITE(vd, labelp) \ - vd_file_rw(vd, VD_FILE_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ + vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ 0, sizeof (struct dk_label)) /* @@ -314,6 +318,7 @@ boolean_t file; /* underlying file */ vnode_t *file_vnode; /* file vnode */ size_t file_size; /* file size */ + ddi_devid_t file_devid; /* devid for disk image */ struct dk_efi dk_efi; /* synthetic for slice type */ struct dk_geom dk_geom; /* synthetic for slice type */ struct vtoc vtoc; /* synthetic for slice type */ @@ -374,6 +379,8 @@ static uint_t vd_file_write_flags = VD_FILE_WRITE_FLAGS; +static short vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT; + /* * Supported protocol version pairs, from highest (newest) to lowest (oldest) * @@ -399,8 +406,8 @@ * Parameters: * vd - disk on which the operation is performed. 
* slice - slice on which the operation is performed, - * VD_FILE_SLICE_NONE indicates that the operation - * is done on the raw disk. + * VD_SLICE_NONE indicates that the operation + * is done using an absolute disk offset. * operation - operation to execute: read (VD_OP_BREAD) or * write (VD_OP_BWRITE). * data - buffer where data are read to or written from. @@ -424,7 +431,7 @@ ASSERT(vd->file); ASSERT(len > 0); - if (slice == VD_FILE_SLICE_NONE) { + if (slice == VD_SLICE_NONE) { /* raw disk access */ offset = blk * DEV_BSIZE; } else { @@ -508,6 +515,383 @@ return (len); } +/* + * Function: + * vd_file_set_vtoc + * + * Description: + * Set the vtoc of a disk image by writing the label and backup + * labels into the disk image backend. + * + * Parameters: + * vd - disk on which the operation is performed. + * label - the data to be written. + * + * Return Code: + * 0 - success. Also returned when the backup labels can not + * be stored (no alternate cylinder or not enough sectors). + * n > 0 - error, n indicates the errno code. + */ +static int +vd_file_set_vtoc(vd_t *vd, struct dk_label *label) +{ + int blk, sec, cyl, head, cnt; + + ASSERT(vd->file); + + if (VD_FILE_LABEL_WRITE(vd, label) < 0) { + PR0("fail to write disk label"); + return (EIO); + } + + /* + * Backup labels are on the last alternate cylinder's + * first five odd sectors. + */ + if (label->dkl_acyl == 0) { + PR0("no alternate cylinder, can not store backup labels"); + return (0); + } + + cyl = label->dkl_ncyl + label->dkl_acyl - 1; + head = label->dkl_nhead - 1; + + blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) + + (head * label->dkl_nsect); + + /* + * Write the backup labels. Make sure we don't try to write past + * the last cylinder. 
+ */ + sec = 1; + + for (cnt = 0; cnt < VD_FILE_NUM_BACKUP; cnt++) { + + if (sec >= label->dkl_nsect) { + PR0("not enough sector to store all backup labels"); + return (0); + } + + if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)label, + blk + sec, sizeof (struct dk_label)) < 0) { + PR0("error writing backup label at block %d\n", + blk + sec); + return (EIO); + } + + PR1("wrote backup label at block %d\n", blk + sec); + + sec += 2; + } + + return (0); +} + +/* + * Function: + * vd_file_get_devid_block + * + * Description: + * Return the block number where the device id is stored. + * + * Parameters: + * vd - disk on which the operation is performed. + * blkp - where the block number is returned (set only on success) + * + * Return Code: + * 0 - success + * ENOSPC - disk has no space to store a device id + */ +static int +vd_file_get_devid_block(vd_t *vd, size_t *blkp) +{ + diskaddr_t spc, head, cyl; + + ASSERT(vd->file); + ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); + + /* a devid needs at least two alternate cylinders */ + if (vd->dk_geom.dkg_acyl < 2) { + PR0("not enough alternate cylinder available for devid " + "(acyl=%u)", vd->dk_geom.dkg_acyl); + return (ENOSPC); + } + + /* the devid is on the last track of the second to last cylinder */ + cyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2; + spc = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect; + head = vd->dk_geom.dkg_nhead - 1; + + *blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) + + (head * vd->dk_geom.dkg_nsect) + 1; + + return (0); +} + +/* + * Return the checksum of a disk block containing an on-disk devid. The + * checksum is the XOR of every uint_t word of the DEV_BSIZE block except + * the last one (presumably where the checksum itself is stored). 
+ */ +static uint_t +vd_dkdevid2cksum(struct dk_devid *dkdevid) +{ + uint_t chksum, *ip; + int i; + + chksum = 0; + ip = (uint_t *)dkdevid; + for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) + chksum ^= ip[i]; + + return (chksum); +} + +/* + * Function: + * vd_file_read_devid + * + * Description: + * Read the device id stored on a disk image. The on-disk block is + * checked for its revision, checksum and devid validity. 
+ * + * Parameters: + * vd - disk on which the operation is performed. + * devid - the return address of the device ID. On success it is + * set to a kmem_alloc()'ed copy of the on-disk devid; it + * is left untouched on error. + * + * Return Code: + * 0 - success + * EIO - I/O error while trying to access the disk image + * EINVAL - no valid device id was found + * ENOSPC - disk has no space to store a device id + */ +static int +vd_file_read_devid(vd_t *vd, ddi_devid_t *devid) +{ + struct dk_devid *dkdevid; + size_t blk; + uint_t chksum; + int status, sz; + + if ((status = vd_file_get_devid_block(vd, &blk)) != 0) + return (status); + + dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); + + /* get the devid */ + if ((vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk, + DEV_BSIZE)) < 0) { + PR0("error reading devid block at %lu", blk); + status = EIO; + goto done; + } + + /* validate the revision */ + if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) || + (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) { + PR0("invalid devid found at block %lu (bad revision)", blk); + status = EINVAL; + goto done; + } + + /* compute checksum */ + chksum = vd_dkdevid2cksum(dkdevid); + + /* compare the checksums */ + if (DKD_GETCHKSUM(dkdevid) != chksum) { + PR0("invalid devid found at block %lu (bad checksum)", blk); + status = EINVAL; + goto done; + } + + /* validate the device id */ + if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) { + PR0("invalid devid found at block %lu", blk); + status = EINVAL; + goto done; + } + + PR1("devid read at block %lu", blk); + + sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid); + *devid = kmem_alloc(sz, KM_SLEEP); + bcopy(&dkdevid->dkd_devid, *devid, sz); + +done: + kmem_free(dkdevid, DEV_BSIZE); + return (status); + +} + +/* + * Function: + * vd_file_write_devid + * + * Description: + * Write a device id into disk image. + * + * Parameters: + * vd - disk on which the operation is performed. + * devid - the device ID to store. 
+ * + * Return Code: + * 0 - success + * EIO - I/O error while trying to access the disk image + * ENOSPC - disk has no space to store a device id + */ +static int +vd_file_write_devid(vd_t *vd, ddi_devid_t devid) +{ + struct dk_devid *dkdevid; + uint_t chksum; + size_t blk; + int status; + + if ((status = vd_file_get_devid_block(vd, &blk)) != 0) + return (status); + + dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP); + + /* set revision */ + dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB; + dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB; + + /* copy devid */ + bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid)); + + /* compute checksum */ + chksum = vd_dkdevid2cksum(dkdevid); + + /* set checksum */ + DKD_FORMCHKSUM(chksum, dkdevid); + + /* store the devid */ + if ((status = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, + (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) { + PR0("Error writing devid block at %lu", blk); + status = EIO; + } else { + PR1("devid written at block %lu", blk); + status = 0; + } + + kmem_free(dkdevid, DEV_BSIZE); + return (status); +} + +/* + * Function: + * vd_scsi_rdwr + * + * Description: + * Read or write to a SCSI disk using an absolute disk offset. + * + * Parameters: + * vd - disk on which the operation is performed. + * operation - operation to execute: read (VD_OP_BREAD) or + * write (VD_OP_BWRITE). + * data - buffer where data are read to or written from. + * blk - starting block for the operation. + * len - number of bytes to read or write. + * + * Return Code: + * 0 - success + * n != 0 - error. + */ +static int +vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) +{ + struct uscsi_cmd ucmd; + union scsi_cdb cdb; + int nsectors, nblk; + int max_sectors; + int status, rval; + + ASSERT(!vd->file); + + max_sectors = vd->max_xfer_sz; + nblk = (len / DEV_BSIZE); + + if (len % DEV_BSIZE != 0) + return (EINVAL); + + /* + * Build and execute the uscsi ioctl. 
We build a group0, group1 + * or group4 command as necessary, since some targets + * do not support group1 commands. + */ + while (nblk) { + + bzero(&ucmd, sizeof (ucmd)); + bzero(&cdb, sizeof (cdb)); + + nsectors = (max_sectors < nblk) ? max_sectors : nblk; + + if (blk < (2 << 20) && nsectors <= 0xff) { + FORMG0ADDR(&cdb, blk); + FORMG0COUNT(&cdb, nsectors); + ucmd.uscsi_cdblen = CDB_GROUP0; + } else if (blk > 0xffffffff) { + FORMG4LONGADDR(&cdb, blk); + FORMG4COUNT(&cdb, nsectors); + ucmd.uscsi_cdblen = CDB_GROUP4; + cdb.scc_cmd |= SCMD_GROUP4; + } else { + FORMG1ADDR(&cdb, blk); + FORMG1COUNT(&cdb, nsectors); + ucmd.uscsi_cdblen = CDB_GROUP1; + cdb.scc_cmd |= SCMD_GROUP1; + } + + ucmd.uscsi_cdb = (caddr_t)&cdb; + ucmd.uscsi_bufaddr = data; + ucmd.uscsi_buflen = nsectors * DEV_BSIZE; + ucmd.uscsi_timeout = vd_scsi_rdwr_timeout; + /* + * Set flags so that the command is isolated from normal + * commands and no error message is printed. + */ + ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT; + + if (operation == VD_OP_BREAD) { + cdb.scc_cmd |= SCMD_READ; + ucmd.uscsi_flags |= USCSI_READ; + } else { + cdb.scc_cmd |= SCMD_WRITE; + } + + status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE], + USCSICMD, (intptr_t)&ucmd, (vd_open_flags | FKIOCTL), + kcred, &rval); + + if (status == 0) + status = ucmd.uscsi_status; + + if (status != 0) + break; + + /* + * Check if partial DMA breakup is required. If so, reduce + * the request size by half and retry the last request. 
+ */ + if (ucmd.uscsi_resid == ucmd.uscsi_buflen) { + max_sectors >>= 1; + if (max_sectors <= 0) { + status = EIO; + break; + } + continue; + } + + if (ucmd.uscsi_resid != 0) { + status = EIO; + break; + } + + blk += nsectors; + nblk -= nsectors; + data += nsectors * DEV_BSIZE; /* SECSIZE */ + } + + return (status); +} + static int vd_start_bio(vd_task_t *task) { @@ -523,7 +907,7 @@ slice = request->slice; - ASSERT(slice < vd->nslices); + ASSERT(slice == VD_SLICE_NONE || slice < vd->nslices); ASSERT((request->operation == VD_OP_BREAD) || (request->operation == VD_OP_BWRITE)); @@ -538,7 +922,7 @@ buf->b_flags = B_BUSY; buf->b_bcount = request->nbytes; buf->b_lblkno = request->addr; - buf->b_edev = vd->dev[slice]; + buf->b_edev = (slice == VD_SLICE_NONE)? NODEV : vd->dev[slice]; mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP; @@ -574,9 +958,32 @@ status = 0; } } else { - status = ldi_strategy(vd->ldi_handle[slice], buf); - if (status == 0) - return (EINPROGRESS); /* will complete on completionq */ + if (slice == VD_SLICE_NONE) { + /* + * This is not a disk image so it is a real disk. We + * assume that the underlying device driver supports + * USCSICMD ioctls. This is the case of all SCSI devices + * (sd, ssd...). + * + * In the future if we have non-SCSI disks we would need + * to invoke the appropriate function to do I/O using an + * absolute disk offset (for example using DKIOCTL_RWCMD + * for IDE disks). + */ + rv = vd_scsi_rdwr(vd, request->operation, + buf->b_un.b_addr, request->addr, request->nbytes); + if (rv != 0) { + request->nbytes = 0; + status = EIO; + } else { + status = 0; + } + } else { + status = ldi_strategy(vd->ldi_handle[slice], buf); + if (status == 0) + /* will complete on completionq */ + return (EINPROGRESS); + } } /* Clean up after error */ @@ -980,16 +1387,17 @@ return (sum); } +/* + * Handle ioctls to a disk slice. 
+ */ static int vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) { dk_efi_t *dk_ioc; - struct dk_label label; - struct vtoc *vtoc; - int i; switch (vd->vdisk_label) { + /* ioctls for a slice from a disk with a VTOC label */ case VD_DISK_LABEL_VTOC: switch (cmd) { @@ -1001,70 +1409,11 @@ ASSERT(ioctl_arg != NULL); bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); return (0); - case DKIOCSVTOC: - if (!vd->file) - return (ENOTSUP); - ASSERT(ioctl_arg != NULL); - vtoc = (struct vtoc *)ioctl_arg; - - if (vtoc->v_sanity != VTOC_SANE || - vtoc->v_sectorsz != DEV_BSIZE || - vtoc->v_nparts != V_NUMPAR) - return (EINVAL); - - bzero(&label, sizeof (label)); - label.dkl_ncyl = vd->dk_geom.dkg_ncyl; - label.dkl_acyl = vd->dk_geom.dkg_acyl; - label.dkl_pcyl = vd->dk_geom.dkg_pcyl; - label.dkl_nhead = vd->dk_geom.dkg_nhead; - label.dkl_nsect = vd->dk_geom.dkg_nsect; - label.dkl_intrlv = vd->dk_geom.dkg_intrlv; - label.dkl_apc = vd->dk_geom.dkg_apc; - label.dkl_rpm = vd->dk_geom.dkg_rpm; - label.dkl_write_reinstruct = - vd->dk_geom.dkg_write_reinstruct; - label.dkl_read_reinstruct = - vd->dk_geom.dkg_read_reinstruct; - - label.dkl_vtoc.v_nparts = vtoc->v_nparts; - label.dkl_vtoc.v_sanity = vtoc->v_sanity; - label.dkl_vtoc.v_version = vtoc->v_version; - for (i = 0; i < vtoc->v_nparts; i++) { - label.dkl_vtoc.v_timestamp[i] = - vtoc->timestamp[i]; - label.dkl_vtoc.v_part[i].p_tag = - vtoc->v_part[i].p_tag; - label.dkl_vtoc.v_part[i].p_flag = - vtoc->v_part[i].p_flag; - label.dkl_map[i].dkl_cylno = - vtoc->v_part[i].p_start / - (label.dkl_nhead * label.dkl_nsect); - label.dkl_map[i].dkl_nblk = - vtoc->v_part[i].p_size; - } - bcopy(vtoc->v_asciilabel, label.dkl_asciilabel, - LEN_DKL_ASCII); - bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume, - LEN_DKL_VVOL); - bcopy(vtoc->v_bootinfo, label.dkl_vtoc.v_bootinfo, - sizeof (vtoc->v_bootinfo)); - - /* re-compute checksum */ - label.dkl_magic = DKL_MAGIC; - label.dkl_cksum = vd_lbl2cksum(&label); - - /* write label to file */ - if 
(VD_FILE_LABEL_WRITE(vd, &label) < 0) - return (EIO); - - /* update the cached vdisk VTOC */ - bcopy(vtoc, &vd->vtoc, sizeof (vd->vtoc)); - - return (0); default: return (ENOTSUP); } + /* ioctls for a slice from a disk with an EFI label */ case VD_DISK_LABEL_EFI: switch (cmd) { @@ -1085,6 +1434,188 @@ } } +/* + * Handle ioctls to a disk image. + */ +static int +vd_do_file_ioctl(vd_t *vd, int cmd, void *ioctl_arg) +{ + struct dk_label label; + struct dk_geom *geom; + struct vtoc *vtoc; + int i, rc; + + ASSERT(vd->file); + ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); + + switch (cmd) { + + case DKIOCGGEOM: + ASSERT(ioctl_arg != NULL); + geom = (struct dk_geom *)ioctl_arg; + + if (VD_FILE_LABEL_READ(vd, &label) < 0) + return (EIO); + + if (label.dkl_magic != DKL_MAGIC || + label.dkl_cksum != vd_lbl2cksum(&label)) + return (EINVAL); + + bzero(geom, sizeof (struct dk_geom)); + geom->dkg_ncyl = label.dkl_ncyl; + geom->dkg_acyl = label.dkl_acyl; + geom->dkg_nhead = label.dkl_nhead; + geom->dkg_nsect = label.dkl_nsect; + geom->dkg_intrlv = label.dkl_intrlv; + geom->dkg_apc = label.dkl_apc; + geom->dkg_rpm = label.dkl_rpm; + geom->dkg_pcyl = label.dkl_pcyl; + geom->dkg_write_reinstruct = label.dkl_write_reinstruct; + geom->dkg_read_reinstruct = label.dkl_read_reinstruct; + + return (0); + + case DKIOCGVTOC: + ASSERT(ioctl_arg != NULL); + vtoc = (struct vtoc *)ioctl_arg; + + if (VD_FILE_LABEL_READ(vd, &label) < 0) + return (EIO); + + if (label.dkl_magic != DKL_MAGIC || + label.dkl_cksum != vd_lbl2cksum(&label)) + return (EINVAL); + + bzero(vtoc, sizeof (struct vtoc)); + + vtoc->v_sanity = label.dkl_vtoc.v_sanity; + vtoc->v_version = label.dkl_vtoc.v_version; + vtoc->v_sectorsz = DEV_BSIZE; + vtoc->v_nparts = label.dkl_vtoc.v_nparts; + + for (i = 0; i < vtoc->v_nparts; i++) { + vtoc->v_part[i].p_tag = + label.dkl_vtoc.v_part[i].p_tag; + vtoc->v_part[i].p_flag = + label.dkl_vtoc.v_part[i].p_flag; + vtoc->v_part[i].p_start = + label.dkl_map[i].dkl_cylno * + (label.dkl_nhead * 
label.dkl_nsect); + vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk; + vtoc->timestamp[i] = + label.dkl_vtoc.v_timestamp[i]; + } + /* + * The bootinfo array can not be copied with bcopy() because + * elements are of type long in vtoc (so 64-bit) and of type + * int in dk_vtoc (so 32-bit). + */ + vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0]; + vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1]; + vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2]; + bcopy(label.dkl_asciilabel, vtoc->v_asciilabel, + LEN_DKL_ASCII); + bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume, + LEN_DKL_VVOL); + + return (0); + + case DKIOCSGEOM: + ASSERT(ioctl_arg != NULL); + geom = (struct dk_geom *)ioctl_arg; + + if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0) + return (EINVAL); + + /* + * The current device geometry is not updated, just the driver + * "notion" of it. The device geometry will be effectively + * updated when a label is written to the device during a next + * DKIOCSVTOC. + */ + bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom)); + return (0); + + case DKIOCSVTOC: + ASSERT(ioctl_arg != NULL); + ASSERT(vd->dk_geom.dkg_nhead != 0 && + vd->dk_geom.dkg_nsect != 0); + vtoc = (struct vtoc *)ioctl_arg; + + if (vtoc->v_sanity != VTOC_SANE || + vtoc->v_sectorsz != DEV_BSIZE || + vtoc->v_nparts != V_NUMPAR) + return (EINVAL); + + bzero(&label, sizeof (label)); + label.dkl_ncyl = vd->dk_geom.dkg_ncyl; + label.dkl_acyl = vd->dk_geom.dkg_acyl; + label.dkl_pcyl = vd->dk_geom.dkg_pcyl; + label.dkl_nhead = vd->dk_geom.dkg_nhead; + label.dkl_nsect = vd->dk_geom.dkg_nsect; + label.dkl_intrlv = vd->dk_geom.dkg_intrlv; + label.dkl_apc = vd->dk_geom.dkg_apc; + label.dkl_rpm = vd->dk_geom.dkg_rpm; + label.dkl_write_reinstruct = vd->dk_geom.dkg_write_reinstruct; + label.dkl_read_reinstruct = vd->dk_geom.dkg_read_reinstruct; + + label.dkl_vtoc.v_nparts = V_NUMPAR; + label.dkl_vtoc.v_sanity = VTOC_SANE; + label.dkl_vtoc.v_version = vtoc->v_version; + for (i = 0; i < V_NUMPAR; i++) { + 
label.dkl_vtoc.v_timestamp[i] = + vtoc->timestamp[i]; + label.dkl_vtoc.v_part[i].p_tag = + vtoc->v_part[i].p_tag; + label.dkl_vtoc.v_part[i].p_flag = + vtoc->v_part[i].p_flag; + label.dkl_map[i].dkl_cylno = + vtoc->v_part[i].p_start / + (label.dkl_nhead * label.dkl_nsect); + label.dkl_map[i].dkl_nblk = + vtoc->v_part[i].p_size; + } + /* + * The bootinfo array can not be copied with bcopy() because + * elements are of type long in vtoc (so 64-bit) and of type + * int in dk_vtoc (so 32-bit). + */ + label.dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0]; + label.dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1]; + label.dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2]; + bcopy(vtoc->v_asciilabel, label.dkl_asciilabel, + LEN_DKL_ASCII); + bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume, + LEN_DKL_VVOL); + + /* re-compute checksum */ + label.dkl_magic = DKL_MAGIC; + label.dkl_cksum = vd_lbl2cksum(&label); + + /* write label to the disk image */ + if ((rc = vd_file_set_vtoc(vd, &label)) != 0) + return (rc); + + /* update the cached vdisk VTOC */ + bcopy(vtoc, &vd->vtoc, sizeof (vd->vtoc)); + + /* + * The disk geometry may have changed, so we need to write + * the devid (if there is one) so that it is stored at the + * right location. 
+ */ + if (vd->file_devid != NULL && + vd_file_write_devid(vd, vd->file_devid) != 0) { + PR0("Fail to write devid"); + } + + return (0); + + default: + return (ENOTSUP); + } +} + static int vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) { @@ -1100,8 +1631,8 @@ ASSERT(nbytes != 0 && buf != NULL); PR1("Getting \"arg\" data from client"); if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, - request->cookie, request->ncookies, - LDC_COPY_IN)) != 0) { + request->cookie, request->ncookies, + LDC_COPY_IN)) != 0) { PR0("ldc_mem_copy() returned errno %d " "copying from client", status); return (status); @@ -1118,13 +1649,17 @@ * Handle single-slice block devices internally; otherwise, have the * real driver perform the ioctl() */ - if (vd->file || (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo)) { + if (vd->file) { + if ((status = vd_do_file_ioctl(vd, ioctl->cmd, + (void *)ioctl->arg)) != 0) + return (status); + } else if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) { if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, - (void *)ioctl->arg)) != 0) + (void *)ioctl->arg)) != 0) return (status); } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], - ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL), - kcred, &rval)) != 0) { + ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL), + kcred, &rval)) != 0) { PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); return (status); } @@ -1145,8 +1680,8 @@ (ioctl->copyout)((void *)ioctl->arg, buf); if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, - request->cookie, request->ncookies, - LDC_COPY_OUT)) != 0) { + request->cookie, request->ncookies, + LDC_COPY_OUT)) != 0) { PR0("ldc_mem_copy() returned errno %d " "copying to client", status); return (status); @@ -1160,7 +1695,7 @@ static int vd_ioctl(vd_task_t *task) { - int i, status, rc; + int i, status; void *buf = NULL; struct dk_geom dk_geom = {0}; struct vtoc vtoc = {0}; @@ -1246,15 +1781,7 
@@ status = vd_do_ioctl(vd, request, buf, &ioctl[i]); if (request->nbytes) kmem_free(buf, request->nbytes); - if (!vd->file && vd->vdisk_type == VD_DISK_TYPE_DISK && - (request->operation == VD_OP_SET_VTOC || - request->operation == VD_OP_SET_EFI)) { - /* update disk information */ - rc = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, - &vd->vdisk_label); - if (rc != 0) - PR0("vd_read_vtoc return error %d", rc); - } + PR0("Returning %d", status); return (status); } @@ -1266,21 +1793,26 @@ vd_dring_payload_t *request = task->request; vd_devid_t *vd_devid; impl_devid_t *devid; - int status, bufid_len, devid_len, len; + int status, bufid_len, devid_len, len, sz; int bufbytes; PR1("Get Device ID, nbytes=%ld", request->nbytes); if (vd->file) { - /* no devid for disk on file */ - return (ENOENT); - } - - if (ddi_lyr_get_devid(vd->dev[request->slice], - (ddi_devid_t *)&devid) != DDI_SUCCESS) { - /* the most common failure is that no devid is available */ - PR2("No Device ID"); - return (ENOENT); + if (vd->file_devid == NULL) { + PR2("No Device ID"); + return (ENOENT); + } else { + sz = ddi_devid_sizeof(vd->file_devid); + devid = kmem_alloc(sz, KM_SLEEP); + bcopy(vd->file_devid, devid, sz); + } + } else { + if (ddi_lyr_get_devid(vd->dev[request->slice], + (ddi_devid_t *)&devid) != DDI_SUCCESS) { + PR2("No Device ID"); + return (ENOENT); + } } bufid_len = request->nbytes - sizeof (vd_devid_t) + 1; @@ -1365,13 +1897,10 @@ return (ENOTSUP); } - /* Handle client using absolute disk offsets */ - if ((vd->vdisk_type == VD_DISK_TYPE_DISK) && - (request->slice == UINT8_MAX)) - request->slice = VD_ENTIRE_DISK_SLICE; - /* Range-check slice */ - if (request->slice >= vd->nslices) { + if (request->slice >= vd->nslices && + (vd->vdisk_type != VD_DISK_TYPE_DISK || + request->slice != VD_SLICE_NONE)) { PR0("Invalid \"slice\" %u (max %u) for virtual disk", request->slice, (vd->nslices - 1)); return (EINVAL); @@ -1382,7 +1911,7 @@ /* Start the operation */ if ((status = 
vds_operation[i].start(task)) != EINPROGRESS) { PR0("operation : %s returned status %d", - vds_operation[i].namep, status); + vds_operation[i].namep, status); request->status = status; /* op succeeded or failed */ return (0); /* but request completed */ } @@ -1415,8 +1944,8 @@ vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) { return ((tag->vio_msgtype == type) && - (tag->vio_subtype == subtype) && - (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; + (tag->vio_subtype == subtype) && + (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; } /* @@ -1492,7 +2021,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, - VIO_VER_INFO)) { + VIO_VER_INFO)) { return (ENOMSG); /* not a version message */ } @@ -1561,7 +2090,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, - VIO_ATTR_INFO)) { + VIO_ATTR_INFO)) { PR0("Message is not an attribute message"); return (ENOMSG); } @@ -1635,8 +2164,8 @@ size_t max_inband_msglen = sizeof (vd_dring_inband_msg_t) + ((max_xfer_bytes/PAGESIZE + - ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* - (sizeof (ldc_mem_cookie_t))); + ((max_xfer_bytes % PAGESIZE) ? 
1 : 0))* + (sizeof (ldc_mem_cookie_t))); /* * Set the maximum expected message length to @@ -1681,7 +2210,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, - VIO_DRING_REG)) { + VIO_DRING_REG)) { PR0("Message is not a register-dring message"); return (ENOMSG); } @@ -1745,7 +2274,7 @@ ASSERT(reg_msg->ncookies == 1); if ((status = - ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { + ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) { PR0("ldc_mem_dring_info() returned errno %d", status); if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0) PR0("ldc_mem_dring_unmap() returned errno %d", status); @@ -1801,7 +2330,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, - VIO_DRING_UNREG)) { + VIO_DRING_UNREG)) { PR0("Message is not an unregister-dring message"); return (ENOMSG); } @@ -1883,7 +2412,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, - VIO_DESC_DATA)) { + VIO_DESC_DATA)) { PR1("Message is not an in-band-descriptor message"); return (ENOMSG); } @@ -2028,7 +2557,7 @@ ASSERT(msglen >= sizeof (msg->tag)); if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO, - VIO_DRING_DATA)) { + VIO_DRING_DATA)) { PR1("Message is not a dring-data message"); return (ENOMSG); } @@ -2065,7 +2594,7 @@ PR1("Processing descriptor range, start = %u, end = %u", dring_msg->start_idx, dring_msg->end_idx); return (vd_process_element_range(vd, dring_msg->start_idx, - dring_msg->end_idx, msg, msglen)); + dring_msg->end_idx, msg, msglen)); } static int @@ -2153,7 +2682,7 @@ case VIO_DRING_MODE: /* expect register-dring message */ if ((status = - vd_process_dring_reg_msg(vd, msg, msglen)) != 0) + vd_process_dring_reg_msg(vd, msg, msglen)) != 0) return (status); /* One dring negotiated, move to that state */ @@ -2183,7 +2712,7 @@ * support using more than one */ if ((status = - vd_process_dring_reg_msg(vd, msg, 
msglen)) != ENOMSG) + vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG) return (status); /* @@ -2206,7 +2735,7 @@ * them first */ if ((status = vd_process_dring_msg(vd, msg, - msglen)) != ENOMSG) + msglen)) != ENOMSG) return (status); /* @@ -2280,7 +2809,7 @@ } PR1("\tResulting in state %d (%s)", vd->state, - vd_decode_state(vd->state)); + vd_decode_state(vd->state)); /* Send the "ack" or "nack" to the client */ PR1("Sending %s", @@ -2349,14 +2878,14 @@ switch (status) { case 0: rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp, - msglen); + msglen); /* check if max_msglen changed */ if (msgsize != vd->max_msglen) { PR0("max_msglen changed 0x%lx to 0x%lx bytes\n", msgsize, vd->max_msglen); kmem_free(vd->vio_msgp, msgsize); vd->vio_msgp = - kmem_alloc(vd->max_msglen, KM_SLEEP); + kmem_alloc(vd->max_msglen, KM_SLEEP); } if (rv == EINPROGRESS) continue; @@ -2657,15 +3186,17 @@ ushort_t sum; vattr_t vattr; dev_t dev; + size_t size; char *file_path = vd->device_path; char dev_path[MAXPATHLEN + 1]; + char prefix; ldi_handle_t lhandle; struct dk_cinfo dk_cinfo; struct dk_label label; /* make sure the file is valid */ if ((status = lookupname(file_path, UIO_SYSSPACE, FOLLOW, - NULLVPP, &vd->file_vnode)) != 0) { + NULLVPP, &vd->file_vnode)) != 0) { PRN("Cannot lookup file(%s) errno %d", file_path, status); return (status); } @@ -2755,7 +3286,7 @@ label.dkl_acyl = 0; label.dkl_nsect = vd->file_size / - (DEV_BSIZE * label.dkl_pcyl); + (DEV_BSIZE * label.dkl_pcyl); label.dkl_ncyl = label.dkl_pcyl - label.dkl_acyl; label.dkl_nhead = 1; label.dkl_write_reinstruct = 0; @@ -2770,14 +3301,29 @@ label.dkl_nhead, label.dkl_nsect); PR0("provided disk size: %ld bytes\n", (uint64_t) (label.dkl_pcyl * - label.dkl_nhead * label.dkl_nsect * DEV_BSIZE)); + label.dkl_nhead * label.dkl_nsect * DEV_BSIZE)); + + if (vd->file_size < (1ULL << 20)) { + size = vd->file_size >> 10; + prefix = 'K'; /* Kilobyte */ + } else if (vd->file_size < (1ULL << 30)) { + size = vd->file_size >> 20; + prefix 
= 'M'; /* Megabyte */ + } else if (vd->file_size < (1ULL << 40)) { + size = vd->file_size >> 30; + prefix = 'G'; /* Gigabyte */ + } else { + size = vd->file_size >> 40; + prefix = 'T'; /* Terabyte */ + } /* * We must have a correct label name otherwise format(1m) will * not recognized the disk as labeled. */ (void) snprintf(label.dkl_asciilabel, LEN_DKL_ASCII, - "SUNVDSK cyl %d alt %d hd %d sec %d", + "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", + size, prefix, label.dkl_ncyl, label.dkl_acyl, label.dkl_nhead, label.dkl_nsect); @@ -2788,23 +3334,22 @@ label.dkl_vtoc.v_part[2].p_tag = V_BACKUP; label.dkl_map[2].dkl_cylno = 0; label.dkl_map[2].dkl_nblk = label.dkl_ncyl * - label.dkl_nhead * label.dkl_nsect; + label.dkl_nhead * label.dkl_nsect; label.dkl_map[0] = label.dkl_map[2]; label.dkl_map[0] = label.dkl_map[2]; label.dkl_cksum = vd_lbl2cksum(&label); /* write default label to file */ - if (VD_FILE_LABEL_WRITE(vd, &label) < 0) { + if ((rval = vd_file_set_vtoc(vd, &label)) != 0) { PRN("Can't write label to %s", file_path); - return (EIO); + return (rval); } } vd->nslices = label.dkl_vtoc.v_nparts; /* sector size = block size = DEV_BSIZE */ - vd->vdisk_size = (label.dkl_pcyl * - label.dkl_nhead * label.dkl_nsect) / DEV_BSIZE; + vd->vdisk_size = vd->file_size / DEV_BSIZE; vd->vdisk_type = VD_DISK_TYPE_DISK; vd->vdisk_label = VD_DISK_LABEL_VTOC; vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ @@ -2817,7 +3362,7 @@ } if ((status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD, - kcred, &lhandle, vd->vds->ldi_ident)) != 0) { + kcred, &lhandle, vd->vds->ldi_ident)) != 0) { PR0("ldi_open_by_dev() returned errno %d for device %s", status, dev_path); } else { @@ -2867,12 +3412,52 @@ vd->vtoc.v_part[i].p_tag = label.dkl_vtoc.v_part[i].p_tag; vd->vtoc.v_part[i].p_flag = label.dkl_vtoc.v_part[i].p_flag; vd->vtoc.v_part[i].p_start = label.dkl_map[i].dkl_cylno * - label.dkl_nhead * label.dkl_nsect; + label.dkl_nhead * label.dkl_nsect; vd->vtoc.v_part[i].p_size = 
label.dkl_map[i].dkl_nblk; vd->ldi_handle[i] = NULL; vd->dev[i] = NULL; } + /* Setup devid for the disk image */ + + status = vd_file_read_devid(vd, &vd->file_devid); + + if (status == 0) { + /* a valid devid was found */ + return (0); + } + + if (status != EINVAL) { + /* + * There was an error while trying to read the devid. So this + * disk image may have a devid but we are unable to read it. + */ + PR0("can not read devid for %s", file_path); + vd->file_devid = NULL; + return (0); + } + + /* + * No valid device id was found so we create one. Note that a failure + * to create a device id is not fatal and does not prevent the disk + * image from being attached. + */ + PR1("creating devid for %s", file_path); + + if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0, + &vd->file_devid) != DDI_SUCCESS) { + PR0("fail to create devid for %s", file_path); + vd->file_devid = NULL; + return (0); + } + + /* write devid to the disk image */ + if (vd_file_write_devid(vd, vd->file_devid) != 0) { + PR0("fail to write devid for %s", file_path); + ddi_devid_free(vd->file_devid); + vd->file_devid = NULL; + } + return (0); } @@ -2928,8 +3513,8 @@ /* Verify backing device supports dk_cinfo, dk_geom, and vtoc */ if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, - (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, - &rval)) != 0) { + (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, + &rval)) != 0) { PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", status, device_path); return (status); @@ -2952,9 +3537,9 @@ (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), kcred, &rval)) != 0) { - PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", - status, device_path); - return (status); + PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", + status, device_path); + return (status); } /* Store the device's max transfer size for return to the client */ @@ -2962,7 +3547,7 @@ /* Determine if backing device is a pseudo device */ if 
((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]), - dev_to_instance(vd->dev[0]), 0)) == NULL) { + dev_to_instance(vd->dev[0]), 0)) == NULL) { PRN("%s is no longer accessible", device_path); return (EIO); } @@ -3059,7 +3644,7 @@ /* Initialize locking */ if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED, - &iblock) != DDI_SUCCESS) { + &iblock) != DDI_SUCCESS) { PRN("Could not get iblock cookie."); return (EIO); } @@ -3072,14 +3657,14 @@ (void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id); PR1("tq_name = %s", tq_name); if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1, - TASKQ_DEFAULTPRI, 0)) == NULL) { + TASKQ_DEFAULTPRI, 0)) == NULL) { PRN("Could not create task queue"); return (EIO); } (void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id); PR1("tq_name = %s", tq_name); if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1, - TASKQ_DEFAULTPRI, 0)) == NULL) { + TASKQ_DEFAULTPRI, 0)) == NULL) { PRN("Could not create task queue"); return (EIO); } @@ -3099,7 +3684,7 @@ vd->initialized |= VD_LDC; if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events, - (caddr_t)vd)) != 0) { + (caddr_t)vd)) != 0) { PRN("Could not initialize LDC channel %lu," "reg_callback failed with error %d", ldc_id, status); return (status); @@ -3240,6 +3825,8 @@ (void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1, 0, kcred); VN_RELE(vd->file_vnode); + if (vd->file_devid != NULL) + ddi_devid_free(vd->file_devid); } else { /* Close any open backing-device slices */ for (uint_t slice = 0; slice < vd->nslices; slice++) { @@ -3281,8 +3868,8 @@ /* Look for channel endpoint child(ren) of the vdisk MD node */ if ((num_channels = md_scan_dag(md, vd_node, - md_find_name(md, VD_CHANNEL_ENDPOINT), - md_find_name(md, "fwd"), channel)) <= 0) { + md_find_name(md, VD_CHANNEL_ENDPOINT), + md_find_name(md, "fwd"), channel)) <= 0) { PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT); return (-1); } @@ -3334,7 +3921,7 @@ } PR0("Adding vdisk ID %lu", 
id); if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP, - &device_path) != 0) { + &device_path) != 0) { PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); return; } @@ -3411,13 +3998,13 @@ /* Determine whether device path has changed */ if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP, - &prev_dev) != 0) { + &prev_dev) != 0) { PRN("Error getting previous vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); return; } if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP, - &curr_dev) != 0) { + &curr_dev) != 0) { PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP); return; } @@ -3483,7 +4070,7 @@ * broken that there is no point in continuing. */ if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, - VD_REG_PROP)) { + VD_REG_PROP)) { PRN("vds \"%s\" property does not exist", VD_REG_PROP); return (DDI_FAILURE); } @@ -3503,11 +4090,10 @@ return (DDI_FAILURE); } - vds->dip = dip; vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS, - vds_destroy_vd, - sizeof (void *)); + vds_destroy_vd, sizeof (void *)); + ASSERT(vds->vd_table != NULL); if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) { @@ -3529,7 +4115,7 @@ ispecp->specp = pspecp; if (mdeg_register(ispecp, &vd_match, vds_process_md, vds, - &vds->mdeg) != MDEG_SUCCESS) { + &vds->mdeg) != MDEG_SUCCESS) { PRN("Unable to register for MD updates"); kmem_free(ispecp, sizeof (mdeg_node_spec_t)); kmem_free(pspecp, sz);
--- a/usr/src/uts/sun4v/sys/vdsk_common.h Fri Jul 20 15:17:01 2007 -0700 +++ b/usr/src/uts/sun4v/sys/vdsk_common.h Fri Jul 20 17:15:29 2007 -0700 @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -108,6 +108,11 @@ #define VD_OP_COUNT 13 /* Number of operations */ /* + * Slice for absolute disk transaction. + */ +#define VD_SLICE_NONE 0xFF + +/* * EFI disks do not have a slice 7. Actually that slice is used to represent * the whole disk. */