Mercurial > illumos > illumos-gate
changeset 4963:ce6338ba4a73
6531913 vds can lose access to vdisks built from files located on the root fs
6575050 vds should support unformatted disks
author | achartre |
---|---|
date | Thu, 30 Aug 2007 07:43:53 -0700 |
parents | 44219572abba |
children | a9481fc76e88 |
files | usr/src/uts/sun4v/io/vdc.c usr/src/uts/sun4v/io/vds.c usr/src/uts/sun4v/sys/vdc.h |
diffstat | 3 files changed, 839 insertions(+), 489 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/vdc.c Thu Aug 30 01:17:19 2007 -0700 +++ b/usr/src/uts/sun4v/io/vdc.c Thu Aug 30 07:43:53 2007 -0700 @@ -132,7 +132,10 @@ static int vdc_init_descriptor_ring(vdc_t *vdc); static void vdc_destroy_descriptor_ring(vdc_t *vdc); static int vdc_setup_devid(vdc_t *vdc); -static void vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi); +static void vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi); +static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *); +static void vdc_store_label_unk(vdc_t *vdc); +static boolean_t vdc_is_opened(vdc_t *vdc); /* handshake with vds */ static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); @@ -174,8 +177,10 @@ /* dkio */ static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); -static int vdc_create_fake_geometry(vdc_t *vdc); -static int vdc_setup_disk_layout(vdc_t *vdc); +static void vdc_create_fake_geometry(vdc_t *vdc); +static int vdc_validate_geometry(vdc_t *vdc); +static void vdc_validate(vdc_t *vdc); +static void vdc_validate_task(void *arg); static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir); static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, @@ -385,11 +390,25 @@ return (DDI_FAILURE); } - if (vdc->open_count) { + if (vdc_is_opened(vdc)) { DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); return (DDI_FAILURE); } + if (vdc->dkio_flush_pending) { + DMSG(vdc, 0, + "[%d] Cannot detach: %d outstanding DKIO flushes\n", + instance, vdc->dkio_flush_pending); + return (DDI_FAILURE); + } + + if (vdc->validate_pending) { + DMSG(vdc, 0, + "[%d] Cannot detach: %d outstanding validate request\n", + instance, vdc->validate_pending); + return (DDI_FAILURE); + } + DMSG(vdc, 0, "[%d] proceeding...\n", instance); /* mark instance as detaching */ @@ -465,8 +484,8 @@ if (vdc->vtoc) kmem_free(vdc->vtoc, sizeof (struct vtoc)); - if (vdc->label) - kmem_free(vdc->label, DK_LABEL_SIZE); + if (vdc->geom) + kmem_free(vdc->geom, sizeof (struct dk_geom)); if (vdc->devid) { ddi_devid_unregister(dip); @@ -518,7 +537,6 @@ vdc->dip = dip; vdc->instance = instance; - vdc->open_count = 0; vdc->vdisk_type = VD_DISK_TYPE_UNK; vdc->vdisk_label = VD_DISK_LABEL_UNK; vdc->state = VDC_STATE_INIT; @@ -529,6 +547,7 @@ vdc->max_xfer_sz = maxphys / DEV_BSIZE; vdc->vtoc = NULL; + vdc->geom = NULL; vdc->cinfo = NULL; vdc->minfo = NULL; @@ -588,16 +607,18 @@ atomic_inc_32(&vdc_instance_count); /* - * Once the handshake is complete, we can use the DRing to send - * requests to the vDisk server to calculate the geometry and - * VTOC of the "disk" + * Check the disk label. This will send requests and do the handshake. + * We don't really care about the disk label now. What we really need is + * the handshake do be done so that we know the type of the disk (slice + * or full disk) and the appropriate device nodes can be created. */ - status = vdc_setup_disk_layout(vdc); - if (status != 0) { - DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", - vdc->instance, status); - goto return_status; - } + vdc->vdisk_label = VD_DISK_LABEL_UNK; + vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); + vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP); + + mutex_enter(&vdc->lock); + (void) vdc_validate_geometry(vdc); + mutex_exit(&vdc->lock); /* * Now that we have the device info we can create the @@ -933,16 +954,11 @@ int i; ASSERT(vdc != NULL); + ASSERT(vdc->vtoc != NULL); instance = vdc->instance; dip = vdc->dip; - if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { - DMSG(vdc, 0, "![%d] Could not create device node property." - " No VTOC available", instance); - return (ENXIO); - } - switch (vdc->vdisk_type) { case VD_DISK_TYPE_DISK: num_slices = V_NUMPAR; @@ -955,6 +971,17 @@ return (EINVAL); } + if (vdc->vdisk_label == VD_DISK_LABEL_UNK) { + /* remove all properties */ + for (i = 0; i < num_slices; i++) { + dev = makedevice(ddi_driver_major(dip), + VD_MAKE_DEV(instance, i)); + (void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME); + (void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME); + } + return (0); + } + for (i = 0; i < num_slices; i++) { dev = makedevice(ddi_driver_major(dip), VD_MAKE_DEV(instance, i)); @@ -983,18 +1010,125 @@ return (0); } +/* + * Function: + * vdc_is_opened + * + * Description: + * This function checks if any slice of a given virtual disk is + * currently opened. + * + * Parameters: + * vdc - soft state pointer + * + * Return Values + * B_TRUE - at least one slice is opened. + * B_FALSE - no slice is opened. + */ +static boolean_t +vdc_is_opened(vdc_t *vdc) +{ + int i, nslices; + + switch (vdc->vdisk_type) { + case VD_DISK_TYPE_DISK: + nslices = V_NUMPAR; + break; + case VD_DISK_TYPE_SLICE: + nslices = 1; + break; + case VD_DISK_TYPE_UNK: + default: + ASSERT(0); + } + + /* check if there's any layered open */ + for (i = 0; i < nslices; i++) { + if (vdc->open_lyr[i] > 0) + return (B_TRUE); + } + + /* check if there is any other kind of open */ + for (i = 0; i < OTYPCNT; i++) { + if (vdc->open[i] != 0) + return (B_TRUE); + } + + return (B_FALSE); +} + +static int +vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp) +{ + uint8_t slicemask; + int i; + + ASSERT(otyp < OTYPCNT); + ASSERT(slice < V_NUMPAR); + ASSERT(MUTEX_HELD(&vdc->lock)); + + slicemask = 1 << slice; + + /* check if slice is already exclusively opened */ + if (vdc->open_excl & slicemask) + return (EBUSY); + + /* if open exclusive, check if slice is already opened */ + if (flag & FEXCL) { + if (vdc->open_lyr[slice] > 0) + return (EBUSY); + for (i = 0; i < OTYPCNT; i++) { + if (vdc->open[i] & slicemask) + return (EBUSY); + } + vdc->open_excl |= slicemask; + } + + /* mark slice as opened */ + if (otyp == OTYP_LYR) { + vdc->open_lyr[slice]++; + } else { + vdc->open[otyp] |= slicemask; + } + + return (0); +} + +static void +vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp) +{ + uint8_t slicemask; + + ASSERT(otyp < OTYPCNT); + ASSERT(slice < V_NUMPAR); + ASSERT(MUTEX_HELD(&vdc->lock)); + + slicemask = 1 << slice; + + if (otyp == OTYP_LYR) { + ASSERT(vdc->open_lyr[slice] > 0); + vdc->open_lyr[slice]--; + } else { + vdc->open[otyp] &= ~slicemask; + } + + if (flag & FEXCL) + vdc->open_excl &= ~slicemask; +} + static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) { _NOTE(ARGUNUSED(cred)) - int instance; - vdc_t *vdc; + int instance; + int slice, status = 0; + vdc_t *vdc; ASSERT(dev != NULL); instance = VDCUNIT(*dev); - if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) + if (otyp >= OTYPCNT) return (EINVAL); if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { @@ -1005,11 +1139,53 @@ DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", getminor(*dev), flag, otyp); + slice = VDCPART(*dev); + mutex_enter(&vdc->lock); - vdc->open_count++; + + status = vdc_mark_opened(vdc, slice, flag, otyp); + + if (status != 0) { + mutex_exit(&vdc->lock); + return (status); + } + + if (flag & (FNDELAY | FNONBLOCK)) { + + /* don't resubmit a validate request if there's already one */ + if (vdc->validate_pending > 0) { + mutex_exit(&vdc->lock); + return (0); + } + + /* call vdc_validate() asynchronously to avoid blocking */ + if (taskq_dispatch(system_taskq, vdc_validate_task, + (void *)vdc, TQ_NOSLEEP) == NULL) { + vdc_mark_closed(vdc, slice, flag, otyp); + mutex_exit(&vdc->lock); + return (ENXIO); + } + + vdc->validate_pending++; + mutex_exit(&vdc->lock); + return (0); + } + mutex_exit(&vdc->lock); - return (0); + vdc_validate(vdc); + + mutex_enter(&vdc->lock); + + if (vdc->vdisk_label == VD_DISK_LABEL_UNK || + vdc->vtoc->v_part[slice].p_size == 0) { + vdc_mark_closed(vdc, slice, flag, otyp); + status = EIO; + } + + mutex_exit(&vdc->lock); + + return (status); } static int @@ -1018,11 +1194,12 @@ _NOTE(ARGUNUSED(cred)) int instance; + int slice; vdc_t *vdc; instance = VDCUNIT(dev); - if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) + if (otyp >= OTYPCNT) return (EINVAL); if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { @@ -1031,19 +1208,11 @@ } DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); - if (vdc->dkio_flush_pending) { - DMSG(vdc, 0, - "[%d] Cannot detach: %d outstanding DKIO flushes\n", - instance, vdc->dkio_flush_pending); - return (EBUSY); - } - - /* - * Should not need the mutex here, since the framework should protect - * against more opens on this device, but just in case. - */ + + slice = VDCPART(dev); + mutex_enter(&vdc->lock); - vdc->open_count--; + vdc_mark_closed(vdc, slice, flag, otyp); mutex_exit(&vdc->lock); return (0); @@ -4072,6 +4241,32 @@ /* * We now verify the attributes sent by vds. */ + if (attr_msg->vdisk_size == 0) { + DMSG(vdc, 0, "[%d] Invalid disk size from vds", + vdc->instance); + status = EINVAL; + break; + } + + if (attr_msg->max_xfer_sz == 0) { + DMSG(vdc, 0, "[%d] Invalid transfer size from vds", + vdc->instance); + status = EINVAL; + break; + } + + /* + * If the disk size is already set check that it hasn't changed. + */ + if ((vdc->vdisk_size != 0) && + (vdc->vdisk_size != attr_msg->vdisk_size)) { + DMSG(vdc, 0, "[%d] Different disk size from vds " + "(old=0x%lx - new=0x%lx", vdc->instance, + vdc->vdisk_size, attr_msg->vdisk_size) + status = EINVAL; + break; + } + vdc->vdisk_size = attr_msg->vdisk_size; vdc->vdisk_type = attr_msg->vdisk_type; @@ -4107,6 +4302,11 @@ break; } + /* + * Now that we have received all attributes we can create a + * fake geometry for the disk. + */ + vdc_create_fake_geometry(vdc); break; case VIO_SUBTYPE_NACK: @@ -4394,53 +4594,53 @@ * This function implements the DKIOCGAPART ioctl. * * Arguments: - * dev - device + * vdc - soft state pointer * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure * flag - ioctl flags */ static int -vdc_dkio_get_partition(dev_t dev, caddr_t arg, int flag) +vdc_dkio_get_partition(vdc_t *vdc, caddr_t arg, int flag) { - struct dk_geom geom; - struct vtoc vtoc; + struct dk_geom *geom; + struct vtoc *vtoc; union { struct dk_map map[NDKMAP]; struct dk_map32 map32[NDKMAP]; } data; int i, rv, size; - rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); - if (rv != 0) + mutex_enter(&vdc->lock); + + if ((rv = vdc_validate_geometry(vdc)) != 0) { + mutex_exit(&vdc->lock); return (rv); - - rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); - if (rv != 0) - return (rv); - - if (vtoc.v_nparts != NDKMAP || - geom.dkg_nhead == 0 || geom.dkg_nsect == 0) - return (EINVAL); + } + + vtoc = vdc->vtoc; + geom = vdc->geom; if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { - for (i = 0; i < NDKMAP; i++) { - data.map32[i].dkl_cylno = vtoc.v_part[i].p_start / - (geom.dkg_nhead * geom.dkg_nsect); - data.map32[i].dkl_nblk = vtoc.v_part[i].p_size; + for (i = 0; i < vtoc->v_nparts; i++) { + data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / + (geom->dkg_nhead * geom->dkg_nsect); + data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; } size = NDKMAP * sizeof (struct dk_map32); } else { - for (i = 0; i < NDKMAP; i++) { - data.map[i].dkl_cylno = vtoc.v_part[i].p_start / - (geom.dkg_nhead * geom.dkg_nsect); - data.map[i].dkl_nblk = vtoc.v_part[i].p_size; + for (i = 0; i < vtoc->v_nparts; i++) { + data.map[i].dkl_cylno = vtoc->v_part[i].p_start / + (geom->dkg_nhead * geom->dkg_nsect); + data.map[i].dkl_nblk = vtoc->v_part[i].p_size; } size = NDKMAP * sizeof (struct dk_map); } + mutex_exit(&vdc->lock); + if (ddi_copyout(&data, arg, size, flag) != 0) return (EFAULT); @@ -4612,7 +4812,6 @@ size_t alloc_len = 0; /* #bytes to allocate mem for */ caddr_t mem_p = NULL; size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); - struct vtoc vtoc_saved; vdc_dk_ioctl_t *iop; vdc = ddi_get_soft_state(vdc_state, instance); @@ -4669,6 +4868,9 @@ case DIOCTL_RWCMD: { + if (vdc->cinfo == NULL) + return (ENXIO); + if (vdc->cinfo->dki_ctype != DKC_DIRECT) return (ENOTTY); @@ -4677,10 +4879,7 @@ case DKIOCGAPART: { - if (vdc->vdisk_label != VD_DISK_LABEL_VTOC) - return (ENOTSUP); - - return (vdc_dkio_get_partition(dev, arg, mode)); + return (vdc_dkio_get_partition(vdc, arg, mode)); } case DKIOCINFO: @@ -4771,6 +4970,7 @@ /* clean up if dispatch fails */ mutex_enter(&vdc->lock); vdc->dkio_flush_pending--; + mutex_exit(&vdc->lock); kmem_free(dkarg, sizeof (vdc_dk_arg_t)); } @@ -4790,14 +4990,6 @@ if (alloc_len > 0) mem_p = kmem_zalloc(alloc_len, KM_SLEEP); - if (cmd == DKIOCSVTOC) { - /* - * Save a copy of the current VTOC so that we can roll back - * if the setting of the new VTOC fails. - */ - bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); - } - /* * Call the conversion function for this ioctl which, if necessary, * converts from the Solaris format to the format ARC'ed @@ -4820,6 +5012,15 @@ VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir); + if (cmd == DKIOCSVTOC || cmd == DKIOCSETEFI) { + /* + * The disk label may have changed. Revalidate the disk + * geometry. This will also update the device nodes and + * properties. + */ + vdc_validate(vdc); + } + if (rv != 0) { /* * This is not necessarily an error. The ioctl could @@ -4831,58 +5032,9 @@ if (mem_p != NULL) kmem_free(mem_p, alloc_len); - if (cmd == DKIOCSVTOC) { - /* update of the VTOC has failed, roll back */ - bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); - } - return (rv); } - if (cmd == DKIOCSVTOC) { - /* - * The VTOC has been changed. We need to update the device - * nodes to handle the case where an EFI label has been - * changed to a VTOC label. We also try and update the device - * node properties. Failing to set the properties should - * not cause an error to be return the caller though. - */ - vdc->vdisk_label = VD_DISK_LABEL_VTOC; - (void) vdc_create_device_nodes_vtoc(vdc); - - if (vdc_create_device_nodes_props(vdc)) { - DMSG(vdc, 0, "![%d] Failed to update device nodes" - " properties", vdc->instance); - } - - } else if (cmd == DKIOCSETEFI) { - /* - * The EFI has been changed. We need to update the device - * nodes to handle the case where a VTOC label has been - * changed to an EFI label. We also try and update the device - * node properties. Failing to set the properties should - * not cause an error to be return the caller though. - */ - struct dk_gpt *efi; - size_t efi_len; - - vdc->vdisk_label = VD_DISK_LABEL_EFI; - (void) vdc_create_device_nodes_efi(vdc); - - rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); - - if (rv == 0) { - vdc_store_efi(vdc, efi); - rv = vdc_create_device_nodes_props(vdc); - vd_efi_free(efi, efi_len); - } - - if (rv) { - DMSG(vdc, 0, "![%d] Failed to update device nodes" - " properties", vdc->instance); - } - } - /* * Call the conversion function (if it exists) for this ioctl * which converts from the format ARC'ed as part of the vDisk @@ -5046,6 +5198,8 @@ static int vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) { + _NOTE(ARGUNUSED(vdc)) + void *tmp_mem = NULL; struct vtoc vt; struct vtoc *vtp = &vt; @@ -5078,12 +5232,6 @@ vtp = tmp_mem; } - /* - * The VTOC is being changed, then vdc needs to update the copy - * it saved in the soft state structure. - */ - bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); - VTOC2VD_VTOC(vtp, &vtvd); bcopy(&vtvd, to, sizeof (vd_vtoc_t)); kmem_free(tmp_mem, copy_len); @@ -5279,23 +5427,20 @@ * vdc - soft state pointer for this instance of the device driver. * * Return Code: - * 0 - Success + * none. */ -static int +static void vdc_create_fake_geometry(vdc_t *vdc) { ASSERT(vdc != NULL); - - /* - * Check if max_xfer_sz and vdisk_size are valid - */ - if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0) - return (EIO); + ASSERT(vdc->vdisk_size != 0); + ASSERT(vdc->max_xfer_sz != 0); /* * DKIOCINFO support */ - vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); + if (vdc->cinfo == NULL) + vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); @@ -5329,59 +5474,65 @@ vdc->minfo->dki_media_type = DK_FIXED_DISK; vdc->minfo->dki_capacity = vdc->vdisk_size; vdc->minfo->dki_lbsize = DEV_BSIZE; - - return (0); +} + +static ushort_t +vdc_lbl2cksum(struct dk_label *label) +{ + int count; + ushort_t sum, *sp; + + count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; + sp = (ushort_t *)label; + sum = 0; + while (count--) { + sum ^= *sp++; + } + + return (sum); } /* * Function: - * vdc_setup_disk_layout() + * vdc_validate_geometry * * Description: - * This routine discovers all the necessary details about the "disk" - * by requesting the data that is available from the vDisk server and by - * faking up the rest of the data. + * This routine discovers the label and geometry of the disk. It stores + * the disk label and related information in the vdc structure. If it + * fails to validate the geometry or to discover the disk label then + * the label is marked as unknown (VD_DISK_LABEL_UNK). * * Arguments: * vdc - soft state pointer for this instance of the device driver. * * Return Code: - * 0 - Success + * 0 - success. + * EINVAL - unknown disk label. + * ENOTSUP - geometry not applicable (EFI label). + * EIO - error accessing the disk. */ static int -vdc_setup_disk_layout(vdc_t *vdc) +vdc_validate_geometry(vdc_t *vdc) { buf_t *buf; /* BREAD requests need to be in a buf_t structure */ dev_t dev; - int slice = 0; - int rv, error; + int rv; + struct dk_label label; + struct dk_geom geom; + struct vtoc vtoc; ASSERT(vdc != NULL); - - if (vdc->vtoc == NULL) - vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); + ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); + ASSERT(MUTEX_HELD(&vdc->lock)); + + mutex_exit(&vdc->lock); dev = makedevice(ddi_driver_major(vdc->dip), VD_MAKE_DEV(vdc->instance, 0)); - rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); - - if (rv && rv != ENOTSUP) { - DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", - vdc->instance, rv); - return (rv); - } - - /* - * The process of attempting to read VTOC will initiate - * the handshake and establish a connection. Following - * handshake, go ahead and create geometry. - */ - error = vdc_create_fake_geometry(vdc); - if (error != 0) { - DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)", - vdc->instance, error); - return (error); - } + + rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL); + if (rv == 0) + rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, FKIOCTL); if (rv == ENOTSUP) { /* @@ -5396,58 +5547,171 @@ if (rv) { DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", vdc->instance, rv); - return (rv); + mutex_enter(&vdc->lock); + vdc_store_label_unk(vdc); + return (EIO); } - vdc->vdisk_label = VD_DISK_LABEL_EFI; - vdc_store_efi(vdc, efi); + mutex_enter(&vdc->lock); + vdc_store_label_efi(vdc, efi); vd_efi_free(efi, efi_len); - - return (0); + return (ENOTSUP); } - vdc->vdisk_label = VD_DISK_LABEL_VTOC; + if (rv != 0) { + DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", + vdc->instance, rv); + mutex_enter(&vdc->lock); + vdc_store_label_unk(vdc); + if (rv != EINVAL) + rv = EIO; + return (rv); + } + + /* check that geometry and vtoc are valid */ + if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || + vtoc.v_sanity != VTOC_SANE) { + mutex_enter(&vdc->lock); + vdc_store_label_unk(vdc); + return (EINVAL); + } /* - * FUTURE: This could be default way for reading the VTOC - * from the disk as supposed to sending the VD_OP_GET_VTOC - * to the server. Currently this is a sanity check. + * We have a disk and a valid VTOC. However this does not mean + * that the disk currently have a VTOC label. The returned VTOC may + * be a default VTOC to be used for configuring the disk (this is + * what is done for disk image). So we read the label from the + * beginning of the disk to ensure we really have a VTOC label. * - * find the slice that represents the entire "disk" and use that to - * read the disk label. The convention in Solaris is that slice 2 - * represents the whole disk so we check that it is, otherwise we - * default to slice 0 + * FUTURE: This could be the default way for reading the VTOC + * from the disk as opposed to sending the VD_OP_GET_VTOC + * to the server. This will be the default if vdc is implemented + * ontop of cmlb. + */ + + /* + * Single slice disk does not support read using an absolute disk + * offset so we just rely on the DKIOCGVTOC ioctl in that case. */ - if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && - (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { - slice = 2; - } else { - slice = 0; + if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { + mutex_enter(&vdc->lock); + if (vtoc.v_nparts != 1) { + vdc_store_label_unk(vdc); + return (EINVAL); + } + vdc_store_label_vtoc(vdc, &geom, &vtoc); + return (0); + } + + if (vtoc.v_nparts != V_NUMPAR) { + mutex_enter(&vdc->lock); + vdc_store_label_unk(vdc); + return (EINVAL); } /* * Read disk label from start of disk */ - vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); bioinit(buf); - buf->b_un.b_addr = (caddr_t)vdc->label; + buf->b_un.b_addr = (caddr_t)&label; buf->b_bcount = DK_LABEL_SIZE; buf->b_flags = B_BUSY | B_READ; buf->b_dev = dev; - rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, - DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); + rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, + DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); if (rv) { DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", vdc->instance); - kmem_free(buf, sizeof (buf_t)); - return (rv); + } else { + rv = biowait(buf); + biofini(buf); + } + kmem_free(buf, sizeof (buf_t)); + + if (rv != 0 || label.dkl_magic != DKL_MAGIC || + label.dkl_cksum != vdc_lbl2cksum(&label)) { + DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", + vdc->instance); + mutex_enter(&vdc->lock); + vdc_store_label_unk(vdc); + return (EINVAL); } - rv = biowait(buf); - biofini(buf); - kmem_free(buf, sizeof (buf_t)); - - return (rv); + + mutex_enter(&vdc->lock); + vdc_store_label_vtoc(vdc, &geom, &vtoc); + return (0); +} + +/* + * Function: + * vdc_validate + * + * Description: + * This routine discovers the label of the disk and create the + * appropriate device nodes if the label has changed. + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * + * Return Code: + * none. + */ +static void +vdc_validate(vdc_t *vdc) +{ + vd_disk_label_t old_label; + struct vtoc old_vtoc; + int rv; + + ASSERT(!MUTEX_HELD(&vdc->lock)); + + mutex_enter(&vdc->lock); + + /* save the current label and vtoc */ + old_label = vdc->vdisk_label; + bcopy(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)); + + /* check the geometry */ + (void) vdc_validate_geometry(vdc); + + /* if the disk label has changed, update device nodes */ + if (vdc->vdisk_label != old_label) { + + if (vdc->vdisk_label == VD_DISK_LABEL_EFI) + rv = vdc_create_device_nodes_efi(vdc); + else + rv = vdc_create_device_nodes_vtoc(vdc); + + if (rv != 0) { + DMSG(vdc, 0, "![%d] Failed to update device nodes", + vdc->instance); + } + } + + /* if the vtoc has changed, update device nodes properties */ + if (bcmp(vdc->vtoc, &old_vtoc, sizeof (struct vtoc)) != 0) { + + if (vdc_create_device_nodes_props(vdc) != 0) { + DMSG(vdc, 0, "![%d] Failed to update device nodes" + " properties", vdc->instance); + } + } + + mutex_exit(&vdc->lock); +} + +static void +vdc_validate_task(void *arg) +{ + vdc_t *vdc = (vdc_t *)arg; + + vdc_validate(vdc); + + mutex_enter(&vdc->lock); + ASSERT(vdc->validate_pending > 0); + vdc->validate_pending--; + mutex_exit(&vdc->lock); } /* @@ -5553,10 +5817,14 @@ } static void -vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) +vdc_store_label_efi(vdc_t *vdc, struct dk_gpt *efi) { struct vtoc *vtoc = vdc->vtoc; + ASSERT(MUTEX_HELD(&vdc->lock)); + + vdc->vdisk_label = VD_DISK_LABEL_EFI; + bzero(vdc->geom, sizeof (struct dk_geom)); vd_efi_to_vtoc(efi, vtoc); if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { /* @@ -5573,3 +5841,23 @@ vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; } } + +static void +vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) +{ + ASSERT(MUTEX_HELD(&vdc->lock)); + + vdc->vdisk_label = VD_DISK_LABEL_VTOC; + bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); + bcopy(geom, vdc->geom, sizeof (struct dk_geom)); +} + +static void +vdc_store_label_unk(vdc_t *vdc) +{ + ASSERT(MUTEX_HELD(&vdc->lock)); + + vdc->vdisk_label = VD_DISK_LABEL_UNK; + bzero(vdc->vtoc, sizeof (struct vtoc)); + bzero(vdc->geom, sizeof (struct dk_geom)); +}
--- a/usr/src/uts/sun4v/io/vds.c Thu Aug 30 01:17:19 2007 -0700 +++ b/usr/src/uts/sun4v/io/vds.c Thu Aug 30 07:43:53 2007 -0700 @@ -398,7 +398,8 @@ static void vd_free_dring_task(vd_t *vdp); static int vd_setup_vd(vd_t *vd); static boolean_t vd_enabled(vd_t *vd); - +static ushort_t vd_lbl2cksum(struct dk_label *label); +static int vd_file_validate_geometry(vd_t *vd); /* * Function: * vd_file_rw @@ -439,6 +440,14 @@ offset = blk * DEV_BSIZE; } else { ASSERT(slice >= 0 && slice < V_NUMPAR); + + if (vd->vdisk_label == VD_DISK_LABEL_UNK && + vd_file_validate_geometry(vd) != 0) { + PR0("Unknown disk label, can't do I/O from slice %d", + slice); + return (-1); + } + if (blk >= vd->vtoc.v_part[slice].p_size) { /* address past the end of the slice */ PR0("req_addr (0x%lx) > psize (0x%lx)", @@ -520,6 +529,116 @@ /* * Function: + * vd_file_build_default_label + * + * Description: + * Return a default label for the given disk. This is used when the disk + * does not have a valid VTOC so that the user can get a valid default + * configuration. The default label have all slices size set to 0 (except + * slice 2 which is the entire disk) to force the user to write a valid + * label onto the disk image. + * + * Parameters: + * vd - disk on which the operation is performed. + * label - the returned default label. + * + * Return Code: + * none. + */ +static void +vd_file_build_default_label(vd_t *vd, struct dk_label *label) +{ + size_t size; + char prefix; + + ASSERT(vd->file); + + /* + * We must have a resonable number of cylinders and sectors so + * that newfs can run using default values. + * + * if (disk_size < 2MB) + * phys_cylinders = disk_size / 100K + * else + * phys_cylinders = disk_size / 300K + * + * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders + * alt_cylinders = (phys_cylinders > 2) ? 2 : 0; + * data_cylinders = phys_cylinders - alt_cylinders + * + * sectors = disk_size / (phys_cylinders * blk_size) + * + * The file size test is an attempt to not have too few cylinders + * for a small file, or so many on a big file that you waste space + * for backup superblocks or cylinder group structures. + */ + if (vd->file_size < (2 * 1024 * 1024)) + label->dkl_pcyl = vd->file_size / (100 * 1024); + else + label->dkl_pcyl = vd->file_size / (300 * 1024); + + if (label->dkl_pcyl == 0) + label->dkl_pcyl = 1; + + if (label->dkl_pcyl > 2) + label->dkl_acyl = 2; + else + label->dkl_acyl = 0; + + label->dkl_nsect = vd->file_size / + (DEV_BSIZE * label->dkl_pcyl); + label->dkl_ncyl = label->dkl_pcyl - label->dkl_acyl; + label->dkl_nhead = 1; + label->dkl_write_reinstruct = 0; + label->dkl_read_reinstruct = 0; + label->dkl_rpm = 7200; + label->dkl_apc = 0; + label->dkl_intrlv = 0; + + PR0("requested disk size: %ld bytes\n", vd->file_size); + PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label->dkl_pcyl, + label->dkl_nhead, label->dkl_nsect); + PR0("provided disk size: %ld bytes\n", (uint64_t) + (label->dkl_pcyl * label->dkl_nhead * + label->dkl_nsect * DEV_BSIZE)); + + if (vd->file_size < (1ULL << 20)) { + size = vd->file_size >> 10; + prefix = 'K'; /* Kilobyte */ + } else if (vd->file_size < (1ULL << 30)) { + size = vd->file_size >> 20; + prefix = 'M'; /* Megabyte */ + } else if (vd->file_size < (1ULL << 40)) { + size = vd->file_size >> 30; + prefix = 'G'; /* Gigabyte */ + } else { + size = vd->file_size >> 40; + prefix = 'T'; /* Terabyte */ + } + + /* + * We must have a correct label name otherwise format(1m) will + * not recognized the disk as labeled. + */ + (void) snprintf(label->dkl_asciilabel, LEN_DKL_ASCII, + "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", + size, prefix, + label->dkl_ncyl, label->dkl_acyl, label->dkl_nhead, + label->dkl_nsect); + + /* default VTOC */ + label->dkl_vtoc.v_version = V_VERSION; + label->dkl_vtoc.v_nparts = V_NUMPAR; + label->dkl_vtoc.v_sanity = VTOC_SANE; + label->dkl_vtoc.v_part[2].p_tag = V_BACKUP; + label->dkl_map[2].dkl_cylno = 0; + label->dkl_map[2].dkl_nblk = label->dkl_ncyl * + label->dkl_nhead * label->dkl_nsect; + label->dkl_cksum = vd_lbl2cksum(label); +} + +/* + * Function: * vd_file_set_vtoc * * Description: @@ -1451,38 +1570,34 @@ kmem_free(dk_efi->dki_data, vd_efi->length); } -static int -vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc, vd_disk_label_t *label) +static vd_disk_label_t +vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc) { int status, rval; struct dk_gpt *efi; size_t efi_len; - *label = VD_DISK_LABEL_UNK; - status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc, (vd_open_flags | FKIOCTL), kcred, &rval); if (status == 0) { - *label = VD_DISK_LABEL_VTOC; - return (0); + return (VD_DISK_LABEL_VTOC); } else if (status != ENOTSUP) { PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status); - return (status); + return (VD_DISK_LABEL_UNK); } status = vds_efi_alloc_and_read(handle, &efi, &efi_len); if (status) { PR0("vds_efi_alloc_and_read returned error %d", status); - return (status); + return (VD_DISK_LABEL_UNK); } - *label = VD_DISK_LABEL_EFI; vd_efi_to_vtoc(efi, vtoc); vd_efi_free(efi, efi_len); - return (0); + return (VD_DISK_LABEL_EFI); } static ushort_t @@ -1556,6 +1671,102 @@ } /* + * Function: + * vd_file_validate_geometry + * + * Description: + * Read the label and validate the geometry of a disk image. The driver + * label, vtoc and geometry information are updated according to the + * label read from the disk image. + * + * If no valid label is found, the label is set to unknown and the + * function returns EINVAL, but a default vtoc and geometry are provided + * to the driver. + * + * Parameters: + * vd - disk on which the operation is performed. + * + * Return Code: + * 0 - success. + * EIO - error reading the label from the disk image. + * EINVAL - unknown disk label. + */ +static int +vd_file_validate_geometry(vd_t *vd) +{ + struct dk_label label; + struct dk_geom *geom = &vd->dk_geom; + struct vtoc *vtoc = &vd->vtoc; + int i; + int status = 0; + + ASSERT(vd->file); + + if (VD_FILE_LABEL_READ(vd, &label) < 0) + return (EIO); + + if (label.dkl_magic != DKL_MAGIC || + label.dkl_cksum != vd_lbl2cksum(&label) || + label.dkl_vtoc.v_sanity != VTOC_SANE || + label.dkl_vtoc.v_nparts != V_NUMPAR) { + vd->vdisk_label = VD_DISK_LABEL_UNK; + vd_file_build_default_label(vd, &label); + status = EINVAL; + } else { + vd->vdisk_label = VD_DISK_LABEL_VTOC; + } + + /* Update the driver geometry */ + bzero(geom, sizeof (struct dk_geom)); + + geom->dkg_ncyl = label.dkl_ncyl; + geom->dkg_acyl = label.dkl_acyl; + geom->dkg_nhead = label.dkl_nhead; + geom->dkg_nsect = label.dkl_nsect; + geom->dkg_intrlv = label.dkl_intrlv; + geom->dkg_apc = label.dkl_apc; + geom->dkg_rpm = label.dkl_rpm; + geom->dkg_pcyl = label.dkl_pcyl; + geom->dkg_write_reinstruct = label.dkl_write_reinstruct; + geom->dkg_read_reinstruct = label.dkl_read_reinstruct; + + /* Update the driver vtoc */ + bzero(vtoc, sizeof (struct vtoc)); + + vtoc->v_sanity = label.dkl_vtoc.v_sanity; + vtoc->v_version = label.dkl_vtoc.v_version; + vtoc->v_sectorsz = DEV_BSIZE; + vtoc->v_nparts = label.dkl_vtoc.v_nparts; + + for (i = 0; i < vtoc->v_nparts; i++) { + vtoc->v_part[i].p_tag = + label.dkl_vtoc.v_part[i].p_tag; + vtoc->v_part[i].p_flag = + label.dkl_vtoc.v_part[i].p_flag; + vtoc->v_part[i].p_start = + label.dkl_map[i].dkl_cylno * + (label.dkl_nhead * label.dkl_nsect); + vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk; + vtoc->timestamp[i] = + label.dkl_vtoc.v_timestamp[i]; + } + /* + * The bootinfo array can not be copied with bcopy() because + * elements are of type long in vtoc (so 64-bit) and of type + * int in dk_vtoc (so 32-bit). + */ + vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0]; + vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1]; + vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2]; + bcopy(label.dkl_asciilabel, vtoc->v_asciilabel, + LEN_DKL_ASCII); + bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume, + LEN_DKL_VVOL); + + return (status); +} + +/* * Handle ioctls to a disk image (file-based). * * Return Values @@ -1571,7 +1782,6 @@ int i, rc; ASSERT(vd->file); - ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); switch (cmd) { @@ -1579,70 +1789,22 @@ ASSERT(ioctl_arg != NULL); geom = (struct dk_geom *)ioctl_arg; - if (VD_FILE_LABEL_READ(vd, &label) < 0) - return (EIO); - - if (label.dkl_magic != DKL_MAGIC || - label.dkl_cksum != vd_lbl2cksum(&label)) - return (EINVAL); - - bzero(geom, sizeof (struct dk_geom)); - geom->dkg_ncyl = label.dkl_ncyl; - geom->dkg_acyl = label.dkl_acyl; - geom->dkg_nhead = label.dkl_nhead; - geom->dkg_nsect = label.dkl_nsect; - geom->dkg_intrlv = label.dkl_intrlv; - geom->dkg_apc = label.dkl_apc; - geom->dkg_rpm = label.dkl_rpm; - geom->dkg_pcyl = label.dkl_pcyl; - geom->dkg_write_reinstruct = label.dkl_write_reinstruct; - geom->dkg_read_reinstruct = label.dkl_read_reinstruct; - + rc = vd_file_validate_geometry(vd); + if (rc != 0 && rc != EINVAL) + return (rc); + + bcopy(&vd->dk_geom, geom, sizeof (struct dk_geom)); return (0); case DKIOCGVTOC: ASSERT(ioctl_arg != NULL); vtoc = (struct vtoc *)ioctl_arg; - if (VD_FILE_LABEL_READ(vd, &label) < 0) - return (EIO); - - if (label.dkl_magic != DKL_MAGIC || - label.dkl_cksum != vd_lbl2cksum(&label)) - return (EINVAL); - - bzero(vtoc, sizeof (struct vtoc)); - - vtoc->v_sanity = label.dkl_vtoc.v_sanity; - vtoc->v_version = label.dkl_vtoc.v_version; - vtoc->v_sectorsz = DEV_BSIZE; - vtoc->v_nparts = label.dkl_vtoc.v_nparts; - - for (i = 0; i < vtoc->v_nparts; i++) { - vtoc->v_part[i].p_tag = - label.dkl_vtoc.v_part[i].p_tag; - vtoc->v_part[i].p_flag = - label.dkl_vtoc.v_part[i].p_flag; - vtoc->v_part[i].p_start = - label.dkl_map[i].dkl_cylno * - (label.dkl_nhead * label.dkl_nsect); - vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk; - vtoc->timestamp[i] = - label.dkl_vtoc.v_timestamp[i]; - } - /* - * The bootinfo array can not be copied with bcopy() because - * elements are of type long in vtoc (so 64-bit) and of type - * int in dk_vtoc (so 32-bit). - */ - vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0]; - vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1]; - vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2]; - bcopy(label.dkl_asciilabel, vtoc->v_asciilabel, - LEN_DKL_ASCII); - bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume, - LEN_DKL_VVOL); - + rc = vd_file_validate_geometry(vd); + if (rc != 0 && rc != EINVAL) + return (rc); + + bcopy(&vd->vtoc, vtoc, sizeof (struct vtoc)); return (0); case DKIOCSGEOM: @@ -1721,8 +1883,9 @@ if ((rc = vd_file_set_vtoc(vd, &label)) != 0) return (rc); - /* update the cached vdisk VTOC */ - bcopy(vtoc, &vd->vtoc, sizeof (vd->vtoc)); + /* check the geometry and update the driver info */ + if ((rc = vd_file_validate_geometry(vd)) != 0) + return (rc); /* * The disk geometry may have changed, so we need to write @@ -2007,6 +2170,8 @@ bcopy(devid->did_id, vd_devid->id, len); + request->status = 0; + /* LDC memory operations require 8-byte multiples */ ASSERT(request->nbytes % sizeof (uint64_t) == 0); @@ -3376,6 +3541,51 @@ } static int +vd_setup_partition_vtoc(vd_t *vd) +{ + int rval, status; + char *device_path = vd->device_path; + + status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, + (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), kcred, &rval); + + if (status != 0) { + PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", + status, device_path); + return (status); + } + + /* Initialize dk_geom structure for single-slice device */ + if (vd->dk_geom.dkg_nsect == 0) { + PRN("%s geometry claims 0 sectors per track", device_path); + return (EIO); + } + if (vd->dk_geom.dkg_nhead == 0) { + PRN("%s geometry claims 0 heads", device_path); + return (EIO); + } + vd->dk_geom.dkg_ncyl = vd->vdisk_size / vd->dk_geom.dkg_nsect / + vd->dk_geom.dkg_nhead; + vd->dk_geom.dkg_acyl = 0; + vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; + + + /* Initialize vtoc structure for single-slice device */ + bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, + MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); + bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); + vd->vtoc.v_nparts = 1; + vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; + vd->vtoc.v_part[0].p_flag = 0; + vd->vtoc.v_part[0].p_start = 0; + vd->vtoc.v_part[0].p_size = vd->vdisk_size; + bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, + MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); + + return (0); +} + +static int vd_setup_partition_efi(vd_t *vd) { efi_gpt_t *gpt; @@ -3417,17 +3627,13 @@ static int vd_setup_file(vd_t *vd) { - int i, rval, status; - ushort_t sum; + int rval, status; vattr_t vattr; dev_t dev; - size_t size; char *file_path = vd->device_path; char dev_path[MAXPATHLEN + 1]; - char prefix; ldi_handle_t lhandle; struct dk_cinfo dk_cinfo; - struct dk_label label; /* make sure the file is valid */ if ((status = lookupname(file_path, UIO_SYSSPACE, FOLLOW, @@ -3475,118 +3681,17 @@ return (EIO); } - /* read label from file */ - if (VD_FILE_LABEL_READ(vd, &label) < 0) { - PRN("Can't read label from %s", file_path); + /* find and validate the geometry of the disk image */ + status = vd_file_validate_geometry(vd); + if (status != 0 && status != EINVAL) { + PRN("Fail to read label from %s", file_path); return (EIO); } - /* label checksum */ - sum = vd_lbl2cksum(&label); - - if (label.dkl_magic != DKL_MAGIC || label.dkl_cksum != sum) { - PR0("%s has an invalid disk label " - "(magic=%x cksum=%x (expect %x))", - file_path, label.dkl_magic, label.dkl_cksum, sum); - - /* default label */ - bzero(&label, sizeof (struct dk_label)); - - /* - * We must have a resonable number of cylinders and sectors so - * that newfs can run using default values. - * - * if (disk_size < 2MB) - * phys_cylinders = disk_size / 100K - * else - * phys_cylinders = disk_size / 300K - * - * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders - * alt_cylinders = (phys_cylinders > 2) ? 2 : 0; - * data_cylinders = phys_cylinders - alt_cylinders - * - * sectors = disk_size / (phys_cylinders * blk_size) - */ - if (vd->file_size < (2 * 1024 * 1024)) - label.dkl_pcyl = vd->file_size / (100 * 1024); - else - label.dkl_pcyl = vd->file_size / (300 * 1024); - - if (label.dkl_pcyl == 0) - label.dkl_pcyl = 1; - - if (label.dkl_pcyl > 2) - label.dkl_acyl = 2; - else - label.dkl_acyl = 0; - - label.dkl_nsect = vd->file_size / - (DEV_BSIZE * label.dkl_pcyl); - label.dkl_ncyl = label.dkl_pcyl - label.dkl_acyl; - label.dkl_nhead = 1; - label.dkl_write_reinstruct = 0; - label.dkl_read_reinstruct = 0; - label.dkl_rpm = 7200; - label.dkl_apc = 0; - label.dkl_intrlv = 0; - label.dkl_magic = DKL_MAGIC; - - PR0("requested disk size: %ld bytes\n", vd->file_size); - PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label.dkl_pcyl, - label.dkl_nhead, label.dkl_nsect); - PR0("provided disk size: %ld bytes\n", (uint64_t) - (label.dkl_pcyl * - label.dkl_nhead * label.dkl_nsect * DEV_BSIZE)); - - if (vd->file_size < (1ULL << 20)) { - size = vd->file_size >> 10; - prefix = 'K'; /* Kilobyte */ - } else if (vd->file_size < (1ULL << 30)) { - size = vd->file_size >> 20; - prefix = 'M'; /* Megabyte */ - } else if (vd->file_size < (1ULL << 40)) { - size = vd->file_size >> 30; - prefix = 'G'; /* Gigabyte */ - } else { - size = vd->file_size >> 40; - prefix = 'T'; /* Terabyte */ - } - - /* - * We must have a correct label name otherwise format(1m) will - * not recognized the disk as labeled. - */ - (void) snprintf(label.dkl_asciilabel, LEN_DKL_ASCII, - "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d", - size, prefix, - label.dkl_ncyl, label.dkl_acyl, label.dkl_nhead, - label.dkl_nsect); - - /* default VTOC */ - label.dkl_vtoc.v_version = V_VERSION; - label.dkl_vtoc.v_nparts = V_NUMPAR; - label.dkl_vtoc.v_sanity = VTOC_SANE; - label.dkl_vtoc.v_part[2].p_tag = V_BACKUP; - label.dkl_map[2].dkl_cylno = 0; - label.dkl_map[2].dkl_nblk = label.dkl_ncyl * - label.dkl_nhead * label.dkl_nsect; - label.dkl_map[0] = label.dkl_map[2]; - label.dkl_map[0] = label.dkl_map[2]; - label.dkl_cksum = vd_lbl2cksum(&label); - - /* write default label to file */ - if ((rval = vd_file_set_vtoc(vd, &label)) != 0) { - PRN("Can't write label to %s", file_path); - return (rval); - } - } - - vd->nslices = label.dkl_vtoc.v_nparts; - + vd->nslices = V_NUMPAR; /* sector size = block size = DEV_BSIZE */ vd->vdisk_size = vd->file_size / DEV_BSIZE; vd->vdisk_type = VD_DISK_TYPE_DISK; - vd->vdisk_label = VD_DISK_LABEL_VTOC; vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ /* Get max_xfer_sz from the device where the file is */ @@ -3621,55 +3726,27 @@ PR0("using file %s, dev %s, max_xfer = %u blks", file_path, dev_path, vd->max_xfer_sz); - vd->dk_geom.dkg_ncyl = label.dkl_ncyl; - vd->dk_geom.dkg_acyl = label.dkl_acyl; - vd->dk_geom.dkg_pcyl = label.dkl_pcyl; - vd->dk_geom.dkg_nhead = label.dkl_nhead; - vd->dk_geom.dkg_nsect = label.dkl_nsect; - vd->dk_geom.dkg_intrlv = label.dkl_intrlv; - vd->dk_geom.dkg_apc = label.dkl_apc; - vd->dk_geom.dkg_rpm = label.dkl_rpm; - vd->dk_geom.dkg_write_reinstruct = label.dkl_write_reinstruct; - vd->dk_geom.dkg_read_reinstruct = label.dkl_read_reinstruct; - - vd->vtoc.v_sanity = label.dkl_vtoc.v_sanity; - vd->vtoc.v_version = label.dkl_vtoc.v_version; - vd->vtoc.v_sectorsz = DEV_BSIZE; - vd->vtoc.v_nparts = label.dkl_vtoc.v_nparts; - - bcopy(label.dkl_vtoc.v_volume, vd->vtoc.v_volume, - LEN_DKL_VVOL); - bcopy(label.dkl_asciilabel, vd->vtoc.v_asciilabel, - LEN_DKL_ASCII); - - for (i = 0; i < vd->nslices; i++) { - vd->vtoc.timestamp[i] = label.dkl_vtoc.v_timestamp[i]; - vd->vtoc.v_part[i].p_tag = label.dkl_vtoc.v_part[i].p_tag; - vd->vtoc.v_part[i].p_flag = label.dkl_vtoc.v_part[i].p_flag; - vd->vtoc.v_part[i].p_start = label.dkl_map[i].dkl_cylno * - label.dkl_nhead * label.dkl_nsect; - vd->vtoc.v_part[i].p_size = label.dkl_map[i].dkl_nblk; - vd->ldi_handle[i] = NULL; - vd->dev[i] = NULL; - } - /* Setup devid for the disk image */ - status = vd_file_read_devid(vd, &vd->file_devid); - - if (status == 0) { - /* a valid devid was found */ - return (0); - } - - if (status != EINVAL) { - /* - * There was an error while trying to read the devid. So this - * disk image may have a devid but we are unable to read it. - */ - PR0("can not read devid for %s", file_path); - vd->file_devid = NULL; - return (0); + if (vd->vdisk_label != VD_DISK_LABEL_UNK) { + + status = vd_file_read_devid(vd, &vd->file_devid); + + if (status == 0) { + /* a valid devid was found */ + return (0); + } + + if (status != EINVAL) { + /* + * There was an error while trying to read the devid. + * So this disk image may have a devid but we are + * unable to read it. + */ + PR0("can not read devid for %s", file_path); + vd->file_devid = NULL; + return (0); + } } /* @@ -3686,11 +3763,17 @@ return (0); } - /* write devid to the disk image */ - if (vd_file_write_devid(vd, vd->file_devid) != 0) { - PR0("fail to write devid for %s", file_path); - ddi_devid_free(vd->file_devid); - vd->file_devid = NULL; + /* + * Write devid to the disk image. The devid is stored into the disk + * image if we have a valid label; otherwise the devid will be stored + * when the user writes a valid label. + */ + if (vd->vdisk_label != VD_DISK_LABEL_UNK) { + if (vd_file_write_devid(vd, vd->file_devid) != 0) { + PR0("fail to write devid for %s", file_path); + ddi_devid_free(vd->file_devid); + vd->file_devid = NULL; + } } return (0); @@ -3720,7 +3803,7 @@ PRN("Cannot use device/file (%s), errno=%d\n", device_path, status); if (status == ENXIO || status == ENODEV || - status == ENOENT) { + status == ENOENT || status == EROFS) { return (EAGAIN); } } @@ -3746,7 +3829,7 @@ } vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ - /* Verify backing device supports dk_cinfo, dk_geom, and vtoc */ + /* Verify backing device supports dk_cinfo */ if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred, &rval)) != 0) { @@ -3760,22 +3843,7 @@ return (EIO); } - status = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, &vd->vdisk_label); - - if (status != 0) { - PRN("vd_read_vtoc returned errno %d for %s", - status, device_path); - return (status); - } - - if (vd->vdisk_label == VD_DISK_LABEL_VTOC && - (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, - (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL), - kcred, &rval)) != 0) { - PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", - status, device_path); - return (status); - } + vd->vdisk_label = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc); /* Store the device's max transfer size for return to the client */ vd->max_xfer_sz = dk_cinfo.dki_maxtransfer; @@ -3789,6 +3857,15 @@ vd->pseudo = is_pseudo_device(dip); ddi_release_devi(dip); if (vd->pseudo) { + /* + * Currently we only support exporting pseudo devices which + * provide a valid disk label. + */ + if (vd->vdisk_label == VD_DISK_LABEL_UNK) { + PRN("%s is a pseudo device with an invalid disk " + "label\n", device_path); + return (EINVAL); + } vd->vdisk_type = VD_DISK_TYPE_SLICE; vd->nslices = 1; return (0); /* ...and we're done */ @@ -3798,45 +3875,27 @@ if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) return (vd_setup_full_disk(vd)); + /* We can only export a slice if the disk has a valid label */ + if (vd->vdisk_label == VD_DISK_LABEL_UNK) { + PRN("%s is a slice from a disk with an unknown disk label\n", + device_path); + return (EINVAL); + } /* Otherwise, we have a non-entire slice of a device */ vd->vdisk_type = VD_DISK_TYPE_SLICE; vd->nslices = 1; if (vd->vdisk_label == VD_DISK_LABEL_EFI) { + /* Slice from a disk with an EFI label */ status = vd_setup_partition_efi(vd); - return (status); - } - - /* Initialize dk_geom structure for single-slice device */ - if (vd->dk_geom.dkg_nsect == 0) { - PRN("%s geometry claims 0 sectors per track", device_path); - return (EIO); - } - if (vd->dk_geom.dkg_nhead == 0) { - PRN("%s geometry claims 0 heads", device_path); - return (EIO); + } else { + /* Slice from a disk with a VTOC label */ + ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); + status = vd_setup_partition_vtoc(vd); } - vd->dk_geom.dkg_ncyl = - vd->vdisk_size/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead; - vd->dk_geom.dkg_acyl = 0; - vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl; - - - /* Initialize vtoc structure for single-slice device */ - bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume, - MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume))); - bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part)); - vd->vtoc.v_nparts = 1; - vd->vtoc.v_part[0].p_tag = V_UNASSIGNED; - vd->vtoc.v_part[0].p_flag = 0; - vd->vtoc.v_part[0].p_start = 0; - vd->vtoc.v_part[0].p_size = vd->vdisk_size; - bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel, - MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel))); - - - return (0); + + return (status); } static int
--- a/usr/src/uts/sun4v/sys/vdc.h Thu Aug 30 01:17:19 2007 -0700 +++ b/usr/src/uts/sun4v/sys/vdc.h Thu Aug 30 07:43:53 2007 -0700 @@ -245,8 +245,14 @@ vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ int hshake_cnt; /* number of failed handshakes */ - int open_count; /* count of outstanding opens */ + uint8_t open[OTYPCNT]; /* mask of opened slices */ + uint8_t open_excl; /* mask of exclusively opened slices */ + ulong_t open_lyr[V_NUMPAR]; /* number of layered opens */ int dkio_flush_pending; /* # outstanding DKIO flushes */ + int validate_pending; /* # outstanding validate request */ + vd_disk_label_t vdisk_label; /* label type of device/disk imported */ + struct vtoc *vtoc; /* structure to store VTOC data */ + struct dk_geom *geom; /* structure to store geometry data */ kthread_t *msg_proc_thr; /* main msg processing thread */ @@ -273,14 +279,11 @@ vio_ver_t ver; /* version number agreed with server */ vd_disk_type_t vdisk_type; /* type of device/disk being imported */ - vd_disk_label_t vdisk_label; /* label type of device/disk imported */ uint64_t vdisk_size; /* device size in blocks */ uint64_t max_xfer_sz; /* maximum block size of a descriptor */ uint64_t block_size; /* device block size used */ - struct dk_label *label; /* structure to store disk label */ struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ - struct vtoc *vtoc; /* structure to store VTOC data */ ddi_devid_t devid; /* device id */ uint64_t ctimeout; /* connection timeout in seconds */ boolean_t ctimeout_reached; /* connection timeout has expired */