Mercurial > illumos > illumos-gate
changeset 2032:b9ba1b330453
6422255 vdc could handle handshake restarts better
6422474 ldc handshake race condition if ldc_up called by both sides of channel simultaneously
6427409 ldc should use drv_usecwait instead of delay loops
6427830 Implement FWARC 2006/195 vDisk control operations
6427836 Improve vDisk version negotiation protocol
6427847 Make vds tolerant of serving full disks with zero-length slices
author | lm66018 |
---|---|
date | Mon, 22 May 2006 13:24:32 -0700 |
parents | 59a9f9423479 |
children | b2a2cf19c19c |
files | usr/src/uts/sun4v/io/ldc.c usr/src/uts/sun4v/io/vdc.c usr/src/uts/sun4v/io/vds.c usr/src/uts/sun4v/sys/ldc_impl.h usr/src/uts/sun4v/sys/vdc.h usr/src/uts/sun4v/sys/vdsk_common.h usr/src/uts/sun4v/sys/vio_mailbox.h |
diffstat | 7 files changed, 1750 insertions(+), 809 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/ldc.c Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/io/ldc.c Mon May 22 13:24:32 2006 -0700 @@ -138,6 +138,12 @@ HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc" }; +/* + * LDC retry count and delay + */ +int ldc_max_retries = LDC_MAX_RETRIES; +clock_t ldc_delay = LDC_DELAY; + #ifdef DEBUG /* @@ -529,23 +535,32 @@ /* * Set the receive queue head - * Returns an error if it fails + * Resets connection and returns an error if it fails. */ static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head) { - int rv; + int rv; + int retries; ASSERT(MUTEX_HELD(&ldcp->lock)); - rv = hv_ldc_rx_set_qhead(ldcp->id, head); - if (rv && rv != H_EWOULDBLOCK) { - cmn_err(CE_WARN, - "ldc_rx_set_qhead: (0x%lx) cannot set qhead", ldcp->id); - i_ldc_reset(ldcp); - return (ECONNRESET); - } - - return (0); + for (retries = 0; retries < ldc_max_retries; retries++) { + + if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0) + return (0); + + if (rv != H_EWOULDBLOCK) + break; + + /* wait for ldc_delay usecs */ + drv_usecwait(ldc_delay); + } + + cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx", + ldcp->id, head); + i_ldc_reset(ldcp); + + return (ECONNRESET); } @@ -602,17 +617,17 @@ /* * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off - * and retry LDC_CHK_CNT times before returning an error. + * and retry ldc_max_retries times before returning an error. 
* Returns 0, EWOULDBLOCK or EIO */ static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail) { int rv, retval = EWOULDBLOCK; - int loop_cnt, chk_cnt; + int retries; ASSERT(MUTEX_HELD(&ldcp->lock)); - for (chk_cnt = 0; chk_cnt < LDC_CHK_CNT; chk_cnt++) { + for (retries = 0; retries < ldc_max_retries; retries++) { if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) { retval = 0; @@ -625,8 +640,8 @@ break; } - /* spin LDC_LOOP_CNT and then try again */ - for (loop_cnt = 0; loop_cnt < LDC_LOOP_CNT; loop_cnt++); + /* wait for ldc_delay usecs */ + drv_usecwait(ldc_delay); } return (retval); } @@ -824,6 +839,7 @@ /* Save the ACK'd version */ ldcp->version.major = rcvd_ver->major; ldcp->version.minor = rcvd_ver->minor; + ldcp->hstate |= TS_RCVD_VER; ldcp->tstate |= TS_VER_DONE; DWARN(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n", @@ -885,7 +901,6 @@ return (ECONNRESET); } - ldcp->last_msg_snt++; ldcp->tx_tail = tx_tail; ldcp->hstate |= TS_SENT_RTS; @@ -1760,8 +1775,11 @@ /* move the head one position */ rx_head = (rx_head + LDC_PACKET_SIZE) % (ldcp->rx_q_entries << LDC_PACKET_SHIFT); - if (rv = i_ldc_set_rx_head(ldcp, rx_head)) + if (rv = i_ldc_set_rx_head(ldcp, rx_head)) { + notify_client = B_TRUE; + notify_event = LDC_EVT_RESET; break; + } } /* for */ @@ -2572,6 +2590,7 @@ return (rv); } + ldcp->hstate |= TS_SENT_VER; ldcp->tx_tail = tx_tail; D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id); @@ -2845,7 +2864,7 @@ *sizep = LDC_PAYLOAD_SIZE_RAW; rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask; - (void) i_ldc_set_rx_head(ldcp, rx_head); + rv = i_ldc_set_rx_head(ldcp, rx_head); return (rv); } @@ -2865,7 +2884,7 @@ ldc_msg_t *msg; caddr_t target; size_t len = 0, bytes_read = 0; - int loop_cnt = 0, chk_cnt = 0; + int retries = 0; uint64_t q_size_mask; target = target_bufp; @@ -2920,27 +2939,24 @@ /* If in the middle of a fragmented xfer */ if (ldcp->first_fragment != 0) { - if (++loop_cnt > LDC_LOOP_CNT) { - loop_cnt = 0; - ++chk_cnt; - } - if 
(chk_cnt < LDC_CHK_CNT) { + + /* wait for ldc_delay usecs */ + drv_usecwait(ldc_delay); + + if (++retries < ldc_max_retries) continue; - } else { - *sizep = 0; - ldcp->last_msg_rcd = - ldcp->first_fragment - 1; - DWARN(DBG_ALL_LDCS, - "ldc_read: (0x%llx) read timeout", - ldcp->id); - return (ETIMEDOUT); - } + + *sizep = 0; + ldcp->last_msg_rcd = ldcp->first_fragment - 1; + DWARN(DBG_ALL_LDCS, + "ldc_read: (0x%llx) read timeout", + ldcp->id); + return (ETIMEDOUT); } *sizep = 0; break; } - loop_cnt = 0; - chk_cnt = 0; + retries = 0; D2(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", @@ -2958,6 +2974,9 @@ DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, " "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); + /* throw away data */ + bytes_read = 0; + /* Reset last_msg_rcd to start of message */ if (ldcp->first_fragment != 0) { ldcp->last_msg_rcd = @@ -2977,7 +2996,7 @@ } /* purge receive queue */ - (void) i_ldc_set_rx_head(ldcp, rx_tail); + rv = i_ldc_set_rx_head(ldcp, rx_tail); break; } @@ -2993,10 +3012,9 @@ if (rv = i_ldc_ctrlmsg(ldcp, msg)) { if (rv == EAGAIN) continue; - (void) i_ldc_set_rx_head(ldcp, rx_tail); + rv = i_ldc_set_rx_head(ldcp, rx_tail); *sizep = 0; bytes_read = 0; - rv = ECONNRESET; break; } }
--- a/usr/src/uts/sun4v/io/vdc.c Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/io/vdc.c Mon May 22 13:24:32 2006 -0700 @@ -115,34 +115,37 @@ static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); /* setup */ -static int vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen); +static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); static int vdc_do_ldc_init(vdc_t *vdc); static int vdc_start_ldc_connection(vdc_t *vdc); static int vdc_create_device_nodes(vdc_t *vdc); static int vdc_create_device_nodes_props(vdc_t *vdc); static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); +static int vdc_do_ldc_up(vdc_t *vdc); static void vdc_terminate_ldc(vdc_t *vdc); static int vdc_init_descriptor_ring(vdc_t *vdc); static void vdc_destroy_descriptor_ring(vdc_t *vdc); /* handshake with vds */ static void vdc_init_handshake_negotiation(void *arg); -static int vdc_init_ver_negotiation(vdc_t *vdc); +static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); static int vdc_init_attr_negotiation(vdc_t *vdc); static int vdc_init_dring_negotiate(vdc_t *vdc); -static int vdc_handle_ver_negotiate(); -static int vdc_handle_attr_negotiate(); static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc); static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag); - -/* processing */ +static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); + +/* processing incoming messages from vDisk server */ static void vdc_process_msg_thread(vdc_t *vdc); +static void vdc_process_msg(void *arg); +static void vdc_do_process_msg(vdc_t *vdc); static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); -static void vdc_process_msg(void *arg); static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg); static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg); static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); -static void vdc_do_process_msg(vdc_t *vdc); +static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); +static int 
vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); +static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); static int vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed); static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int op, uint64_t arg, uint64_t slice); @@ -158,6 +161,13 @@ /* dkio */ static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); static int vdc_create_fake_geometry(vdc_t *vdc); +static int vdc_setup_disk_layout(vdc_t *vdc); +static int vdc_null_copy_func(void *from, void *to, int mode, int dir); +static int vdc_get_vtoc_convert(void *from, void *to, int mode, int dir); +static int vdc_set_vtoc_convert(void *from, void *to, int mode, int dir); +static int vdc_get_geom_convert(void *from, void *to, int mode, int dir); +static int vdc_set_geom_convert(void *from, void *to, int mode, int dir); +static int vdc_uscsicmd_convert(void *from, void *to, int mode, int dir); /* * Module variables @@ -174,6 +184,12 @@ /* variable level controlling the verbosity of the error/debug messages */ int vdc_msglevel = 0; +/* + * Supported vDisk protocol version pairs. + * + * The first array entry is the latest and preferred version. + */ +static const vio_ver_t vdc_version[] = {{1, 0}}; static void vdc_msg(const char *format, ...) 
@@ -395,6 +411,9 @@ if (vdc->vtoc) kmem_free(vdc->vtoc, sizeof (struct vtoc)); + if (vdc->label) + kmem_free(vdc->label, DK_LABEL_SIZE); + if (vdc->initialized & VDC_SOFT_STATE) ddi_soft_state_free(vdc_state, instance); @@ -504,21 +523,22 @@ } else { PR0("%s[%d] Retry #%d for handshake.\n", __func__, instance, retries); + vdc_init_handshake_negotiation(vdc); retries++; } } } mutex_exit(&vdc->attach_lock); - if (vdc->vtoc == NULL) - vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); - - status = vdc_populate_descriptor(vdc, (caddr_t)vdc->vtoc, - P2ROUNDUP(sizeof (struct vtoc), sizeof (uint64_t)), - VD_OP_GET_VTOC, FKIOCTL, 0); - if (status) { - cmn_err(CE_NOTE, "[%d] Failed to get VTOC", instance); - return (status); + /* + * Once the handshake is complete, we can use the DRing to send + * requests to the vDisk server to calculate the geometry and + * VTOC of the "disk" + */ + status = vdc_setup_disk_layout(vdc); + if (status != 0) { + cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err%d)", + vdc->instance, status); } /* @@ -534,7 +554,7 @@ status = vdc_create_device_nodes_props(vdc); if (status) { cmn_err(CE_NOTE, "[%d] Failed to create device nodes" - " properties", instance); + " properties (%d)", instance, status); return (status); } @@ -650,19 +670,7 @@ return (0); } - if ((status = ldc_up(vdc->ldc_handle)) != 0) { - switch (status) { - case ECONNREFUSED: /* listener not ready at other end */ - PR0("%s: ldc_up(%d,...) 
return %d\n", - __func__, vdc->ldc_id, status); - status = 0; - break; - default: - cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: " - "channel=%ld, err=%d", - vdc->instance, vdc->ldc_id, status); - } - } + status = vdc_do_ldc_up(vdc); PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance); @@ -832,7 +840,6 @@ _NOTE(ARGUNUSED(cred)) int instance; - int status = 0; vdc_t *vdc; ASSERT(dev != NULL); @@ -852,8 +859,7 @@ /* * Check to see if we can communicate with vds */ - status = vdc_is_able_to_tx_data(vdc, flag); - if (status == B_FALSE) { + if (!vdc_is_able_to_tx_data(vdc, flag)) { PR0("%s[%d] Not ready to transmit data\n", __func__, instance); return (ENOLINK); } @@ -888,7 +894,7 @@ /* * Check to see if we can communicate with vds */ - if (vdc_is_able_to_tx_data(vdc, 0) == B_FALSE) { + if (!vdc_is_able_to_tx_data(vdc, 0)) { PR0("%s[%d] Not ready to transmit data\n", __func__, instance); return (ETIMEDOUT); } @@ -982,7 +988,7 @@ ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size)); - if (vdc_is_able_to_tx_data(vdc, O_NONBLOCK) == B_FALSE) { + if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { vdc_msg("%s: Not ready to transmit data", __func__); bioerror(buf, ENXIO); biodone(buf); @@ -1057,16 +1063,37 @@ vdc_init_handshake_negotiation(void *arg) { vdc_t *vdc = (vdc_t *)(void *)arg; + ldc_status_t ldc_state; vd_state_t state; + int status; ASSERT(vdc != NULL); - ASSERT(vdc->ldc_state == LDC_UP); + + PR0("[%d] Initializing vdc<->vds handshake\n", vdc->instance); + + /* get LDC state */ + status = ldc_status(vdc->ldc_handle, &ldc_state); + if (status != 0) { + cmn_err(CE_NOTE, "[%d] Couldn't get LDC status: err=%d", + vdc->instance, status); + return; + } + + /* + * If the LDC connection is not UP we bring it up now and return. + * The handshake will be started again when the callback is + * triggered due to the UP event. 
+ */ + if (ldc_state != LDC_UP) { + PR0("[%d] Triggering an LDC_UP and returning\n", vdc->instance); + (void) vdc_do_ldc_up(vdc); + return; + } mutex_enter(&vdc->lock); - /* * Do not continue if another thread has triggered a handshake which - * is in progress or detach() has stopped further handshakes. + * has not been reset or detach() has stopped further handshakes. */ if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) { PR0("%s[%d] Negotiation not triggered. [init=%x]\n", @@ -1075,14 +1102,26 @@ return; } - PR0("Initializing vdc<->vds handshake\n"); + if (vdc->hshake_cnt++ > vdc_retries) { + cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake" + "with vDisk server", vdc->instance); + mutex_exit(&vdc->lock); + return; + } vdc->initialized |= VDC_HANDSHAKE; + vdc->ldc_state = ldc_state; state = vdc->state; if (state == VD_STATE_INIT) { - (void) vdc_init_ver_negotiation(vdc); + /* + * Set the desired version parameter to the first entry in the + * version array. If this specific version is not supported, + * the response handling code will step down the version number + * to the next array entry and deal with it accordingly. + */ + (void) vdc_init_ver_negotiation(vdc, vdc_version[0]); } else if (state == VD_STATE_VER) { (void) vdc_init_attr_negotiation(vdc); } else if (state == VD_STATE_ATTR) { @@ -1099,8 +1138,20 @@ mutex_exit(&vdc->lock); } +/* + * Function: + * vdc_init_ver_negotiation() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * + * Return Code: + * 0 - Success + */ static int -vdc_init_ver_negotiation(vdc_t *vdc) +vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) { vio_ver_msg_t pkt; size_t msglen = sizeof (pkt); @@ -1122,10 +1173,10 @@ pkt.tag.vio_subtype_env = VIO_VER_INFO; pkt.tag.vio_sid = vdc->session_id; pkt.dev_class = VDEV_DISK; - pkt.ver_major = VD_VER_MAJOR; - pkt.ver_minor = VD_VER_MINOR; - - status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); + pkt.ver_major = ver.major; + pkt.ver_minor = ver.minor; + + status = vdc_send(vdc, (caddr_t)&pkt, &msglen); PR0("%s: vdc_send(status = %d)\n", __func__, status); if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { @@ -1139,6 +1190,18 @@ return (status); } +/* + * Function: + * vdc_init_attr_negotiation() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * + * Return Code: + * 0 - Success + */ static int vdc_init_attr_negotiation(vdc_t *vdc) { @@ -1164,7 +1227,7 @@ pkt.vdisk_type = 0; /* server will set to valid device type */ pkt.vdisk_size = 0; /* server will set to valid size */ - status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); + status = vdc_send(vdc, (caddr_t)&pkt, &msglen); PR0("%s: vdc_send(status = %d)\n", __func__, status); if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { @@ -1178,6 +1241,18 @@ return (status); } +/* + * Function: + * vdc_init_dring_negotiate() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * + * Return Code: + * 0 - Success + */ static int vdc_init_dring_negotiate(vdc_t *vdc) { @@ -1189,14 +1264,15 @@ ASSERT(mutex_owned(&vdc->lock)); status = vdc_init_descriptor_ring(vdc); - PR0("%s[%d] Init of descriptor ring completed (status = %d)\n", - __func__, vdc->instance, status); if (status != 0) { cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n", vdc->instance, status); + vdc_destroy_descriptor_ring(vdc); vdc_reset_connection(vdc, B_FALSE); return (status); } + PR0("%s[%d] Init of descriptor ring completed (status = %d)\n", + __func__, vdc->instance, status); /* fill in tag */ pkt.tag.vio_msgtype = VIO_TYPE_CTRL; @@ -1211,7 +1287,7 @@ pkt.ncookies = vdc->dring_cookie_count; pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ - status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen); + status = vdc_send(vdc, (caddr_t)&pkt, &msglen); if (status != 0) { PR0("%s[%d] Failed to register DRing (status = %d)\n", __func__, vdc->instance, status); @@ -1252,20 +1328,27 @@ * xxx - other error codes returned by ldc_write */ static int -vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen) +vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) { size_t size = 0; int retries = 0; int status = 0; + ASSERT(vdc != NULL); + ASSERT(mutex_owned(&vdc->lock)); ASSERT(msglen != NULL); ASSERT(*msglen != 0); do { size = *msglen; - status = ldc_write(ldc_handle, pkt, &size); + status = ldc_write(vdc->ldc_handle, pkt, &size); } while (status == EWOULDBLOCK && retries++ < vdc_retries); + /* if LDC had serious issues --- reset vdc state */ + if (status == EIO || status == ECONNRESET) { + vdc_reset_connection(vdc, B_TRUE); + } + /* return the last size written */ *msglen = size; @@ -1389,7 +1472,7 @@ } } - if (found_inst == B_FALSE) { + if (!found_inst) { cmn_err(CE_NOTE, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); status = ENOENT; @@ -1438,6 +1521,30 @@ return (status); } +static int +vdc_do_ldc_up(vdc_t *vdc) +{ + int status; + + 
PR0("[%d] Bringing up channel %x\n", vdc->instance, vdc->ldc_id); + + if ((status = ldc_up(vdc->ldc_handle)) != 0) { + switch (status) { + case ECONNREFUSED: /* listener not ready at other end */ + PR0("%s: ldc_up(%d,...) return %d\n", + __func__, vdc->ldc_id, status); + status = 0; + break; + default: + cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: " + "channel=%ld, err=%d", + vdc->instance, vdc->ldc_id, status); + } + } + + return (status); +} + /* * vdc_is_able_to_tx_data() @@ -1526,6 +1633,18 @@ } +/* + * Function: + * vdc_terminate_ldc() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * + * Return Code: + * None + */ static void vdc_terminate_ldc(vdc_t *vdc) { @@ -1553,6 +1672,19 @@ vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); } +/* + * Function: + * vdc_reset_connection() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * reset_ldc - Flag whether or not to reset the LDC connection also. + * + * Return Code: + * None + */ static void vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc) { @@ -1565,7 +1697,7 @@ vdc->state = VD_STATE_INIT; - if (reset_ldc == B_TRUE) { + if (reset_ldc) { status = ldc_reset(vdc->ldc_handle); PR0("%s[%d] ldc_reset() = %d\n", __func__, vdc->instance, status); @@ -1581,41 +1713,63 @@ * Descriptor Ring helper routines */ +/* + * Function: + * vdc_init_descriptor_ring() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * + * Return Code: + * 0 - Success + */ static int vdc_init_descriptor_ring(vdc_t *vdc) { vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ - int status = -1; + int status = 0; int i; - PR0("%s\n", __func__); + PR0("%s[%d] initialized=%x\n", + __func__, vdc->instance, vdc->initialized); ASSERT(vdc != NULL); ASSERT(mutex_owned(&vdc->lock)); ASSERT(vdc->ldc_handle != NULL); - status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ, - &vdc->ldc_dring_hdl); - if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { - PR0("%s: Failed to create a descriptor ring", __func__); - return (status); + if ((vdc->initialized & VDC_DRING_INIT) == 0) { + PR0("%s[%d] ldc_mem_dring_create\n", __func__, vdc->instance); + status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ, + &vdc->ldc_dring_hdl); + if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { + PR0("%s: Failed to create a descriptor ring", __func__); + return (status); + } + vdc->dring_entry_size = VD_DRING_ENTRY_SZ; + vdc->dring_len = VD_DRING_LEN; + vdc->initialized |= VDC_DRING_INIT; } - vdc->initialized |= VDC_DRING; - vdc->dring_entry_size = VD_DRING_ENTRY_SZ; - vdc->dring_len = VD_DRING_LEN; - - vdc->dring_cookie = kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); - - status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, - LDC_SHADOW_MAP, LDC_MEM_RW, &vdc->dring_cookie[0], - &vdc->dring_cookie_count); - if (status != 0) { - PR0("%s: Failed to bind descriptor ring (%p) to channel (%p)\n", - __func__, vdc->ldc_dring_hdl, vdc->ldc_handle); - return (status); + + if ((vdc->initialized & VDC_DRING_BOUND) == 0) { + PR0("%s[%d] ldc_mem_dring_bind\n", __func__, vdc->instance); + vdc->dring_cookie = + kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); + + status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, + LDC_SHADOW_MAP, LDC_MEM_RW, + &vdc->dring_cookie[0], + &vdc->dring_cookie_count); + if (status != 0) { + PR0("%s: Failed to bind descriptor ring (%p) " + "to channel (%p)\n", + 
__func__, vdc->ldc_dring_hdl, vdc->ldc_handle); + return (status); + } + ASSERT(vdc->dring_cookie_count == 1); + vdc->initialized |= VDC_DRING_BOUND; } - ASSERT(vdc->dring_cookie_count == 1); - vdc->initialized |= VDC_DRING_BOUND; status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); if (status != 0) { @@ -1624,15 +1778,21 @@ return (status); } - /* Allocate the local copy of this dring */ - vdc->local_dring = kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t), + if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { + PR0("%s[%d] local dring\n", __func__, vdc->instance); + + /* Allocate the local copy of this dring */ + vdc->local_dring = + kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t), KM_SLEEP); - vdc->initialized |= VDC_DRING_LOCAL; + vdc->initialized |= VDC_DRING_LOCAL; + } /* - * Mark all DRing entries as free and init priv desc memory handles - * If any entry is initialized, we need to free it later so we set - * the bit in 'initialized' at the start. + * Mark all DRing entries as free and initialize the private + * descriptor's memory handles. If any entry is initialized, + * we need to free it later so we set the bit in 'initialized' + * at the start. */ vdc->initialized |= VDC_DRING_ENTRY; for (i = 0; i < VD_DRING_LEN; i++) { @@ -1647,7 +1807,6 @@ return (status); } vdc->local_dring[i].flags = VIO_DESC_FREE; - vdc->local_dring[i].flags |= VDC_ALLOC_HANDLE; vdc->local_dring[i].dep = dep; mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL); @@ -1665,12 +1824,25 @@ return (status); } +/* + * Function: + * vdc_destroy_descriptor_ring() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * + * Return Code: + * None + */ static void vdc_destroy_descriptor_ring(vdc_t *vdc) { + vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ ldc_mem_handle_t mhdl = NULL; - int status = -1; - int i; /* loop */ + int status = -1; + int i; /* loop */ ASSERT(vdc != NULL); ASSERT(mutex_owned(&vdc->lock)); @@ -1679,28 +1851,30 @@ PR0("%s: Entered\n", __func__); if (vdc->initialized & VDC_DRING_ENTRY) { + PR0("[%d] Removing Local DRing entries\n", vdc->instance); for (i = 0; i < VD_DRING_LEN; i++) { - mhdl = vdc->local_dring[i].desc_mhdl; - - if (vdc->local_dring[i].flags | VDC_ALLOC_HANDLE) - (void) ldc_mem_free_handle(mhdl); - - mutex_destroy(&vdc->local_dring[i].lock); - cv_destroy(&vdc->local_dring[i].cv); - - bzero(&vdc->local_dring[i].desc_mhdl, - sizeof (ldc_mem_handle_t)); + ldep = &vdc->local_dring[i]; + mhdl = ldep->desc_mhdl; + + if (mhdl == NULL) + continue; + + (void) ldc_mem_free_handle(mhdl); + mutex_destroy(&ldep->lock); + cv_destroy(&ldep->cv); } vdc->initialized &= ~VDC_DRING_ENTRY; } if (vdc->initialized & VDC_DRING_LOCAL) { + PR0("[%d] Freeing Local DRing\n", vdc->instance); kmem_free(vdc->local_dring, VD_DRING_LEN * sizeof (vdc_local_desc_t)); vdc->initialized &= ~VDC_DRING_LOCAL; } if (vdc->initialized & VDC_DRING_BOUND) { + PR0("[%d] Unbinding DRing\n", vdc->instance); status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); if (status == 0) { vdc->initialized &= ~VDC_DRING_BOUND; @@ -1711,6 +1885,7 @@ } if (vdc->initialized & VDC_DRING_INIT) { + PR0("[%d] Destroying DRing\n", vdc->instance); status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); if (status == 0) { vdc->ldc_dring_hdl = NULL; @@ -1718,7 +1893,7 @@ vdc->initialized &= ~VDC_DRING_INIT; } else { vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n", - vdc->ldc_dring_hdl); + vdc->ldc_dring_hdl); } } } @@ -1801,7 +1976,7 @@ } else { vdc_msg("Public Descriptor Ring entry corrupted"); mutex_enter(&vdc->lock); - vdc_reset_connection(vdc, B_TRUE); + vdc_reset_connection(vdc, B_FALSE); 
mutex_exit(&vdc->lock); return (-1); } @@ -1861,7 +2036,7 @@ if (idx == -1) { mutex_exit(&vdc->dring_lock); vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n", - __func__, vdc->instance, vdc->seq_num); + __func__, vdc->instance, vdc->seq_num); /* * Since strategy should not block we don't wait for the DRing @@ -1939,7 +2114,9 @@ vdc->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num, dep->payload.req_id, dep); - status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); + mutex_enter(&vdc->lock); + status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); + mutex_exit(&vdc->lock); PR1("%s[%d]: ldc_write() status=%d\n", __func__, vdc->instance, status); if (status != 0) { mutex_exit(&local_dep->lock); @@ -1949,6 +2126,12 @@ } /* + * If the message was successfully sent, we increment the sequence + * number to be used by the next message + */ + vdc->seq_num++; + + /* * XXX - potential performance enhancement (Investigate at a later date) * * for calls from strategy(9E), instead of waiting for a response from @@ -2077,6 +2260,20 @@ return (status); } +/* + * Function: + * vdc_wait_for_descriptor_update() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * idx - Index of the Descriptor Ring entry being modified + * dmsg - LDC message sent by vDisk server + * + * Return Code: + * 0 - Success + */ static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg) { @@ -2084,10 +2281,11 @@ vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */ size_t msglen = sizeof (dmsg); int retries = 0; - int status = ENXIO; + int status = 0; int rv = 0; ASSERT(vdc != NULL); + ASSERT(mutex_owned(&vdc->dring_lock)); ASSERT(idx < VD_DRING_LEN); local_dep = &vdc->local_dring[idx]; ASSERT(local_dep != NULL); @@ -2131,14 +2329,20 @@ */ VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc); retries = 0; - status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg, - &msglen); + mutex_enter(&vdc->lock); + status = vdc_send(vdc, (caddr_t)&dmsg, &msglen); + mutex_exit(&vdc->lock); if (status != 0) { vdc_msg("%s: Error (%d) while resending after " "timeout\n", __func__, status); status = ETIMEDOUT; break; } + /* + * If the message was successfully sent, we increment + * the sequence number to be used by the next message. + */ + vdc->seq_num++; } } @@ -2194,6 +2398,19 @@ return (status); } +/* + * Function: + * vdc_depopulate_descriptor() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * idx - Index of the Descriptor Ring entry being modified + * + * Return Code: + * 0 - Success + */ static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) { @@ -2231,12 +2448,28 @@ status = ldc_mem_unbind_handle(ldep->desc_mhdl); if (status != 0) { cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d", - vdc->instance, ldep->desc_mhdl, idx, status); + vdc->instance, ldep->desc_mhdl, idx, status); } return (status); } +/* + * Function: + * vdc_populate_mem_hdl() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. 
+ * idx - Index of the Descriptor Ring entry being modified + * addr - virtual address being mapped in + * nybtes - number of bytes in 'addr' + * operation - the vDisk operation being performed (VD_OP_xxx) + * + * Return Code: + * 0 - Success + */ static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes, int operation) @@ -2306,7 +2539,7 @@ __func__, vdc->instance, mhdl, addr, idx, rv); if (ldep->align_addr) { kmem_free(ldep->align_addr, - sizeof (caddr_t) * dep->payload.nbytes); + sizeof (caddr_t) * dep->payload.nbytes); ldep->align_addr = NULL; } return (EAGAIN); @@ -2337,6 +2570,19 @@ * Interrupt handlers for messages from LDC */ +/* + * Function: + * vdc_handle_cb() + * + * Description: + * + * Arguments: + * event - Type of event (LDC_EVT_xxx) that triggered the callback + * arg - soft state pointer for this instance of the device driver. + * + * Return Code: + * 0 - Success + */ static uint_t vdc_handle_cb(uint64_t event, caddr_t arg) { @@ -2366,7 +2612,9 @@ if (rv != 0) { cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", vdc->instance, rv); + mutex_enter(&vdc->lock); vdc_reset_connection(vdc, B_TRUE); + mutex_exit(&vdc->lock); return (LDC_SUCCESS); } @@ -2376,7 +2624,7 @@ * server. 
*/ mutex_enter(&vdc->lock); - vdc->seq_num = 0; + vdc->seq_num = 1; vdc->seq_num_reply = 0; vdc->ldc_state = ldc_state; ASSERT(ldc_state == LDC_UP); @@ -2404,6 +2652,20 @@ if (event & LDC_EVT_RESET) { PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance); + + /* get LDC state */ + rv = ldc_status(vdc->ldc_handle, &ldc_state); + if (rv != 0) { + cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d", + vdc->instance, rv); + ldc_state = LDC_OPEN; + } + mutex_enter(&vdc->lock); + vdc->ldc_state = ldc_state; + vdc_reset_connection(vdc, B_FALSE); + mutex_exit(&vdc->lock); + + vdc_init_handshake_negotiation(vdc); } if (event & LDC_EVT_DOWN) { @@ -2418,9 +2680,8 @@ } mutex_enter(&vdc->lock); vdc->ldc_state = ldc_state; + vdc_reset_connection(vdc, B_TRUE); mutex_exit(&vdc->lock); - - vdc_reset_connection(vdc, B_TRUE); } if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) @@ -2437,6 +2698,18 @@ */ +/* + * Function: + * vdc_process_msg_thread() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * + * Return Code: + * None + */ static void vdc_process_msg_thread(vdc_t *vdc) { @@ -2453,7 +2726,7 @@ while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) { PR1("%s[%d] Waiting\n", __func__, vdc->instance); - while (vdc->msg_pending == B_FALSE) + while (!vdc->msg_pending) cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock); PR1("%s[%d] Message Received\n", __func__, vdc->instance); @@ -2466,13 +2739,13 @@ " server. 
Cannot check LDC queue: %d", vdc->instance, status); mutex_enter(&vdc->lock); - vdc_reset_connection(vdc, B_TRUE); + vdc_reset_connection(vdc, B_FALSE); mutex_exit(&vdc->lock); vdc->msg_proc_thr_state = VDC_THR_STOP; continue; } - if (q_is_empty == B_FALSE) { + if (!q_is_empty) { PR1("%s: new pkt(s) available\n", __func__); vdc_process_msg(vdc); } @@ -2531,7 +2804,7 @@ /* if status is ECONNRESET --- reset vdc state */ if (status == EIO || status == ECONNRESET) { - vdc_reset_connection(vdc, B_FALSE); + vdc_reset_connection(vdc, B_TRUE); } mutex_exit(&vdc->lock); @@ -2565,8 +2838,8 @@ */ if ((vio_msg.tag.vio_sid != vdc->session_id) && (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) { - PR0("%s: Incorrect SID 0x%x msg 0x%lx, expected 0x%x\n", - __func__, vio_msg.tag.vio_sid, &vio_msg, + cmn_err(CE_NOTE, "[%d] Invalid SID 0x%x, expect 0x%lx", + vdc->instance, vio_msg.tag.vio_sid, vdc->session_id); vdc_reset_connection(vdc, B_FALSE); mutex_exit(&vdc->lock); @@ -2621,9 +2894,6 @@ static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg) { - size_t msglen = sizeof (msg); - vd_attr_msg_t *attr_msg = NULL; - vio_dring_reg_msg_t *dring_msg = NULL; int status = -1; ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL); @@ -2633,176 +2903,16 @@ /* Depending on which state we are in; process the message */ switch (vdc->state) { case VD_STATE_INIT: - if (msg.tag.vio_subtype_env != VIO_VER_INFO) { - status = EPROTO; - break; - } - - switch (msg.tag.vio_subtype) { - case VIO_SUBTYPE_ACK: - vdc->state = VD_STATE_VER; - status = vdc_init_attr_negotiation(vdc); - break; - case VIO_SUBTYPE_NACK: - /* - * For now there is only one version number so we - * cannot step back to an earlier version but in the - * future we may need to add further logic here - * to try negotiating an earlier version as the VIO - * design allow for it. - */ - - /* - * vds could not handle the version we sent so we just - * stop negotiating. 
- */ - status = EPROTO; - break; - - case VIO_SUBTYPE_INFO: - /* - * Handle the case where vds starts handshake - * (for now only vdc is the instigatior) - */ - status = ENOTSUP; - break; - - default: - status = ENOTSUP; - break; - } + status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg); break; case VD_STATE_VER: - if (msg.tag.vio_subtype_env != VIO_ATTR_INFO) { - status = EPROTO; - break; - } - - switch (msg.tag.vio_subtype) { - case VIO_SUBTYPE_ACK: - /* - * We now verify the attributes sent by vds. - */ - attr_msg = (vd_attr_msg_t *)&msg; - vdc->vdisk_size = attr_msg->vdisk_size; - vdc->vdisk_type = attr_msg->vdisk_type; - - if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) || - (attr_msg->vdisk_block_size != vdc->block_size)) { - /* - * Future support: step down to the block size - * and max transfer size suggested by the - * server. (If this value is less than 128K - * then multiple Dring entries per request - * would need to be implemented) - */ - cmn_err(CE_NOTE, "[%d] Couldn't process block " - "attrs from vds", vdc->instance); - status = EINVAL; - break; - } - - if ((attr_msg->xfer_mode != VIO_DRING_MODE) || - (attr_msg->vdisk_size > INT64_MAX) || - (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { - vdc_msg("%s[%d] Couldn't process attrs " - "from vds", __func__, vdc->instance); - status = EINVAL; - break; - } - - vdc->state = VD_STATE_ATTR; - status = vdc_init_dring_negotiate(vdc); - break; - - case VIO_SUBTYPE_NACK: - /* - * vds could not handle the attributes we sent so we - * stop negotiating. 
- */ - status = EPROTO; - break; - - case VIO_SUBTYPE_INFO: - /* - * Handle the case where vds starts the handshake - * (for now; vdc is the only supported instigatior) - */ - status = ENOTSUP; - break; - - default: - status = ENOTSUP; - break; - } + status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg); break; - case VD_STATE_ATTR: - if (msg.tag.vio_subtype_env != VIO_DRING_REG) { - status = EPROTO; - break; - } - - switch (msg.tag.vio_subtype) { - case VIO_SUBTYPE_ACK: - /* Verify that we have sent all the descr. ring info */ - /* nop for now as we have just 1 dring */ - dring_msg = (vio_dring_reg_msg_t *)&msg; - - /* save the received dring_ident */ - vdc->dring_ident = dring_msg->dring_ident; - PR0("%s[%d] Received dring ident=0x%lx\n", - __func__, vdc->instance, vdc->dring_ident); - - /* - * Send an RDX message to vds to indicate we are ready - * to send data - */ - msg.tag.vio_msgtype = VIO_TYPE_CTRL; - msg.tag.vio_subtype = VIO_SUBTYPE_INFO; - msg.tag.vio_subtype_env = VIO_RDX; - msg.tag.vio_sid = vdc->session_id; - status = vdc_send(vdc->ldc_handle, (caddr_t)&msg, - &msglen); - if (status != 0) { - cmn_err(CE_NOTE, "[%d] Failed to send RDX" - " message (%d)", vdc->instance, status); - break; - } - - status = vdc_create_fake_geometry(vdc); - if (status != 0) { - cmn_err(CE_NOTE, "[%d] Failed to create disk " - "geometery(%d)", vdc->instance, status); - break; - } - - vdc->state = VD_STATE_RDX; - break; - - case VIO_SUBTYPE_NACK: - /* - * vds could not handle the DRing info we sent so we - * stop negotiating. 
- */ - cmn_err(CE_CONT, "server could not register DRing\n"); - vdc_reset_connection(vdc, B_FALSE); - vdc_destroy_descriptor_ring(vdc); - status = EPROTO; - break; - - case VIO_SUBTYPE_INFO: - /* - * Handle the case where vds starts handshake - * (for now only vdc is the instigatior) - */ - status = ENOTSUP; - break; - default: - status = ENOTSUP; - } + status = vdc_handle_dring_reg_msg(vdc, + (vio_dring_reg_msg_t *)&msg); break; case VD_STATE_RDX: @@ -2813,15 +2923,18 @@ PR0("%s: Received RDX - handshake successful\n", __func__); + vdc->hshake_cnt = 0; /* reset failed handshake count */ status = 0; vdc->state = VD_STATE_DATA; cv_broadcast(&vdc->attach_cv); break; + case VD_STATE_DATA: default: - cmn_err(CE_NOTE, "[%d] unknown handshake negotiation state %d", + cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d", vdc->instance, vdc->state); + status = EPROTO; break; } @@ -2834,7 +2947,7 @@ * vdc_process_data_msg() * * Description: - * This function is called by the message processing thread each time it + * This function is called by the message processing thread each time * a message with a msgtype of VIO_TYPE_DATA is received. It will either * be an ACK or NACK from vds[1] which vdc handles as follows. * ACK - wake up the waiting thread @@ -2858,7 +2971,6 @@ int status = 0; vdc_local_desc_t *local_dep = NULL; vio_dring_msg_t *dring_msg = NULL; - size_t msglen = sizeof (*dring_msg); uint_t num_msgs; uint_t start; uint_t end; @@ -2894,72 +3006,33 @@ /* * Verify that the sequence number is what vdc expects. 
*/ - if (vdc_verify_seq_num(vdc, dring_msg, num_msgs) == B_FALSE) { + if (!vdc_verify_seq_num(vdc, dring_msg, num_msgs)) { return (ENXIO); } - switch (msg.tag.vio_subtype) { - case VIO_SUBTYPE_ACK: - PR2("%s: DATA ACK\n", __func__); - - /* - * Wake the thread waiting for each DRing entry ACK'ed - */ - for (i = 0; i < num_msgs; i++) { - int idx = (start + i) % VD_DRING_LEN; - - local_dep = &vdc->local_dring[idx]; - mutex_enter(&local_dep->lock); - cv_signal(&local_dep->cv); - mutex_exit(&local_dep->lock); - } - break; - - case VIO_SUBTYPE_NACK: + /* + * Wake the thread waiting for each DRing entry ACK'ed + */ + for (i = 0; i < num_msgs; i++) { + int idx = (start + i) % VD_DRING_LEN; + + local_dep = &vdc->local_dring[idx]; + mutex_enter(&local_dep->lock); + cv_signal(&local_dep->cv); + mutex_exit(&local_dep->lock); + } + + if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) { PR0("%s: DATA NACK\n", __func__); - dring_msg = (vio_dring_msg_t *)&msg; VDC_DUMP_DRING_MSG(dring_msg); - - /* Resend necessary messages */ - for (i = 0; i < num_msgs; i++) { - int idx = (start + i) % VD_DRING_LEN; - - local_dep = &vdc->local_dring[idx]; - ASSERT(local_dep != NULL); - mutex_enter(&local_dep->lock); - - if (local_dep->dep->hdr.dstate != VIO_DESC_READY) { - PR0("%s[%d]: Won't resend entry %d [flag=%d]\n", - __func__, vdc->instance, idx, - local_dep->dep->hdr.dstate); - mutex_exit(&local_dep->lock); - break; - } - - /* we'll reuse the message passed in */ - VIO_INIT_DRING_DATA_TAG(msg); - dring_msg->tag.vio_sid = vdc->session_id; - dring_msg->seq_num = ++(vdc->seq_num); - VDC_DUMP_DRING_MSG(dring_msg); - - status = vdc_send(vdc->ldc_handle, (caddr_t)&dring_msg, - &msglen); - PR1("%s: ldc_write() status=%d\n", __func__, status); - if (status != 0) { - vdc_msg("%s ldc_write(%d)\n", __func__, status); - mutex_exit(&local_dep->lock); - break; - } - - mutex_exit(&local_dep->lock); - } - break; - - case VIO_SUBTYPE_INFO: - default: - cmn_err(CE_NOTE, "[%d] Got an unexpected DATA msg [subtype 
%d]", - vdc->instance, msg.tag.vio_subtype); - break; + vdc_reset_connection(vdc, B_FALSE); + + /* we need to drop the lock to trigger the handshake */ + mutex_exit(&vdc->lock); + vdc_init_handshake_negotiation(vdc); + mutex_enter(&vdc->lock); + } else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { + status = EPROTO; } return (status); @@ -2977,11 +3050,262 @@ _NOTE(ARGUNUSED(vdc)) _NOTE(ARGUNUSED(msg)) - int status = ENOTSUP; - ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR); cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance); + return (ENOTSUP); +} + +/* + * Function: + * vdc_handle_ver_msg() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * ver_msg - LDC message sent by vDisk server + * + * Return Code: + * 0 - Success + */ +static int +vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) +{ + int status = 0; + + ASSERT(vdc != NULL); + ASSERT(mutex_owned(&vdc->lock)); + + if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { + return (EPROTO); + } + + if (ver_msg->dev_class != VDEV_DISK_SERVER) { + return (EINVAL); + } + + switch (ver_msg->tag.vio_subtype) { + case VIO_SUBTYPE_ACK: + /* + * We check to see if the version returned is indeed supported + * (The server may have also adjusted the minor number downwards + * and if so 'ver_msg' will contain the actual version agreed) + */ + if (vdc_is_supported_version(ver_msg)) { + vdc->ver.major = ver_msg->ver_major; + vdc->ver.minor = ver_msg->ver_minor; + ASSERT(vdc->ver.major > 0); + + vdc->state = VD_STATE_VER; + status = vdc_init_attr_negotiation(vdc); + } else { + status = EPROTO; + } + break; + + case VIO_SUBTYPE_NACK: + /* + * call vdc_is_supported_version() which will return the next + * supported version (if any) in 'ver_msg' + */ + (void) vdc_is_supported_version(ver_msg); + if (ver_msg->ver_major > 0) { + size_t len = sizeof (*ver_msg); + + ASSERT(vdc->ver.major > 0); + + /* reset the necessary fields and resend */ + ver_msg->tag.vio_subtype = 
VIO_SUBTYPE_INFO; + ver_msg->dev_class = VDEV_DISK; + + status = vdc_send(vdc, (caddr_t)ver_msg, &len); + PR0("[%d] Resend VER info (LDC status = %d)\n", + vdc->instance, status); + if (len != sizeof (*ver_msg)) + status = EBADMSG; + } else { + cmn_err(CE_NOTE, "[%d] No common version with " + "vDisk server", vdc->instance); + status = ENOTSUP; + } + + break; + case VIO_SUBTYPE_INFO: + /* + * Handle the case where vds starts handshake + * (for now only vdc is the instigatior) + */ + status = ENOTSUP; + break; + + default: + status = EINVAL; + break; + } + + return (status); +} + +/* + * Function: + * vdc_handle_attr_msg() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * attr_msg - LDC message sent by vDisk server + * + * Return Code: + * 0 - Success + */ +static int +vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) +{ + int status = 0; + + ASSERT(vdc != NULL); + ASSERT(mutex_owned(&vdc->lock)); + + if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { + return (EPROTO); + } + + switch (attr_msg->tag.vio_subtype) { + case VIO_SUBTYPE_ACK: + /* + * We now verify the attributes sent by vds. + */ + vdc->vdisk_size = attr_msg->vdisk_size; + vdc->vdisk_type = attr_msg->vdisk_type; + + if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) || + (attr_msg->vdisk_block_size != vdc->block_size)) { + /* + * Future support: step down to the block size + * and max transfer size suggested by the + * server. 
(If this value is less than 128K + * then multiple Dring entries per request + * would need to be implemented) + */ + cmn_err(CE_NOTE, "[%d] Couldn't process block " + "attributes from vds", vdc->instance); + status = EINVAL; + break; + } + + if ((attr_msg->xfer_mode != VIO_DRING_MODE) || + (attr_msg->vdisk_size > INT64_MAX) || + (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { + vdc_msg("%s[%d] Couldn't process attrs " + "from vds", __func__, vdc->instance); + status = EINVAL; + break; + } + + vdc->state = VD_STATE_ATTR; + status = vdc_init_dring_negotiate(vdc); + break; + + case VIO_SUBTYPE_NACK: + /* + * vds could not handle the attributes we sent so we + * stop negotiating. + */ + status = EPROTO; + break; + + case VIO_SUBTYPE_INFO: + /* + * Handle the case where vds starts the handshake + * (for now; vdc is the only supported instigatior) + */ + status = ENOTSUP; + break; + + default: + status = ENOTSUP; + break; + } + + return (status); +} + +/* + * Function: + * vdc_handle_dring_reg_msg() + * + * Description: + * + * Arguments: + * vdc - soft state pointer for this instance of the driver. 
+ * dring_msg - LDC message sent by vDisk server + * + * Return Code: + * 0 - Success + */ +static int +vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) +{ + int status = 0; + vio_rdx_msg_t msg = {0}; + size_t msglen = sizeof (msg); + + ASSERT(vdc != NULL); + ASSERT(mutex_owned(&vdc->lock)); + + if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { + return (EPROTO); + } + + switch (dring_msg->tag.vio_subtype) { + case VIO_SUBTYPE_ACK: + /* save the received dring_ident */ + vdc->dring_ident = dring_msg->dring_ident; + PR0("%s[%d] Received dring ident=0x%lx\n", + __func__, vdc->instance, vdc->dring_ident); + + /* + * Send an RDX message to vds to indicate we are ready + * to send data + */ + msg.tag.vio_msgtype = VIO_TYPE_CTRL; + msg.tag.vio_subtype = VIO_SUBTYPE_INFO; + msg.tag.vio_subtype_env = VIO_RDX; + msg.tag.vio_sid = vdc->session_id; + status = vdc_send(vdc, (caddr_t)&msg, &msglen); + if (status != 0) { + cmn_err(CE_NOTE, "[%d] Failed to send RDX" + " message (%d)", vdc->instance, status); + break; + } + + vdc->state = VD_STATE_RDX; + break; + + case VIO_SUBTYPE_NACK: + /* + * vds could not handle the DRing info we sent so we + * stop negotiating. + */ + cmn_err(CE_CONT, "server could not register DRing\n"); + vdc_reset_connection(vdc, B_FALSE); + vdc_destroy_descriptor_ring(vdc); + status = EPROTO; + break; + + case VIO_SUBTYPE_INFO: + /* + * Handle the case where vds starts handshake + * (for now only vdc is the instigatior) + */ + status = ENOTSUP; + break; + default: + status = ENOTSUP; + } + return (status); } @@ -3035,13 +3359,85 @@ return (B_TRUE); } + +/* + * Function: + * vdc_is_supported_version() + * + * Description: + * This routine checks if the major/minor version numbers specified in + * 'ver_msg' are supported. 
If not it finds the next version that is + * in the supported version list 'vdc_version[]' and sets the fields in + * 'ver_msg' to those values + * + * Arguments: + * ver_msg - LDC message sent by vDisk server + * + * Return Code: + * B_TRUE - Success + * B_FALSE - Version not supported + */ +static boolean_t +vdc_is_supported_version(vio_ver_msg_t *ver_msg) +{ + int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]); + + for (int i = 0; i < vdc_num_versions; i++) { + ASSERT(vdc_version[i].major > 0); + ASSERT((i == 0) || + (vdc_version[i].major < vdc_version[i-1].major)); + + /* + * If the major versions match, adjust the minor version, if + * necessary, down to the highest value supported by this + * client. The server should support all minor versions lower + * than the value it sent + */ + if (ver_msg->ver_major == vdc_version[i].major) { + if (ver_msg->ver_minor > vdc_version[i].minor) { + PR0("Adjusting minor version from %u to %u", + ver_msg->ver_minor, vdc_version[i].minor); + ver_msg->ver_minor = vdc_version[i].minor; + } + return (B_TRUE); + } + + /* + * If the message contains a higher major version number, set + * the message's major/minor versions to the current values + * and return false, so this message will get resent with + * these values, and the server will potentially try again + * with the same or a lower version + */ + if (ver_msg->ver_major > vdc_version[i].major) { + ver_msg->ver_major = vdc_version[i].major; + ver_msg->ver_minor = vdc_version[i].minor; + PR0("Suggesting major/minor (0x%x/0x%x)\n", + ver_msg->ver_major, ver_msg->ver_minor); + + return (B_FALSE); + } + + /* + * Otherwise, the message's major version is less than the + * current major version, so continue the loop to the next + * (lower) supported version + */ + } + + /* + * No common version was found; "ground" the version pair in the + * message to terminate negotiation + */ + ver_msg->ver_major = 0; + ver_msg->ver_minor = 0; + + return (B_FALSE); +} /* 
-------------------------------------------------------------------------- */ /* * DKIO(7) support - * - * XXX FIXME - needs to be converted to use the structures defined in the - * latest VIO spec to communicate with the vDisk server. */ typedef struct vdc_dk_arg { @@ -3105,53 +3501,52 @@ mutex_exit(&vdc->lock); } - /* * This structure is used in the DKIO(7I) array below. */ typedef struct vdc_dk_ioctl { uint8_t op; /* VD_OP_XXX value */ int cmd; /* Solaris ioctl operation number */ - uint8_t copy; /* copyin and/or copyout needed ? */ size_t nbytes; /* size of structure to be copied */ - size_t nbytes32; /* size of 32bit struct if different */ - /* to 64bit struct (zero otherwise) */ + + /* function to convert between vDisk and Solaris structure formats */ + int (*convert)(void *vd_buf, void *ioctl_arg, int mode, int dir); } vdc_dk_ioctl_t; /* * Subset of DKIO(7I) operations currently supported */ static vdc_dk_ioctl_t dk_ioctl[] = { - {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, - 0, 0}, - {VD_OP_GET_WCE, DKIOCGETWCE, 0, - 0, 0}, - {VD_OP_SET_WCE, DKIOCSETWCE, 0, - 0, 0}, - {VD_OP_GET_VTOC, DKIOCGVTOC, VD_COPYOUT, - sizeof (struct vtoc), sizeof (struct vtoc32)}, - {VD_OP_SET_VTOC, DKIOCSVTOC, VD_COPYIN, - sizeof (struct vtoc), sizeof (struct vtoc32)}, - {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYIN, - sizeof (struct dk_geom), 0}, - {VD_OP_GET_DISKGEOM, DKIOCGGEOM, VD_COPYOUT, - sizeof (struct dk_geom), 0}, - {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, VD_COPYOUT, - sizeof (struct dk_geom), 0}, - {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, VD_COPYOUT, - sizeof (struct dk_geom), 0}, - {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYOUT, - sizeof (struct dk_geom), 0}, - {VD_OP_SCSICMD, USCSICMD, VD_COPYIN|VD_COPYOUT, - sizeof (struct uscsi_cmd), sizeof (struct uscsi_cmd32)}, - {0, DKIOCINFO, VD_COPYOUT, - sizeof (struct dk_cinfo), 0}, - {0, DKIOCGMEDIAINFO, VD_COPYOUT, - sizeof (struct dk_minfo), 0}, - {0, DKIOCREMOVABLE, 0, - 0, 0}, - {0, CDROMREADOFFSET, 0, - 0, 0} + {VD_OP_FLUSH, 
DKIOCFLUSHWRITECACHE, sizeof (int), + vdc_null_copy_func}, + {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), + vdc_null_copy_func}, + {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), + vdc_null_copy_func}, + {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), + vdc_get_vtoc_convert}, + {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), + vdc_set_vtoc_convert}, + {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), + vdc_get_geom_convert}, + {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), + vdc_get_geom_convert}, + {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), + vdc_get_geom_convert}, + {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), + vdc_get_geom_convert}, + {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), + vdc_set_geom_convert}, + + /* + * These particular ioctls are not sent to the server - vdc fakes up + * the necessary info. + */ + {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, + {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, + {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, + {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, + {0, CDROMREADOFFSET, 0, vdc_null_copy_func} }; /* @@ -3159,8 +3554,7 @@ * vd_process_ioctl() * * Description: - * This routine is the driver entry point for handling user - * requests to get the device geometry. + * This routine processes disk specific ioctl calls * * Arguments: * dev - the device number @@ -3168,13 +3562,6 @@ * arg - pointer to user provided structure * (contains data to be set or reference parameter for get) * mode - bit flag, indicating open settings, 32/64 bit type, etc - * rvalp - calling process return value, used in some ioctl calls - * (passed throught to vds who fills in the value) - * - * Assumptions: - * vds will make the ioctl calls in the 64 bit address space so vdc - * will convert the data to/from 32 bit as necessary before doing - * the copyin or copyout. 
* * Return Code: * 0 @@ -3188,14 +3575,11 @@ { int instance = SDUNIT(getminor(dev)); vdc_t *vdc = NULL; - int op = -1; /* VD_OP_XXX value */ int rv = -1; int idx = 0; /* index into dk_ioctl[] */ size_t len = 0; /* #bytes to send to vds */ size_t alloc_len = 0; /* #bytes to allocate mem for */ - size_t copy_len = 0; /* #bytes to copy in/out */ caddr_t mem_p = NULL; - boolean_t do_convert_32to64 = B_FALSE; size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); PR0("%s: Processing ioctl(%x) for dev %x : model %x\n", @@ -3211,8 +3595,7 @@ /* * Check to see if we can communicate with the vDisk server */ - rv = vdc_is_able_to_tx_data(vdc, O_NONBLOCK); - if (rv == B_FALSE) { + if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) { PR0("%s[%d] Not ready to transmit data\n", __func__, instance); return (ENOLINK); } @@ -3234,29 +3617,16 @@ return (ENOTSUP); } - copy_len = len = dk_ioctl[idx].nbytes; - op = dk_ioctl[idx].op; + len = dk_ioctl[idx].nbytes; /* - * Some ioctl operations have different sized structures for 32 bit - * and 64 bit. If the userland caller is 32 bit, we need to check - * to see if the operation is one of those special cases and - * flag that we need to convert to and/or from 32 bit since vds - * will make the call as 64 bit. - */ - if ((ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) && - (dk_ioctl[idx].nbytes != 0) && - (dk_ioctl[idx].nbytes32 != 0)) { - do_convert_32to64 = B_TRUE; - copy_len = dk_ioctl[idx].nbytes32; - } - - /* - * Deal with the ioctls which the server does not provide. + * Deal with the ioctls which the server does not provide. 
vdc can + * fake these up and return immediately */ switch (cmd) { case CDROMREADOFFSET: case DKIOCREMOVABLE: + case USCSICMD: return (ENOTTY); case DKIOCINFO: @@ -3289,73 +3659,29 @@ } /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ - ASSERT(op != 0); + ASSERT(dk_ioctl[idx].op != 0); /* LDC requires that the memory being mapped is 8-byte aligned */ alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); PR1("%s[%d]: struct size %d alloc %d\n", __func__, instance, len, alloc_len); - if (alloc_len != 0) - mem_p = kmem_zalloc(alloc_len, KM_SLEEP); - - if (dk_ioctl[idx].copy & VD_COPYIN) { - if (arg == NULL) { - if (mem_p != NULL) - kmem_free(mem_p, alloc_len); - return (EINVAL); - } - - ASSERT(copy_len != 0); - - rv = ddi_copyin((void *)arg, mem_p, copy_len, mode); - if (rv != 0) { - if (mem_p != NULL) - kmem_free(mem_p, alloc_len); - return (EFAULT); - } - - /* - * some operations need the data to be converted from 32 bit - * to 64 bit structures so that vds can process them on the - * other side. - */ - if (do_convert_32to64) { - switch (cmd) { - case DKIOCSVTOC: - { - struct vtoc vt; - struct vtoc32 vt32; - - ASSERT(mem_p != NULL); - vt32 = *((struct vtoc32 *)(mem_p)); - - vtoc32tovtoc(vt32, vt); - bcopy(&vt, mem_p, len); - break; - } - - case USCSICMD: - { - struct uscsi_cmd scmd; - struct uscsi_cmd *uscmd = &scmd; - struct uscsi_cmd32 *uscmd32; - - ASSERT(mem_p != NULL); - uscmd32 = (struct uscsi_cmd32 *)mem_p; - - /* - * Convert the ILP32 uscsi data from the - * application to LP64 for internal use. 
- */ - uscsi_cmd32touscsi_cmd(uscmd32, uscmd); - bcopy(uscmd, mem_p, len); - break; - } - default: - break; - } - } + ASSERT(alloc_len != 0); /* sanity check */ + mem_p = kmem_zalloc(alloc_len, KM_SLEEP); + + /* + * Call the conversion function for this ioctl whhich if necessary + * converts from the Solaris format to the format ARC'ed + * as part of the vDisk protocol (FWARC 2006/195) + */ + ASSERT(dk_ioctl[idx].convert != NULL); + rv = (dk_ioctl[idx].convert)(arg, mem_p, mode, VD_COPYIN); + if (rv != 0) { + PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", + __func__, instance, rv, cmd); + if (mem_p != NULL) + kmem_free(mem_p, alloc_len); + return (rv); } /* @@ -3399,8 +3725,8 @@ /* * send request to vds to service the ioctl. */ - rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, op, mode, - SDPART((getminor(dev)))); + rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op, + mode, SDPART((getminor(dev)))); if (rv != 0) { /* * This is not necessarily an error. The ioctl could @@ -3429,72 +3755,18 @@ } /* - * if we don't have to do a copyout, we have nothing left to do - * so we just return. + * Call the conversion function (if it exists) for this ioctl + * which converts from the format ARC'ed as part of the vDisk + * protocol (FWARC 2006/195) back to a format understood by + * the rest of Solaris. */ - if ((dk_ioctl[idx].copy & VD_COPYOUT) == 0) { + rv = (dk_ioctl[idx].convert)(mem_p, arg, mode, VD_COPYOUT); + if (rv != 0) { + PR0("%s[%d]: convert returned %d for ioctl 0x%x\n", + __func__, instance, rv, cmd); if (mem_p != NULL) kmem_free(mem_p, alloc_len); - return (0); - } - - /* sanity check */ - if (mem_p == NULL) - return (EFAULT); - - - /* - * some operations need the data to be converted from 64 bit - * back to 32 bit structures after vds has processed them. 
- */ - if (do_convert_32to64) { - switch (cmd) { - case DKIOCGVTOC: - { - struct vtoc vt; - struct vtoc32 vt32; - - ASSERT(mem_p != NULL); - vt = *((struct vtoc *)(mem_p)); - - vtoctovtoc32(vt, vt32); - bcopy(&vt32, mem_p, copy_len); - break; - } - - case USCSICMD: - { - struct uscsi_cmd32 *uc32; - struct uscsi_cmd *uc; - - len = sizeof (struct uscsi_cmd32); - - ASSERT(mem_p != NULL); - uc = (struct uscsi_cmd *)mem_p; - uc32 = kmem_zalloc(len, KM_SLEEP); - - uscsi_cmdtouscsi_cmd32(uc, uc32); - bcopy(uc32, mem_p, copy_len); - PR0("%s[%d]: uscsi_cmd32:%x\n", __func__, instance, - ((struct uscsi_cmd32 *)mem_p)->uscsi_cdblen); - kmem_free(uc32, len); - break; - } - default: - PR1("%s[%d]: This mode (%x) should just work for(%x)\n", - __func__, instance, mode, cmd); - break; - } - } - - ASSERT(len != 0); - ASSERT(mem_p != NULL); - - rv = ddi_copyout(mem_p, (void *)arg, copy_len, mode); - if (rv != 0) { - vdc_msg("%s[%d]: Could not do copy out for ioctl (%x)\n", - __func__, instance, cmd); - rv = EFAULT; + return (rv); } if (mem_p != NULL) @@ -3505,6 +3777,224 @@ /* * Function: + * + * Description: + * This is an empty conversion function used by ioctl calls which + * do not need to convert the data being passed in/out to userland + */ +static int +vdc_null_copy_func(void *from, void *to, int mode, int dir) +{ + _NOTE(ARGUNUSED(from)) + _NOTE(ARGUNUSED(to)) + _NOTE(ARGUNUSED(mode)) + _NOTE(ARGUNUSED(dir)) + + return (0); +} + +/* + * Function: + * vdc_get_vtoc_convert() + * + * Description: + * This routine fakes up the disk info needed for some DKIO ioctls. + * + * Arguments: + * from - the buffer containing the data to be copied from + * to - the buffer to be copied to + * mode - flags passed to ioctl() call + * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT + * + * Return Code: + * 0 - Success + * ENXIO - incorrect buffer passed in. + * EFAULT - ddi_copyxxx routine encountered an error. 
+ */ +static int +vdc_get_vtoc_convert(void *from, void *to, int mode, int dir) +{ + void *tmp_mem = NULL; + void *tmp_memp; + struct vtoc vt; + struct vtoc32 vt32; + int copy_len = 0; + int rv = 0; + + if (dir != VD_COPYOUT) + return (0); /* nothing to do */ + + if ((from == NULL) || (to == NULL)) + return (ENXIO); + + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) + copy_len = sizeof (struct vtoc32); + else + copy_len = sizeof (struct vtoc); + + tmp_mem = kmem_alloc(copy_len, KM_SLEEP); + + VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + vtoctovtoc32(vt, vt32); + tmp_memp = &vt32; + } else { + tmp_memp = &vt; + } + rv = ddi_copyout(tmp_memp, to, copy_len, mode); + if (rv != 0) + rv = EFAULT; + + kmem_free(tmp_mem, copy_len); + return (rv); +} + +/* + * Function: + * vdc_set_vtoc_convert() + * + * Description: + * + * Arguments: + * from - Buffer with data + * to - Buffer where data is to be copied to + * mode - flags passed to ioctl + * dir - direction of copy (in or out) + * + * Return Code: + * 0 - Success + * ENXIO - Invalid buffer passed in + * EFAULT - ddi_copyin of data failed + */ +static int +vdc_set_vtoc_convert(void *from, void *to, int mode, int dir) +{ + void *tmp_mem = NULL; + struct vtoc vt; + struct vtoc *vtp = &vt; + vd_vtoc_t vtvd; + int copy_len = 0; + int rv = 0; + + if (dir != VD_COPYIN) + return (0); /* nothing to do */ + + if ((from == NULL) || (to == NULL)) + return (ENXIO); + + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) + copy_len = sizeof (struct vtoc32); + else + copy_len = sizeof (struct vtoc); + + tmp_mem = kmem_alloc(copy_len, KM_SLEEP); + + rv = ddi_copyin(from, tmp_mem, copy_len, mode); + if (rv != 0) { + kmem_free(tmp_mem, copy_len); + return (EFAULT); + } + + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); + } else { + vtp = tmp_mem; + } + + VTOC2VD_VTOC(vtp, &vtvd); + 
bcopy(&vtvd, to, sizeof (vd_vtoc_t)); + kmem_free(tmp_mem, copy_len); + + return (0); +} + +/* + * Function: + * vdc_get_geom_convert() + * + * Description: + * + * Arguments: + * from - Buffer with data + * to - Buffer where data is to be copied to + * mode - flags passed to ioctl + * dir - direction of copy (in or out) + * + * Return Code: + * 0 - Success + * ENXIO - Invalid buffer passed in + * EFAULT - ddi_copyin of data failed + */ +static int +vdc_get_geom_convert(void *from, void *to, int mode, int dir) +{ + struct dk_geom geom; + int copy_len = sizeof (struct dk_geom); + int rv = 0; + + if (dir != VD_COPYOUT) + return (0); /* nothing to do */ + + if ((from == NULL) || (to == NULL)) + return (ENXIO); + + VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); + rv = ddi_copyout(&geom, to, copy_len, mode); + if (rv != 0) + rv = EFAULT; + + return (rv); +} + +/* + * Function: + * vdc_set_geom_convert() + * + * Description: + * This routine performs the necessary convertions from the DKIOCSVTOC + * Solaris structure to the format defined in FWARC 2006/195 + * + * Arguments: + * from - Buffer with data + * to - Buffer where data is to be copied to + * mode - flags passed to ioctl + * dir - direction of copy (in or out) + * + * Return Code: + * 0 - Success + * ENXIO - Invalid buffer passed in + * EFAULT - ddi_copyin of data failed + */ +static int +vdc_set_geom_convert(void *from, void *to, int mode, int dir) +{ + vd_geom_t vdgeom; + void *tmp_mem = NULL; + int copy_len = sizeof (struct dk_geom); + int rv = 0; + + if (dir != VD_COPYIN) + return (0); /* nothing to do */ + + if ((from == NULL) || (to == NULL)) + return (ENXIO); + + tmp_mem = kmem_alloc(copy_len, KM_SLEEP); + + rv = ddi_copyin(from, tmp_mem, copy_len, mode); + if (rv != 0) { + kmem_free(tmp_mem, copy_len); + return (EFAULT); + } + DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); + bcopy(&vdgeom, to, sizeof (vdgeom)); + kmem_free(tmp_mem, copy_len); + + return (0); +} + +/* + * Function: * 
vdc_create_fake_geometry() * * Description: @@ -3523,6 +4013,8 @@ static int vdc_create_fake_geometry(vdc_t *vdc) { + int rv = 0; + ASSERT(vdc != NULL); /* @@ -3551,10 +4043,76 @@ /* * DKIOCGMEDIAINFO support */ - vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); + if (vdc->minfo == NULL) + vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); vdc->minfo->dki_media_type = DK_FIXED_DISK; vdc->minfo->dki_capacity = 1; vdc->minfo->dki_lbsize = DEV_BSIZE; - return (0); + return (rv); } + +/* + * Function: + * vdc_setup_disk_layout() + * + * Description: + * This routine discovers all the necessary details about the "disk" + * by requesting the data that is available from the vDisk server and by + * faking up the rest of the data. + * + * Arguments: + * vdc - soft state pointer for this instance of the device driver. + * + * Return Code: + * 0 - Success + */ +static int +vdc_setup_disk_layout(vdc_t *vdc) +{ + dev_t dev; + int slice = 0; + int rv; + + ASSERT(vdc != NULL); + + rv = vdc_create_fake_geometry(vdc); + if (rv != 0) { + cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err%d)", + vdc->instance, rv); + } + + if (vdc->vtoc == NULL) + vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); + + dev = makedevice(ddi_driver_major(vdc->dip), + VD_MAKE_DEV(vdc->instance, 0)); + rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); + if (rv) { + cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)", + vdc->instance, rv); + return (rv); + } + + /* + * Read disk label from start of disk + */ + vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); + + /* + * find the slice that represents the entire "disk" and use that to + * read the disk label. 
The convention in Solaris is that slice 2 + * represents the whole disk so we check that it is otherwise we + * default to slice 0 + */ + if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && + (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { + slice = 2; + } else { + slice = 0; + } + rv = vdc_populate_descriptor(vdc, (caddr_t)vdc->label, DK_LABEL_SIZE, + VD_OP_BREAD, 0, slice); + + return (rv); +}
--- a/usr/src/uts/sun4v/io/vds.c Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/io/vds.c Mon May 22 13:24:32 2006 -0700 @@ -47,7 +47,6 @@ #include <sys/vdsk_mailbox.h> #include <sys/vdsk_common.h> #include <sys/vtoc.h> -#include <sys/scsi/impl/uscsi.h> /* Virtual disk server initialization flags */ @@ -178,14 +177,21 @@ int (*function)(vd_t *vd, vd_dring_payload_t *request); } vds_operation_t; -typedef struct ioctl { - uint8_t operation; - const char *operation_name; - int cmd; - const char *cmd_name; - uint_t copy; - size_t nbytes; -} ioctl_t; +typedef struct vd_ioctl { + uint8_t operation; /* vdisk operation */ + const char *operation_name; /* vdisk operation name */ + size_t nbytes; /* size of operation buffer */ + int cmd; /* corresponding ioctl cmd */ + const char *cmd_name; /* ioctl cmd name */ + void *arg; /* ioctl cmd argument */ + /* convert input vd_buf to output ioctl_arg */ + void (*copyin)(void *vd_buf, void *ioctl_arg); + /* convert input ioctl_arg to output vd_buf */ + void (*copyout)(void *ioctl_arg, void *vd_buf); +} vd_ioctl_t; + +/* Define trivial copyin/copyout conversion function flag */ +#define VD_IDENTITY ((void (*)(void *, void *))-1) static int vds_ldc_retries = VDS_LDC_RETRIES; @@ -194,6 +200,17 @@ static int vd_open_flags = VD_OPEN_FLAGS; +/* + * Supported protocol version pairs, from highest (newest) to lowest (oldest) + * + * Each supported major version should appear only once, paired with (and only + * with) its highest supported minor version number (as the protocol requires + * supporting all lower minor version numbers as well) + */ +static const vio_ver_t vds_version[] = {{1, 0}}; +static const size_t vds_num_versions = + sizeof (vds_version)/sizeof (vds_version[0]); + #ifdef DEBUG static int vd_msglevel; #endif /* DEBUG */ @@ -282,17 +299,41 @@ return (status); } +static void +vd_geom2dk_geom(void *vd_buf, void *ioctl_arg) +{ + VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); +} + +static void 
+vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg) +{ + VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); +} + +static void +dk_geom2vd_geom(void *ioctl_arg, void *vd_buf) +{ + DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf); +} + +static void +vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) +{ + VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); +} + static int -vd_do_slice_ioctl(vd_t *vd, int cmd, void *buf) +vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) { switch (cmd) { case DKIOCGGEOM: - ASSERT(buf != NULL); - bcopy(&vd->dk_geom, buf, sizeof (vd->dk_geom)); + ASSERT(ioctl_arg != NULL); + bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom)); return (0); case DKIOCGVTOC: - ASSERT(buf != NULL); - bcopy(&vd->vtoc, buf, sizeof (vd->vtoc)); + ASSERT(ioctl_arg != NULL); + bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc)); return (0); default: return (ENOTSUP); @@ -300,7 +341,7 @@ } static int -vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, ioctl_t *ioctl) +vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) { int rval = 0, status; size_t nbytes = request->nbytes; /* modifiable copy */ @@ -310,8 +351,8 @@ ASSERT(request->slice < vd->nslices); PR0("Performing %s", ioctl->operation_name); - /* Get data from client, if necessary */ - if (ioctl->copy & VD_COPYIN) { + /* Get data from client and convert, if necessary */ + if (ioctl->copyin != NULL) { ASSERT(nbytes != 0 && buf != NULL); PR1("Getting \"arg\" data from client"); if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, @@ -321,6 +362,12 @@ "copying from client", status); return (status); } + + /* Convert client's data, if necessary */ + if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ + ioctl->arg = buf; + else /* convert client vdisk operation data to ioctl data */ + (ioctl->copyin)(buf, (void *)ioctl->arg); } /* @@ -328,10 +375,12 @@ * real driver perform the ioctl() */ if (vd->vdisk_type == VD_DISK_TYPE_SLICE && 
!vd->pseudo) { - if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, buf)) != 0) + if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, + (void *)ioctl->arg)) != 0) return (status); } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice], - ioctl->cmd, (intptr_t)buf, FKIOCTL, kcred, &rval)) != 0) { + ioctl->cmd, (intptr_t)ioctl->arg, FKIOCTL, kcred, + &rval)) != 0) { PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status); return (status); } @@ -342,10 +391,15 @@ } #endif /* DEBUG */ - /* Send data to client, if necessary */ - if (ioctl->copy & VD_COPYOUT) { + /* Convert data and send to client, if necessary */ + if (ioctl->copyout != NULL) { ASSERT(nbytes != 0 && buf != NULL); PR1("Sending \"arg\" data to client"); + + /* Convert ioctl data to vdisk operation data, if necessary */ + if (ioctl->copyout != VD_IDENTITY) + (ioctl->copyout)((void *)ioctl->arg, buf); + if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { @@ -358,39 +412,113 @@ return (status); } +/* + * Open any slices which have become non-empty as a result of performing a + * set-VTOC operation for the client. + * + * When serving a full disk, vds attempts to exclusively open all of the + * disk's slices to prevent another thread or process in the service domain + * from "stealing" a slice or from performing I/O to a slice while a vds + * client is accessing it. Unfortunately, underlying drivers, such as sd(7d) + * and cmdk(7d), return an error when attempting to open the device file for a + * slice which is currently empty according to the VTOC. This driver behavior + * means that vds must skip opening empty slices when initializing a vdisk for + * full-disk service and try to open slices that become non-empty (via a + * set-VTOC operation) during use of the full disk in order to begin serving + * such slices to the client. 
This approach has an inherent (and therefore + * unavoidable) race condition; it also means that failure to open a + * newly-non-empty slice has different semantics than failure to open an + * initially-non-empty slice: Due to driver behavior, opening a + * newly-non-empty slice is a necessary side effect of vds performing a + * (successful) set-VTOC operation for a client on an in-service (and in-use) + * disk in order to begin serving the slice; failure of this side-effect + * operation does not mean that the client's set-VTOC operation failed or that + * operations on other slices must fail. Therefore, this function prints an + * error message on failure to open a slice, but does not return an error to + * its caller--unlike failure to open a slice initially, which results in an + * error that prevents serving the vdisk (and thereby requires an + * administrator to resolve the problem). Note that, apart from another + * thread or process opening a new slice during the race-condition window, + * failure to open a slice in this function will likely indicate an underlying + * drive problem, which will also likely become evident in errors returned by + * operations on other slices, and which will require administrative + * intervention and possibly servicing the drive. 
+ */ +static void +vd_open_new_slices(vd_t *vd) +{ + int rval, status; + struct vtoc vtoc; + + + /* Get the (new) VTOC for updated slice sizes */ + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)&vtoc, + FKIOCTL, kcred, &rval)) != 0) { + PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d", status); + return; + } + + /* Open any newly-non-empty slices */ + for (int slice = 0; slice < vd->nslices; slice++) { + /* Skip zero-length slices */ + if (vtoc.v_part[slice].p_size == 0) { + if (vd->ldi_handle[slice] != NULL) + PR0("Open slice %u now has zero length", slice); + continue; + } + + /* Skip already-open slices */ + if (vd->ldi_handle[slice] != NULL) + continue; + + PR0("Opening newly-non-empty slice %u", slice); + if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, + vd_open_flags, kcred, &vd->ldi_handle[slice], + vd->vds->ldi_ident)) != 0) { + PRN("ldi_open_by_dev() returned errno %d " + "for slice %u", status, slice); + } + } +} + #define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t)) static int vd_ioctl(vd_t *vd, vd_dring_payload_t *request) { - static ioctl_t ioctl[] = { + int i, status; + void *buf = NULL; + struct dk_geom dk_geom = {0}; + struct vtoc vtoc = {0}; + vd_ioctl_t ioctl[] = { /* Command (no-copy) operations */ - {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), DKIOCFLUSHWRITECACHE, - STRINGIZE(DKIOCFLUSHWRITECACHE), 0, 0}, + {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0, + DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE), + NULL, NULL, NULL}, /* "Get" (copy-out) operations */ - {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), DKIOCGETWCE, - STRINGIZE(DKIOCGETWCE), VD_COPYOUT, RNDSIZE(int)}, - {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), DKIOCGGEOM, - STRINGIZE(DKIOCGGEOM), VD_COPYOUT, RNDSIZE(struct dk_geom)}, - {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), DKIOCGVTOC, - STRINGIZE(DKIOCGVTOC), VD_COPYOUT, RNDSIZE(struct vtoc)}, + {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), + DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), + NULL, 
NULL, VD_IDENTITY}, + {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), + RNDSIZE(vd_geom_t), + DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), + &dk_geom, NULL, dk_geom2vd_geom}, + {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t), + DKIOCGVTOC, STRINGIZE(DKIOCGVTOC), + &vtoc, NULL, vtoc2vd_vtoc}, /* "Set" (copy-in) operations */ - {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), DKIOCSETWCE, - STRINGIZE(DKIOCSETWCE), VD_COPYOUT, RNDSIZE(int)}, - {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), DKIOCSGEOM, - STRINGIZE(DKIOCSGEOM), VD_COPYIN, RNDSIZE(struct dk_geom)}, - {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), DKIOCSVTOC, - STRINGIZE(DKIOCSVTOC), VD_COPYIN, RNDSIZE(struct vtoc)}, - - /* "Get/set" (copy-in/copy-out) operations */ - {VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), USCSICMD, - STRINGIZE(USCSICMD), VD_COPYIN|VD_COPYOUT, - RNDSIZE(struct uscsi_cmd)} - + {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), + DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), + NULL, VD_IDENTITY, NULL}, + {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), + RNDSIZE(vd_geom_t), + DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), + &dk_geom, vd_geom2dk_geom, NULL}, + {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t), + DKIOCSVTOC, STRINGIZE(DKIOCSVTOC), + &vtoc, vd_vtoc2vtoc, NULL}, }; - int i, status; - void *buf = NULL; size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); @@ -403,15 +531,13 @@ */ for (i = 0; i < nioctls; i++) { if (request->operation == ioctl[i].operation) { - if (request->nbytes > ioctl[i].nbytes) { - PRN("%s: Expected <= %lu \"nbytes\", " - "got %lu", ioctl[i].operation_name, - ioctl[i].nbytes, request->nbytes); - return (EINVAL); - } else if ((request->nbytes % sizeof (uint64_t)) != 0) { - PRN("%s: nbytes = %lu not a multiple of %lu", - ioctl[i].operation_name, request->nbytes, - sizeof (uint64_t)); + /* LDC memory operations require 8-byte multiples */ + ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); + + if (request->nbytes != ioctl[i].nbytes) { + PRN("%s: 
Expected nbytes = %lu, got %lu", + ioctl[i].operation_name, ioctl[i].nbytes, + request->nbytes); return (EINVAL); } @@ -425,6 +551,9 @@ status = vd_do_ioctl(vd, request, buf, &ioctl[i]); if (request->nbytes) kmem_free(buf, request->nbytes); + if ((request->operation == VD_OP_SET_VTOC) && + (vd->vdisk_type == VD_DISK_TYPE_DISK)) + vd_open_new_slices(vd); return (status); } @@ -441,8 +570,7 @@ {VD_OP_GET_VTOC, vd_ioctl}, {VD_OP_SET_VTOC, vd_ioctl}, {VD_OP_GET_DISKGEOM, vd_ioctl}, - {VD_OP_SET_DISKGEOM, vd_ioctl}, - {VD_OP_SCSICMD, vd_ioctl} + {VD_OP_SET_DISKGEOM, vd_ioctl} }; static const size_t vds_noperations = @@ -505,19 +633,83 @@ } /* - * Return 1 if the "type", "subtype", and "env" fields of the "tag" first - * argument match the corresponding remaining arguments; otherwise, return 0 + * Return true if the "type", "subtype", and "env" fields of the "tag" first + * argument match the corresponding remaining arguments; otherwise, return false */ -int +boolean_t vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env) { return ((tag->vio_msgtype == type) && (tag->vio_subtype == subtype) && - (tag->vio_subtype_env == env)) ? 1 : 0; + (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE; } +/* + * Check whether the major/minor version specified in "ver_msg" is supported + * by this server. 
+ */ +static boolean_t +vds_supported_version(vio_ver_msg_t *ver_msg) +{ + for (int i = 0; i < vds_num_versions; i++) { + ASSERT(vds_version[i].major > 0); + ASSERT((i == 0) || + (vds_version[i].major < vds_version[i-1].major)); + + /* + * If the major versions match, adjust the minor version, if + * necessary, down to the highest value supported by this + * server and return true so this message will get "ack"ed; + * the client should also support all minor versions lower + * than the value it sent + */ + if (ver_msg->ver_major == vds_version[i].major) { + if (ver_msg->ver_minor > vds_version[i].minor) { + PR0("Adjusting minor version from %u to %u", + ver_msg->ver_minor, vds_version[i].minor); + ver_msg->ver_minor = vds_version[i].minor; + } + return (B_TRUE); + } + + /* + * If the message contains a higher major version number, set + * the message's major/minor versions to the current values + * and return false, so this message will get "nack"ed with + * these values, and the client will potentially try again + * with the same or a lower version + */ + if (ver_msg->ver_major > vds_version[i].major) { + ver_msg->ver_major = vds_version[i].major; + ver_msg->ver_minor = vds_version[i].minor; + return (B_FALSE); + } + + /* + * Otherwise, the message's major version is less than the + * current major version, so continue the loop to the next + * (lower) supported version + */ + } + + /* + * No common version was found; "ground" the version pair in the + * message to terminate negotiation + */ + ver_msg->ver_major = 0; + ver_msg->ver_minor = 0; + return (B_FALSE); +} + +/* + * Process a version message from a client. vds expects to receive version + * messages from clients seeking service, but never issues version messages + * itself; therefore, vds can ACK or NACK client version messages, but does + * not expect to receive version-message ACKs or NACKs (and will treat such + * messages as invalid). 
+ */ static int -process_ver_msg(vio_msg_t *msg, size_t msglen) +vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) { vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg; @@ -541,16 +733,46 @@ return (EBADMSG); } - if ((ver_msg->ver_major != VD_VER_MAJOR) || - (ver_msg->ver_minor != VD_VER_MINOR)) { - /* Unsupported version; send back supported version */ - ver_msg->ver_major = VD_VER_MAJOR; - ver_msg->ver_minor = VD_VER_MINOR; + /* + * We're talking to the expected kind of client; set our device class + * for "ack/nack" back to the client + */ + ver_msg->dev_class = VDEV_DISK_SERVER; + + /* + * Check whether the (valid) version message specifies a version + * supported by this server. If the version is not supported, return + * EBADMSG so the message will get "nack"ed; vds_supported_version() + * will have updated the message with a supported version for the + * client to consider + */ + if (!vds_supported_version(ver_msg)) return (EBADMSG); - } + + + /* + * A version has been agreed upon; use the client's SID for + * communication on this channel now + */ + ASSERT(!(vd->initialized & VD_SID)); + vd->sid = ver_msg->tag.vio_sid; + vd->initialized |= VD_SID; - /* Valid message, version accepted */ - ver_msg->dev_class = VDEV_DISK_SERVER; + /* + * When multiple versions are supported, this function should store + * the negotiated major and minor version values in the "vd" data + * structure to govern further communication; in particular, note that + * the client might have specified a lower minor version for the + * agreed major version than specifed in the vds_version[] array. The + * following assertions should help remind future maintainers to make + * the appropriate changes to support multiple versions. 
+ */ + ASSERT(vds_num_versions == 1); + ASSERT(ver_msg->ver_major == vds_version[0].major); + ASSERT(ver_msg->ver_minor == vds_version[0].minor); + + PR0("Using major version %u, minor version %u", + ver_msg->ver_major, ver_msg->ver_minor); return (0); } @@ -598,7 +820,6 @@ * "max_xfer_sz" isn't an integral multiple of the page size. * Must first get the maximum transfer size in bytes. */ -#if 1 /* NEWOBP */ size_t max_xfer_bytes = attr_msg->vdisk_block_size ? attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : attr_msg->max_xfer_sz; @@ -607,13 +828,6 @@ ((max_xfer_bytes/PAGESIZE + ((max_xfer_bytes % PAGESIZE) ? 1 : 0))* (sizeof (ldc_mem_cookie_t))); -#else /* NEWOBP */ - size_t max_inband_msglen = - sizeof (vd_dring_inband_msg_t) + - ((attr_msg->max_xfer_sz/PAGESIZE - + (attr_msg->max_xfer_sz % PAGESIZE ? 1 : 0))* - (sizeof (ldc_mem_cookie_t))); -#endif /* NEWOBP */ /* * Set the maximum expected message length to @@ -710,7 +924,7 @@ if (dring_minfo.vaddr == NULL) { PRN("Descriptor ring virtual address is NULL"); - return (EBADMSG); /* FIXME appropriate status? */ + return (ENXIO); } @@ -753,7 +967,6 @@ return (EBADMSG); } - /* FIXME set ack in unreg_msg? 
*/ return (0); } @@ -1005,13 +1218,8 @@ { int retry, status; size_t size = *nbytes; - boolean_t isempty = B_FALSE; - /* FIXME work around interrupt problem */ - if ((ldc_chkq(ldc_handle, &isempty) != 0) || isempty) - return (ENOMSG); - for (retry = 0, status = ETIMEDOUT; retry < vds_ldc_retries && status == ETIMEDOUT; retry++) { @@ -1058,14 +1266,9 @@ */ switch (vd->state) { case VD_STATE_INIT: /* expect version message */ - if ((status = process_ver_msg(msg, msglen)) != 0) + if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0) return (status); - /* The first version message sets the SID */ - ASSERT(!(vd->initialized & VD_SID)); - vd->sid = msg->tag.vio_sid; - vd->initialized |= VD_SID; - /* Version negotiated, move to that state */ vd->state = VD_STATE_VER; return (0); @@ -1226,28 +1429,66 @@ } static void -vd_process_queue(void *arg) +vd_recv_msg(void *arg) { - vd_t *vd = (vd_t *)arg; - size_t max_msglen, nbytes; - vio_msg_t *vio_msg; + vd_t *vd = (vd_t *)arg; + int status = 0; PR2("Entered"); ASSERT(vd != NULL); mutex_enter(&vd->lock); - max_msglen = vd->max_msglen; /* vd->maxmsglen can change */ - vio_msg = kmem_alloc(max_msglen, KM_SLEEP); - for (nbytes = vd->max_msglen; - vd->enabled && recv_msg(vd->ldc_handle, vio_msg, &nbytes) == 0; - nbytes = vd->max_msglen) - vd_process_msg(vd, vio_msg, nbytes); - kmem_free(vio_msg, max_msglen); + /* + * Receive and process any messages in the LDC queue; max_msglen is + * reset each time through the loop, as vd->max_msglen can increase + * during connection handshake + */ + for (size_t max_msglen = vd->max_msglen; + vd->enabled && status == 0; + max_msglen = vd->max_msglen) { + size_t msglen = max_msglen; + vio_msg_t *vio_msg = kmem_alloc(max_msglen, KM_SLEEP); + + if ((status = recv_msg(vd->ldc_handle, vio_msg, &msglen)) == 0) + vd_process_msg(vd, vio_msg, msglen); + else if (status != ENOMSG) + vd_reset_connection(vd, B_TRUE); + kmem_free(vio_msg, max_msglen); + } mutex_exit(&vd->lock); PR2("Returning"); } static 
uint_t +vd_do_handle_ldc_events(vd_t *vd, uint64_t event) +{ + ASSERT(mutex_owned(&vd->lock)); + + if (!vd->enabled) + return (LDC_SUCCESS); + + if (event & LDC_EVT_RESET) { + PR0("Channel was reset"); + return (LDC_SUCCESS); + } + + if (event & LDC_EVT_UP) { + /* Reset the connection state when channel comes (back) up */ + vd_reset_connection(vd, B_FALSE); + } + + if (event & LDC_EVT_READ) { + PR1("New data available"); + /* Queue a task to receive the new data */ + if (ddi_taskq_dispatch(vd->taskq, vd_recv_msg, vd, DDI_SLEEP) != + DDI_SUCCESS) + PRN("Unable to dispatch vd_recv_msg()"); + } + + return (LDC_SUCCESS); +} + +static uint_t vd_handle_ldc_events(uint64_t event, caddr_t arg) { uint_t status; @@ -1256,24 +1497,9 @@ ASSERT(vd != NULL); mutex_enter(&vd->lock); - if (event & LDC_EVT_READ) { - PR1("New packet(s) available"); - /* Queue a task to process the new data */ - if (ddi_taskq_dispatch(vd->taskq, vd_process_queue, vd, 0) != - DDI_SUCCESS) - PRN("Unable to dispatch vd_process_queue()"); - } else if (event & LDC_EVT_RESET) { - PR0("Attempting to bring up reset channel"); - if (((status = ldc_up(vd->ldc_handle)) != 0) && - (status != ECONNREFUSED)) { - PRN("ldc_up() returned errno %d", status); - } - } else if (event & LDC_EVT_UP) { - /* Reset the connection state when channel comes (back) up */ - vd_reset_connection(vd, B_FALSE); - } + status = vd_do_handle_ldc_events(vd, event); mutex_exit(&vd->lock); - return (LDC_SUCCESS); + return (status); } static uint_t @@ -1348,20 +1574,104 @@ } static int -vd_get_params(ldi_handle_t lh, char *block_device, vd_t *vd) +vd_setup_full_disk(vd_t *vd) +{ + int rval, status; + major_t major = getmajor(vd->dev[0]); + minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; + struct vtoc vtoc; + + + /* Get the VTOC for slice sizes */ + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, (intptr_t)&vtoc, + FKIOCTL, kcred, &rval)) != 0) { + PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d", status); + return (status); 
+ } + + /* Set full-disk parameters */ + vd->vdisk_type = VD_DISK_TYPE_DISK; + vd->nslices = (sizeof (vd->dev))/(sizeof (vd->dev[0])); + + /* Move dev number and LDI handle to entire-disk-slice array elements */ + vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; + vd->dev[0] = 0; + vd->ldi_handle[VD_ENTIRE_DISK_SLICE] = vd->ldi_handle[0]; + vd->ldi_handle[0] = NULL; + + /* Initialize device numbers for remaining slices and open them */ + for (int slice = 0; slice < vd->nslices; slice++) { + /* + * Skip the entire-disk slice, as it's already open and its + * device known + */ + if (slice == VD_ENTIRE_DISK_SLICE) + continue; + ASSERT(vd->dev[slice] == 0); + ASSERT(vd->ldi_handle[slice] == NULL); + + /* + * Construct the device number for the current slice + */ + vd->dev[slice] = makedevice(major, (minor + slice)); + + /* + * At least some underlying drivers refuse to open + * devices for (currently) zero-length slices, so skip + * them for now + */ + if (vtoc.v_part[slice].p_size == 0) { + PR0("Skipping zero-length slice %u", slice); + continue; + } + + /* + * Open all non-empty slices of the disk to serve them to the + * client. Slices are opened exclusively to prevent other + * threads or processes in the service domain from performing + * I/O to slices being accessed by a client. Failure to open + * a slice results in vds not serving this disk, as the client + * could attempt (and should be able) to access any non-empty + * slice immediately. Any slices successfully opened before a + * failure will get closed by vds_destroy_vd() as a result of + * the error returned by this function. 
+ */ + PR0("Opening device major %u, minor %u = slice %u", + major, minor, slice); + if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, + vd_open_flags, kcred, &vd->ldi_handle[slice], + vd->vds->ldi_ident)) != 0) { + PRN("ldi_open_by_dev() returned errno %d " + "for slice %u", status, slice); + /* vds_destroy_vd() will close any open slices */ + return (status); + } + } + + return (0); +} + +static int +vd_setup_vd(char *block_device, vd_t *vd) { int otyp, rval, status; dev_info_t *dip; struct dk_cinfo dk_cinfo; + if ((status = ldi_open_by_name(block_device, vd_open_flags, kcred, + &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) { + PRN("ldi_open_by_name(%s) = errno %d", block_device, status); + return (status); + } + /* Get block device's device number, otyp, and size */ - if ((status = ldi_get_dev(lh, &vd->dev[0])) != 0) { + if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) { PRN("ldi_get_dev() returned errno %d for %s", status, block_device); return (status); } - if ((status = ldi_get_otyp(lh, &otyp)) != 0) { + if ((status = ldi_get_otyp(vd->ldi_handle[0], &otyp)) != 0) { PRN("ldi_get_otyp() returned errno %d for %s", status, block_device); return (status); @@ -1370,7 +1680,7 @@ PRN("Cannot serve non-block device %s", block_device); return (ENOTBLK); } - if (ldi_get_size(lh, &vd->vdisk_size) != DDI_SUCCESS) { + if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { PRN("ldi_get_size() failed for %s", block_device); return (EIO); } @@ -1390,8 +1700,8 @@ } /* Get dk_cinfo to determine slice of backing block device */ - if ((status = ldi_ioctl(lh, DKIOCINFO, (intptr_t)&dk_cinfo, - FKIOCTL, kcred, &rval)) != 0) { + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO, + (intptr_t)&dk_cinfo, FKIOCTL, kcred, &rval)) != 0) { PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s", status, block_device); return (status); @@ -1403,27 +1713,20 @@ return (EIO); } - /* If block device slice is entire disk, fill in all slice devices */ - if 
(dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) { - uint_t slice; - major_t major = getmajor(vd->dev[0]); - minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; - vd->vdisk_type = VD_DISK_TYPE_DISK; - vd->nslices = V_NUMPAR; - for (slice = 0; slice < vd->nslices; slice++) - vd->dev[slice] = makedevice(major, (minor + slice)); - return (0); /* ...and we're done */ - } + /* If slice is entire-disk slice, initialize for full disk */ + if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) + return (vd_setup_full_disk(vd)); - /* Otherwise, we have a (partial) slice of a block device */ + + /* Otherwise, we have a non-entire slice of a block device */ vd->vdisk_type = VD_DISK_TYPE_SLICE; vd->nslices = 1; /* Initialize dk_geom structure for single-slice block device */ - if ((status = ldi_ioctl(lh, DKIOCGGEOM, (intptr_t)&vd->dk_geom, - FKIOCTL, kcred, &rval)) != 0) { + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM, + (intptr_t)&vd->dk_geom, FKIOCTL, kcred, &rval)) != 0) { PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s", status, block_device); return (status); @@ -1443,8 +1746,8 @@ /* Initialize vtoc structure for single-slice block device */ - if ((status = ldi_ioctl(lh, DKIOCGVTOC, (intptr_t)&vd->vtoc, - FKIOCTL, kcred, &rval)) != 0) { + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGVTOC, + (intptr_t)&vd->vtoc, FKIOCTL, kcred, &rval)) != 0) { PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d for %s", status, block_device); return (status); @@ -1469,11 +1772,9 @@ vd_t **vdp) { char tq_name[TASKQ_NAMELEN]; - int param_status, status; - uint_t slice; + int status; ddi_iblock_cookie_t iblock = NULL; ldc_attr_t ldc_attr; - ldi_handle_t lh = NULL; vd_t *vd; @@ -1490,19 +1791,9 @@ vd->vds = vds; - /* Get device parameters */ - if ((status = ldi_open_by_name(block_device, FREAD, kcred, &lh, - vds->ldi_ident)) != 0) { - PRN("ldi_open_by_name(%s) = errno %d", block_device, status); + /* Open vdisk and initialize parameters */ + if ((status = 
vd_setup_vd(block_device, vd)) != 0) return (status); - } - param_status = vd_get_params(lh, block_device, vd); - if ((status = ldi_close(lh, FREAD, kcred)) != 0) { - PRN("ldi_close(%s) = errno %d", block_device, status); - return (status); - } - if (param_status != 0) - return (param_status); ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR); PR0("vdisk_type = %s, pseudo = %s, nslices = %u", ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"), @@ -1520,24 +1811,6 @@ vd->initialized |= VD_LOCKING; - /* Open the backing-device slices */ - for (slice = 0; slice < vd->nslices; slice++) { - ASSERT(vd->ldi_handle[slice] == NULL); - PR0("Opening device %u, minor %u = slice %u", - getmajor(vd->dev[slice]), getminor(vd->dev[slice]), slice); - if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK, - vd_open_flags, kcred, &vd->ldi_handle[slice], - vds->ldi_ident)) != 0) { - PRN("ldi_open_by_dev() returned errno %d for slice %u", - status, slice); - /* vds_destroy_vd() will close any open slices */ -#if 0 /* FIXME */ - return (status); -#endif - } - } - - /* Create the task queue for the vdisk */ (void) snprintf(tq_name, sizeof (tq_name), "vd%lu", id); PR1("tq_name = %s", tq_name); @@ -1572,12 +1845,6 @@ return (status); } - if (((status = ldc_up(vd->ldc_handle)) != 0) && - (status != ECONNREFUSED)) { - PRN("ldc_up() returned errno %d", status); - return (status); - } - /* Add the successfully-initialized vdisk to the server's table */ if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) { @@ -1786,7 +2053,7 @@ return; } if (curr_ldc_id != prev_ldc_id) { - _NOTE(NOTREACHED); /* FIXME is there a better way? 
*/ + _NOTE(NOTREACHED); /* lint is confused */ PRN("Not changing vdisk: " "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id); return; @@ -1922,6 +2189,13 @@ } vds->initialized |= VDS_MDEG; + /* Prevent auto-detaching so driver is available whenever MD changes */ + if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) != + DDI_PROP_SUCCESS) { + PRN("failed to set \"%s\" property for instance %u", + DDI_NO_AUTODETACH, instance); + } + ddi_report_dev(dip); return (DDI_SUCCESS); }
--- a/usr/src/uts/sun4v/sys/ldc_impl.h Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/sys/ldc_impl.h Mon May 22 13:24:32 2006 -0700 @@ -88,12 +88,14 @@ #define TS_UP (TS_READY | TS_VER_DONE | TS_HSHAKE_DONE) /* LDC Channel Transport Handshake states */ -#define TS_SENT_RTS 0x01 /* Sent RTS */ -#define TS_RCVD_RTR 0x02 /* Received RTR */ -#define TS_SENT_RDX 0x04 /* Sent RDX */ -#define TS_RCVD_RTS 0x10 /* Received RTS */ -#define TS_SENT_RTR 0x20 /* Sent RTR */ -#define TS_RCVD_RDX 0x40 /* Received RDX */ +#define TS_SENT_VER 0x01 /* Sent version */ +#define TS_SENT_RTS 0x02 /* Sent RTS */ +#define TS_RCVD_RTR 0x04 /* Received RTR */ +#define TS_SENT_RDX 0x08 /* Sent RDX */ +#define TS_RCVD_VER 0x10 /* Received version */ +#define TS_RCVD_RTS 0x20 /* Received RTS */ +#define TS_SENT_RTR 0x40 /* Sent RTR */ +#define TS_RCVD_RDX 0x80 /* Received RDX */ /* LDC MSG Envelope */ #define LDC_LEN_MASK 0x3F @@ -104,13 +106,12 @@ #define LDC_FRAG_CONT 0x00 /* frag_info = 0x00 */ /* - * LDC fragmented xfer loop wait cnt - * When data is arriving in fragments, the read thread will - * look for a packet 'LDC_CHK_CNT' times. Between each check - * it will loop 'LDC_LOOP_CNT' times + * LDC will retry LDC_MAX_RETRIES times when sending or + * receiving data or if the HV returns back EWOULDBLOCK. + * Between each retry it will wait LDC_DELAY usecs. */ -#define LDC_CHK_CNT 1000 -#define LDC_LOOP_CNT 1000 +#define LDC_MAX_RETRIES 1000 +#define LDC_DELAY 1 /* * LDC Version information
--- a/usr/src/uts/sun4v/sys/vdc.h Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/sys/vdc.h Mon May 22 13:24:32 2006 -0700 @@ -68,13 +68,6 @@ #define VDC_HANDSHAKE_STOP 0x2000 /* stop further handshakes */ /* - * Bit-field values to indicate status of local DRing entry - * - * The lowest 8 bits are reserved for the DRing state. - */ -#define VDC_ALLOC_HANDLE 0x10 - -/* * Definitions of strings to be used to create device node properties. * (vdc uses the capitalised versions of these properties as they are 64-bit) */ @@ -100,7 +93,7 @@ * variables controlling how long to wait before timing out and how many * retries to attempt before giving up when communicating with vds. */ -#define VDC_RETRIES 10 +#define VDC_RETRIES 3 #define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */ @@ -133,7 +126,7 @@ #define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \ ASSERT(vdc != NULL); \ dmsg.tag.vio_sid = vdc->session_id; \ - dmsg.seq_num = ++(vdc->seq_num); + dmsg.seq_num = vdc->seq_num; /* * The states the message processing thread can be in. 
@@ -175,9 +168,11 @@ dev_info_t *dip; /* device info pointer */ int instance; /* driver instance number */ int initialized; /* keeps track of what's init'ed */ + int hshake_cnt; /* number of failed handshakes */ int open; /* count of outstanding opens */ int dkio_flush_pending; /* # outstanding DKIO flushes */ + vio_ver_t ver; /* version number agreed with server */ uint64_t session_id; /* common ID sent with all messages */ uint64_t seq_num; /* most recent sequence num generated */ uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ @@ -187,6 +182,7 @@ uint64_t vdisk_size; /* device size in bytes */ uint64_t max_xfer_sz; /* maximum block size of a descriptor */ uint64_t block_size; /* device block size used */ + struct dk_label *label; /* structure to store disk label */ struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ struct vtoc *vtoc; /* structure to store VTOC data */
--- a/usr/src/uts/sun4v/sys/vdsk_common.h Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/sys/vdsk_common.h Mon May 22 13:24:32 2006 -0700 @@ -146,45 +146,131 @@ */ /* - * VTOC message - * - * vDisk Get Volume Table of Contents (VD_OP_GET_VTOC) - * + * vDisk geometry definition (VD_OP_GET_DISKGEOM and VD_OP_SET_DISKGEOM) + */ +typedef struct vd_geom { + uint16_t ncyl; /* number of data cylinders */ + uint16_t acyl; /* number of alternate cylinders */ + uint16_t bcyl; /* cyl offset for fixed head area */ + uint16_t nhead; /* number of heads */ + uint16_t nsect; /* number of data sectors per track */ + uint16_t intrlv; /* interleave factor */ + uint16_t apc; /* alternates per cyl (SCSI only) */ + uint16_t rpm; /* revolutions per minute */ + uint16_t pcyl; /* number of physical cylinders */ + uint16_t write_reinstruct; /* # sectors to skip, writes */ + uint16_t read_reinstruct; /* # sectors to skip, reads */ +} vd_geom_t; + + +/* + * vDisk partition definition */ typedef struct vd_partition { - uint16_t p_tag; /* ID tag of partition */ - uint16_t p_flag; /* permision flags */ + uint16_t id_tag; /* ID tag of partition */ + uint16_t perm; /* permission flags for partition */ uint32_t reserved; /* padding */ - int64_t p_start; /* start sector no of partition */ - int64_t p_size; /* # of blocks in partition */ + uint64_t start; /* block number of partition start */ + uint64_t nblocks; /* number of blocks in partition */ } vd_partition_t; +/* + * vDisk VTOC definition (VD_OP_GET_VTOC and VD_OP_SET_VTOC) + */ +#define VD_VOLNAME_LEN 8 /* length of volume_name field */ +#define VD_ASCIILABEL_LEN 128 /* length of ascii_label field */ typedef struct vd_vtoc { - uint8_t v_volume[LEN_DKL_VVOL]; /* volume name */ - uint16_t v_sectorsz; /* sector size in bytes */ - uint16_t v_nparts; /* num of partitions */ - uint32_t reserved; /* padding */ - uint8_t v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */ - + char volume_name[VD_VOLNAME_LEN]; /* volume name */ + uint16_t 
sector_size; /* sector size in bytes */ + uint16_t num_partitions; /* number of partitions */ + char ascii_label[VD_ASCIILABEL_LEN]; /* ASCII label */ + vd_partition_t partition[V_NUMPAR]; /* partition headers */ } vd_vtoc_t; /* - * vDisk Get Geometry (VD_OP_GET_GEOM) + * Copy the contents of a vd_geom_t to the contents of a dk_geom struct + */ +#define VD_GEOM2DK_GEOM(vd_geom, dk_geom) \ +{ \ + bzero((dk_geom), sizeof (*(dk_geom))); \ + (dk_geom)->dkg_ncyl = (vd_geom)->ncyl; \ + (dk_geom)->dkg_acyl = (vd_geom)->acyl; \ + (dk_geom)->dkg_bcyl = (vd_geom)->bcyl; \ + (dk_geom)->dkg_nhead = (vd_geom)->nhead; \ + (dk_geom)->dkg_nsect = (vd_geom)->nsect; \ + (dk_geom)->dkg_intrlv = (vd_geom)->intrlv; \ + (dk_geom)->dkg_apc = (vd_geom)->apc; \ + (dk_geom)->dkg_rpm = (vd_geom)->rpm; \ + (dk_geom)->dkg_pcyl = (vd_geom)->pcyl; \ + (dk_geom)->dkg_write_reinstruct = (vd_geom)->write_reinstruct; \ + (dk_geom)->dkg_read_reinstruct = (vd_geom)->read_reinstruct; \ +} + +/* + * Copy the contents of a vd_vtoc_t to the contents of a vtoc struct */ -typedef struct vd_geom { - uint16_t dkg_ncyl; /* # of data cylinders */ - uint16_t dkg_acyl; /* # of alternate cylinders */ - uint16_t dkg_bcyl; /* cyl offset (for fixed head area) */ - uint16_t dkg_nhead; /* # of heads */ - uint16_t dkg_nsect; /* # of data sectors per track */ - uint16_t dkg_intrlv; /* interleave factor */ - uint16_t dkg_apc; /* alternates per cyl (SCSI only) */ - uint16_t dkg_rpm; /* revolutions per minute */ - uint16_t dkg_pcyl; /* # of physical cylinders */ - uint16_t dkg_write_reinstruct; /* # sectors to skip, writes */ - uint16_t dkg_read_reinstruct; /* # sectors to skip, reads */ -} vd_geom_t; +#define VD_VTOC2VTOC(vd_vtoc, vtoc) \ +{ \ + bzero((vtoc), sizeof (*(vtoc))); \ + bcopy((vd_vtoc)->volume_name, (vtoc)->v_volume, \ + MIN(sizeof ((vd_vtoc)->volume_name), \ + sizeof ((vtoc)->v_volume))); \ + bcopy((vd_vtoc)->ascii_label, (vtoc)->v_asciilabel, \ + MIN(sizeof ((vd_vtoc)->ascii_label), \ + sizeof 
((vtoc)->v_asciilabel))); \ + (vtoc)->v_sanity = VTOC_SANE; \ + (vtoc)->v_version = V_VERSION; \ + (vtoc)->v_sectorsz = (vd_vtoc)->sector_size; \ + (vtoc)->v_nparts = (vd_vtoc)->num_partitions; \ + for (int i = 0; i < (vd_vtoc)->num_partitions; i++) { \ + (vtoc)->v_part[i].p_tag = (vd_vtoc)->partition[i].id_tag; \ + (vtoc)->v_part[i].p_flag = (vd_vtoc)->partition[i].perm; \ + (vtoc)->v_part[i].p_start = (vd_vtoc)->partition[i].start; \ + (vtoc)->v_part[i].p_size = (vd_vtoc)->partition[i].nblocks; \ + } \ +} + +/* + * Copy the contents of a dk_geom struct to the contents of a vd_geom_t + */ +#define DK_GEOM2VD_GEOM(dk_geom, vd_geom) \ +{ \ + bzero((vd_geom), sizeof (*(vd_geom))); \ + (vd_geom)->ncyl = (dk_geom)->dkg_ncyl; \ + (vd_geom)->acyl = (dk_geom)->dkg_acyl; \ + (vd_geom)->bcyl = (dk_geom)->dkg_bcyl; \ + (vd_geom)->nhead = (dk_geom)->dkg_nhead; \ + (vd_geom)->nsect = (dk_geom)->dkg_nsect; \ + (vd_geom)->intrlv = (dk_geom)->dkg_intrlv; \ + (vd_geom)->apc = (dk_geom)->dkg_apc; \ + (vd_geom)->rpm = (dk_geom)->dkg_rpm; \ + (vd_geom)->pcyl = (dk_geom)->dkg_pcyl; \ + (vd_geom)->write_reinstruct = (dk_geom)->dkg_write_reinstruct; \ + (vd_geom)->read_reinstruct = (dk_geom)->dkg_read_reinstruct; \ +} + +/* + * Copy the contents of a vtoc struct to the contents of a vd_vtoc_t + */ +#define VTOC2VD_VTOC(vtoc, vd_vtoc) \ +{ \ + bzero((vd_vtoc), sizeof (*(vd_vtoc))); \ + bcopy((vtoc)->v_volume, (vd_vtoc)->volume_name, \ + MIN(sizeof ((vtoc)->v_volume), \ + sizeof ((vd_vtoc)->volume_name))); \ + bcopy((vtoc)->v_asciilabel, (vd_vtoc)->ascii_label, \ + MIN(sizeof ((vtoc)->v_asciilabel), \ + sizeof ((vd_vtoc)->ascii_label))); \ + (vd_vtoc)->sector_size = (vtoc)->v_sectorsz; \ + (vd_vtoc)->num_partitions = (vtoc)->v_nparts; \ + for (int i = 0; i < (vtoc)->v_nparts; i++) { \ + (vd_vtoc)->partition[i].id_tag = (vtoc)->v_part[i].p_tag; \ + (vd_vtoc)->partition[i].perm = (vtoc)->v_part[i].p_flag; \ + (vd_vtoc)->partition[i].start = (vtoc)->v_part[i].p_start; \ + 
(vd_vtoc)->partition[i].nblocks = (vtoc)->v_part[i].p_size; \ + } \ +} #ifdef __cplusplus
--- a/usr/src/uts/sun4v/sys/vio_mailbox.h Mon May 22 04:05:00 2006 -0700 +++ b/usr/src/uts/sun4v/sys/vio_mailbox.h Mon May 22 13:24:32 2006 -0700 @@ -146,6 +146,13 @@ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK} * tag.subtype_env == VIO_VER_INFO */ + +/* Structure to store a version tuple */ +typedef struct vio_ver { + uint16_t major; /* major version number */ + uint16_t minor; /* minor version number */ +} vio_ver_t; + typedef struct vio_ver_msg { /* Common tag */ vio_msg_tag_t tag; @@ -162,6 +169,7 @@ uint64_t resv3[VIO_PAYLOAD_ELEMS - 1]; } vio_ver_msg_t; + /* * VIO Descriptor Ring Register message. *