changeset 18755:416b7ca89d27

8628 nvme: use a semaphore to guard submission queue
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Richard Lowe <richlowe@richlowe.net>
author Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
date Mon, 13 Feb 2017 14:44:28 +0100
parents 65b9ca6ebd01
children 5b81dc110fdd
files usr/src/uts/common/io/nvme/nvme.c usr/src/uts/common/io/nvme/nvme_var.h
diffstat 2 files changed, 46 insertions(+), 65 deletions(-)
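The change below retires the explicit full-queue check in nvme_submit_cmd() (nq_active_cmds == nq_nentry, returning DDI_FAILURE) in favor of a counting semaphore, nq_sema, initialized to the queue depth in the qpair allocation path. Admin submissions block in sema_p() until a slot frees; I/O submissions take the non-blocking sema_tryp() and fail with EAGAIN when the queue is full; nvme_retrieve_cmd() returns a slot with sema_v() after consuming a completion. What follows is a minimal userland sketch of the same pattern, with POSIX semaphores standing in for the ksema_t DDI routines; all names in the sketch are illustrative, not from the patch.

#include <errno.h>
#include <pthread.h>
#include <semaphore.h>

typedef struct qpair {
	sem_t		free_slots;	/* counts free queue slots */
	pthread_mutex_t	lock;		/* guards the ring, like nq_mutex */
	unsigned int	active;		/* commands in flight */
} qpair_t;

static void
qpair_init(qpair_t *qp, unsigned int depth)
{
	/* Start with every slot free, like sema_init(..., nentry, ...). */
	(void) sem_init(&qp->free_slots, 0, depth);
	(void) pthread_mutex_init(&qp->lock, NULL);
	qp->active = 0;
}

/* Admin path: block until a slot is free (sema_p in the patch). */
static void
submit_admin(qpair_t *qp)
{
	while (sem_wait(&qp->free_slots) == -1 && errno == EINTR)
		;
	pthread_mutex_lock(&qp->lock);
	qp->active++;		/* write the SQE, ring the doorbell here */
	pthread_mutex_unlock(&qp->lock);
}

/* I/O path: fail fast when the queue is full (sema_tryp in the patch). */
static int
submit_io(qpair_t *qp)
{
	if (sem_trywait(&qp->free_slots) == -1)
		return (EAGAIN);
	pthread_mutex_lock(&qp->lock);
	qp->active++;		/* write the SQE, ring the doorbell here */
	pthread_mutex_unlock(&qp->lock);
	return (0);
}

/* Completion path: release the slot (sema_v in the patch). */
static void
retrieve(qpair_t *qp)
{
	pthread_mutex_lock(&qp->lock);
	qp->active--;		/* read the CQE, ring the doorbell here */
	pthread_mutex_unlock(&qp->lock);
	(void) sem_post(&qp->free_slots);	/* after dropping the lock */
}

line diff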
--- a/usr/src/uts/common/io/nvme/nvme.c	Tue Apr 04 21:43:02 2017 -0400
+++ b/usr/src/uts/common/io/nvme/nvme.c	Mon Feb 13 14:44:28 2017 +0100
@@ -44,7 +44,7 @@
  *
  * Command Processing:
  *
- * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up
+ * NVMe devices can have up to 65535 I/O queue pairs, with each queue holding up
  * to 65536 I/O commands. The driver will configure one I/O queue pair per
  * available interrupt vector, with the queue length usually much smaller than
  * the maximum of 65536. If the hardware doesn't provide enough queues, fewer
@@ -69,7 +69,8 @@
  * wraps around in that time a submission may find the next array slot to still
  * be used by a long-running command. In this case the array is sequentially
  * searched for the next free slot. The length of the command array is the same
- * as the configured queue length.
+ * as the configured queue length. Queue overrun is prevented by the semaphore,
+ * so a command submission may block if the queue is full.
  *
  *
  * Polled I/O Support:
@@ -257,7 +258,9 @@
 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
     bd_xfer_t *);
 static int nvme_admin_cmd(nvme_cmd_t *, int);
-static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static void nvme_submit_admin_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static int nvme_submit_io_cmd(nvme_qpair_t *, nvme_cmd_t *);
+static void nvme_submit_cmd_common(nvme_qpair_t *, nvme_cmd_t *);
 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
 static void nvme_wakeup_cmd(void *);
@@ -271,7 +274,7 @@
 static inline int nvme_check_cmd_status(nvme_cmd_t *);
 
 static void nvme_abort_cmd(nvme_cmd_t *);
-static int nvme_async_event(nvme_t *);
+static void nvme_async_event(nvme_t *);
 static int nvme_format_nvm(nvme_t *, uint32_t, uint8_t, boolean_t, uint8_t,
     boolean_t, uint8_t);
 static int nvme_get_logpage(nvme_t *, void **, size_t *, uint8_t, ...);
@@ -721,6 +724,7 @@
 	int i;
 
 	mutex_destroy(&qp->nq_mutex);
+	sema_destroy(&qp->nq_sema);
 
 	if (qp->nq_sqdma != NULL)
 		nvme_free_dma(qp->nq_sqdma);
@@ -746,6 +750,7 @@
 
 	mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER,
 	    DDI_INTR_PRI(nvme->n_intr_pri));
+	sema_init(&qp->nq_sema, nentry, NULL, SEMA_DRIVER, NULL);
 
 	if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t),
 	    DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS)
@@ -812,18 +817,29 @@
 	kmem_cache_free(nvme_cmd_cache, cmd);
 }
 
+static void
+nvme_submit_admin_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+{
+	sema_p(&qp->nq_sema);
+	nvme_submit_cmd_common(qp, cmd);
+}
+
 static int
-nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+nvme_submit_io_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd)
+{
+	if (sema_tryp(&qp->nq_sema) == 0)
+		return (EAGAIN);
+
+	nvme_submit_cmd_common(qp, cmd);
+	return (0);
+}
+
+static void
+nvme_submit_cmd_common(nvme_qpair_t *qp, nvme_cmd_t *cmd)
 {
 	nvme_reg_sqtdbl_t tail = { 0 };
 
 	mutex_enter(&qp->nq_mutex);
-
-	if (qp->nq_active_cmds == qp->nq_nentry) {
-		mutex_exit(&qp->nq_mutex);
-		return (DDI_FAILURE);
-	}
-
 	cmd->nc_completed = B_FALSE;
 
 	/*
@@ -849,7 +865,6 @@
 	nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
 
 	mutex_exit(&qp->nq_mutex);
-	return (DDI_SUCCESS);
 }
 
 static nvme_cmd_t *
@@ -895,6 +910,7 @@
 
 	nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
 	mutex_exit(&qp->nq_mutex);
+	sema_v(&qp->nq_sema);
 
 	return (cmd);
 }
@@ -1363,7 +1379,6 @@
 	nvme_health_log_t *health_log = NULL;
 	size_t logsize = 0;
 	nvme_async_event_t event;
-	int ret;
 
 	/*
 	 * Check for errors associated with the async request itself. The only
@@ -1397,14 +1412,7 @@
 
 	/* Clear CQE and re-submit the async request. */
 	bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t));
-	ret = nvme_submit_cmd(nvme->n_adminq, cmd);
-
-	if (ret != DDI_SUCCESS) {
-		dev_err(nvme->n_dip, CE_WARN,
-		    "!failed to resubmit async event request");
-		atomic_inc_32(&nvme->n_async_resubmit_failed);
-		nvme_free_cmd(cmd);
-	}
+	nvme_submit_admin_cmd(nvme->n_adminq, cmd);
 
 	switch (event.b.ae_type) {
 	case NVME_ASYNC_TYPE_ERROR:
@@ -1517,19 +1525,8 @@
 static int
 nvme_admin_cmd(nvme_cmd_t *cmd, int sec)
 {
-	int ret;
-
 	mutex_enter(&cmd->nc_mutex);
-	ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd);
-
-	if (ret != DDI_SUCCESS) {
-		mutex_exit(&cmd->nc_mutex);
-		dev_err(cmd->nc_nvme->n_dip, CE_WARN,
-		    "!nvme_submit_cmd failed");
-		atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full);
-		nvme_free_cmd(cmd);
-		return (DDI_FAILURE);
-	}
+	nvme_submit_admin_cmd(cmd->nc_nvme->n_adminq, cmd);
 
 	if (nvme_wait_cmd(cmd, sec) == B_FALSE) {
 		/*
@@ -1543,26 +1540,16 @@
 	return (DDI_SUCCESS);
 }
 
-static int
+static void
 nvme_async_event(nvme_t *nvme)
 {
 	nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP);
-	int ret;
 
 	cmd->nc_sqid = 0;
 	cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT;
 	cmd->nc_callback = nvme_async_event_task;
 
-	ret = nvme_submit_cmd(nvme->n_adminq, cmd);
-
-	if (ret != DDI_SUCCESS) {
-		dev_err(nvme->n_dip, CE_WARN,
-		    "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT");
-		nvme_free_cmd(cmd);
-		return (DDI_FAILURE);
-	}
-
-	return (DDI_SUCCESS);
+	nvme_submit_admin_cmd(nvme->n_adminq, cmd);
 }
 
 static int
@@ -2379,11 +2366,7 @@
 	/*
 	 * Post an asynchronous event command to catch errors.
 	 */
-	if (nvme_async_event(nvme) != DDI_SUCCESS) {
-		dev_err(nvme->n_dip, CE_WARN,
-		    "!failed to post async event");
-		goto fail;
-	}
+	nvme_async_event(nvme);
 
 	/*
 	 * Identify Controller
@@ -2608,13 +2591,8 @@
 	 * Post more asynchronous events commands to reduce event reporting
 	 * latency as suggested by the spec.
 	 */
-	for (i = 1; i != nvme->n_async_event_limit; i++) {
-		if (nvme_async_event(nvme) != DDI_SUCCESS) {
-			dev_err(nvme->n_dip, CE_WARN,
-			    "!failed to post async event %d", i);
-			goto fail;
-		}
-	}
+	for (i = 1; i != nvme->n_async_event_limit; i++)
+		nvme_async_event(nvme);
 
 	return (DDI_SUCCESS);
 
@@ -3278,9 +3256,10 @@
 nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
 {
 	nvme_t *nvme = ns->ns_nvme;
-	nvme_cmd_t *cmd, *ret;
+	nvme_cmd_t *cmd;
 	nvme_qpair_t *ioq;
 	boolean_t poll;
+	int ret;
 
 	if (nvme->n_dead)
 		return (EIO);
@@ -3300,16 +3279,18 @@
 	 */
 	poll = (xfer->x_flags & BD_XFER_POLL) != 0;
 
-	if (nvme_submit_cmd(ioq, cmd) != DDI_SUCCESS)
-		return (EAGAIN);
+	ret = nvme_submit_io_cmd(ioq, cmd);
+
+	if (ret != 0)
+		return (ret);
 
 	if (!poll)
 		return (0);
 
 	do {
-		ret = nvme_retrieve_cmd(nvme, ioq);
-		if (ret != NULL)
-			nvme_bd_xfer_done(ret);
+		cmd = nvme_retrieve_cmd(nvme, ioq);
+		if (cmd != NULL)
+			nvme_bd_xfer_done(cmd);
 		else
 			drv_usecwait(10);
 	} while (ioq->nq_active_cmds != 0);
--- a/usr/src/uts/common/io/nvme/nvme_var.h	Tue Apr 04 21:43:02 2017 -0400
+++ b/usr/src/uts/common/io/nvme/nvme_var.h	Mon Feb 13 14:44:28 2017 +0100
@@ -12,6 +12,7 @@
 /*
  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  * Copyright 2016 The MathWorks, Inc. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
  */
 
 #ifndef _NVME_VAR_H
@@ -109,6 +110,7 @@
 	int nq_phase;
 
 	kmutex_t nq_mutex;
+	ksema_t nq_sema;
 };
 
 struct nvme {
@@ -186,11 +188,9 @@
 	uint32_t n_abort_failed;
 	uint32_t n_cmd_timeout;
 	uint32_t n_cmd_aborted;
-	uint32_t n_async_resubmit_failed;
 	uint32_t n_wrong_logpage;
 	uint32_t n_unknown_logpage;
 	uint32_t n_too_many_cookies;
-	uint32_t n_admin_queue_full;
 
 	/* errors detected by hardware */
 	uint32_t n_data_xfr_err;
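
One ordering detail worth noting: in the patch, nvme_retrieve_cmd() calls sema_v() only after mutex_exit(&qp->nq_mutex), so a submitter woken on the semaphore never lands straight on a queue lock still held by the completion path; retrieve() in the sketch above posts free_slots after unlocking for the same reason. A hypothetical caller of the fast-fail path:

	qpair_t qp;

	qpair_init(&qp, 16);	/* e.g. a depth-16 queue */
	if (submit_io(&qp) == EAGAIN) {
		/*
		 * Queue full: every slot is held by an in-flight command.
		 * In the driver, this EAGAIN is returned to blkdev from
		 * nvme_bd_cmd(); slots come back via retrieve()/sem_post().
		 */
	}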