view usr/src/uts/sun4v/sys/ldc_impl.h @ 10971:2663784ac9bb

backout 6726533: causes 6898072
author jmcp <James.McPherson@Sun.COM>
date Thu, 05 Nov 2009 15:54:24 -0800
parents a4cc5f506eee
children 496fd9979cfc
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _LDC_IMPL_H
#define	_LDC_IMPL_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ioctl.h>

/* Memory map table entries */
#define	LDC_MTBL_ENTRIES	8192	/* 8 K */

/* Define LDC Queue info */
#define	LDC_PACKET_SHIFT	6
#define	LDC_QUEUE_ENTRIES	512
#define	LDC_MTU_MSGS		4
#define	LDC_QUEUE_SIZE		(LDC_QUEUE_ENTRIES << LDC_PACKET_SHIFT)
#define	LDC_DEFAULT_MTU		(LDC_QUEUE_SIZE / LDC_MTU_MSGS)
#define	LDC_RXDQ_MULTIPLIER	2

/*
 * LDC Reliable mode - initial packet seqid
 * - If peer initiated handshake, RDX should contain init_seqid + 1
 * - If this endpoint initiated handshake first data packet should
 *   contain the message init_seqid + 1
 */
#define	LDC_INIT_SEQID	0x0

/* LDC Message types */
#define	LDC_CTRL	0x01	/* Control Pkt */
#define	LDC_DATA	0x02	/* Data Pkt */
#define	LDC_ERR		0x10	/* Error Pkt */

/* LDC Message Subtypes */
#define	LDC_INFO	0x01	/* Control/Data/Error info pkt */
#define	LDC_ACK		0x02	/* Control/Data ACK */
#define	LDC_NACK	0x04	/* Control/Data NACK */

/* LDC Control Messages */
#define	LDC_VER		0x01	/* Version message */
#define	LDC_RTS		0x02	/* Request to Send */
#define	LDC_RTR		0x03	/* Ready To Receive */
#define	LDC_RDX		0x04	/* Ready for data exchange */

#define	LDC_CTRL_MASK	0x0f	/* Mask to read control bits */

/* LDC Channel Transport State (tstate) */
#define	TS_TXQ_RDY	0x01	/* allocated TX queue */
#define	TS_RXQ_RDY	0x02	/* allocated RX queue */
#define	TS_INIT		(TS_TXQ_RDY | TS_RXQ_RDY)
#define	TS_QCONF_RDY	0x04	/* registered queues with HV */
#define	TS_CNEX_RDY	0x08	/* registered channel with cnex */
#define	TS_OPEN		(TS_INIT | TS_QCONF_RDY | TS_CNEX_RDY)
#define	TS_LINK_READY	0x10	/* both endpts registered Rx queues */
#define	TS_READY	(TS_OPEN | TS_LINK_READY)
#define	TS_VER_DONE	0x20	/* negotiated version */
#define	TS_VREADY	(TS_READY | TS_VER_DONE)
#define	TS_HSHAKE_DONE	0x40	/* completed handshake */
#define	TS_UP		(TS_READY | TS_VER_DONE | TS_HSHAKE_DONE)

#define	TS_IN_RESET	0x100	/* channel is in reset state */

/*  LDC Channel Transport Handshake states */
#define	TS_SENT_VER	0x01	/* Sent version */
#define	TS_SENT_RTS	0x02	/* Sent RTS */
#define	TS_RCVD_RTR	0x04	/* Received RTR */
#define	TS_SENT_RDX	0x08	/* Sent RDX */
#define	TS_RCVD_VER	0x10	/* Received version */
#define	TS_RCVD_RTS	0x20	/* Received RTS */
#define	TS_SENT_RTR	0x40	/* Sent RTR */
#define	TS_RCVD_RDX	0x80	/* Received RDX */

/* LDC Interrupt State */
#define	LDC_INTR_NONE	0x00	/* No interrupts */
#define	LDC_INTR_ACTIVE	0x01	/* Interrupt being processed */
#define	LDC_INTR_PEND	0x02	/* Interrupt pending */

/* LDC MSG Envelope */
#define	LDC_LEN_MASK	0x3F
#define	LDC_FRAG_MASK	0xC0

#define	LDC_FRAG_START	0x40	/* frag_info = 0x01 */
#define	LDC_FRAG_STOP	0x80	/* frag_info = 0x02 */
#define	LDC_FRAG_CONT	0x00	/* frag_info = 0x00 */

/*
 * LDC will retry LDC_MAX_RETRIES times when sending or
 * receiving data or if the HV returns back EWOULDBLOCK.
 * Between each retry it will wait LDC_DELAY usecs.
 */
#define	LDC_MAX_RETRIES	1000
#define	LDC_DELAY	1

/* delay(usec) between channel unregister retries in ldc_close() */
#define	LDC_CLOSE_DELAY	1

/*
 * LDC Version information
 */
#define	LDC_PAYLOAD_VER_OFF	8	/* offset of version in payload */

typedef struct ldc_ver {
	uint16_t	major;
	uint16_t	minor;
} ldc_ver_t;

/*
 * Each guest consists of one or more LDC endpoints represented by a ldc_chan
 * structure. Each ldc_chan structure points to a ldc_mtbl structure that
 * contains information about the map table associated with this LDC endpoint.
 * The map table contains the list of pages being shared by this guest over
 * this endpoint with the guest at the other end of this endpoint. Each LDC
 * endpoint also points to a list of memory handles used to bind and export
 * memory segments from this guest. If a memory segment is bound, it points to
 * a memory segment structure, which inturn consists of an array of ldc_page
 * structure for all the pages within that segment. Each ldc_page structure
 * contains information about the shared page and also points to the
 * corresponding entry in the map table.
 *
 * Each LDC endpoint also points to a list of ldc_dring structures that refer
 * to both imported and exported descriptor rings. If it is a exported
 * descriptor ring, it then points to memory handle/memseg corresponding to
 * the region of memory associated with the descriptor ring.
 *
 *     +----------+   +----------+   +----------+
 *     | ldc_chan |-->| ldc_chan |-->| ldc_chan |-->....
 *     +----------+   +----------+   +----------+
 *       |  |  |
 *       |  |  |
 *       |  |  |      +-----------+     +-----------+
 *       |  |  +----->| ldc_dring |---->| ldc_dring |---->......
 *       |  |         +-----------+     +-----------+
 *       |  |               |
 *       |  |               +----------------------------+
 *       |  |                                            |
 *       |  |                                            v
 *       |  |      +----------+     +----------+     +----------+
 *       |  +----->| ldc_mhdl |---->| ldc_mhdl |---->| ldc_mhdl |---> ....
 *       |         +----------+     +----------+     +----------+
 *       v                 |                             |
 *  +----------+           |    +------------+           |    +------------+
 *  | ldc_mtbl |--+        +--->| ldc_memseg |-----+     +--->| ldc_memseg |
 *  +----------+  |             +------------+     |          +------------+
 *                |                   |            |            |       |
 *                v                   v            v            |       v
 *     +--------------+         +----------+  +--------+        |   +--------+
 *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        |   | cookie |
 *     +--------------+         +----------+  +--------+        |   +--------+
 *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        v
 *     +--------------+         +----------+  +--------+   +----------+
 *     | ldc_mte_slot |<-----------------------------------| ldc_page |
 *     +--------------+                                    +----------+
 *     | ldc_mte_slot |
 *     +--------------+
 *     |    ......    |/ +------------+
 *     +--------------+  |   entry    |
 *     | ldc_mte_slot |  +------------+
 *     +--------------+  | inv_cookie |
 *                     \ +------------+
 *
 */

/*
 * Message format of each packet sent over the LDC channel.
 * Each packet is 64-bytes long.
 *
 * Each packet that is sent over LDC can contain either data or acks.
 * The type will reflect the contents. The len will contain in bytes
 * the amount of data being sent. In the case of ACKs, the seqid and
 * data fields will contain the SEQIDs of messages for which ACKs are
 * being sent.
 *
 * Raw pkt format:
 *
 *          +------------------------------------------------------+
 *  0 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 *
 * Unreliable pkt format:
 *
 *          +------------------------------------------------------+
 *      0   |          seqid          | env  | ctrl | stype | type |
 *          +------------------------------------------------------+
 *  1 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 *
 * Reliable pkt format:
 *
 *          +------------------------------------------------------+
 *      0   |            seqid        | env  | ctrl | stype | type |
 *          +------------------------------------------------------+
 *      1   |          ackid          |         unused             |
 *          +------------------------------------------------------+
 *  2 - 7   |                 data payload                         |
 *          +------------------------------------------------------+
 */

typedef struct ldc_msg {
	union {
		struct {
			uint8_t		_type;	/* Message type */
			uint8_t		_stype;	/* Message subtype */
			uint8_t		_ctrl;	/* Control/Error Message */
			uint8_t 	_env;	/* Message Envelope */
			uint32_t	_seqid;	/* Sequence ID */

			union {
				uint8_t	_ud[LDC_PAYLOAD_SIZE_UNRELIABLE];
						/* Unreliable data payload */
				struct {
					uint32_t _unused;	/* unused */
					uint32_t _ackid;	/* ACK ID */
					uint8_t	_rd[LDC_PAYLOAD_SIZE_RELIABLE];
						/* Reliable data payload */
				} _rl;
			} _data;
		} _tpkt;

		uint8_t		_raw[LDC_PAYLOAD_SIZE_RAW];
	} _pkt;

} ldc_msg_t;

#define	raw		_pkt._raw
#define	type		_pkt._tpkt._type
#define	stype		_pkt._tpkt._stype
#define	ctrl		_pkt._tpkt._ctrl
#define	env		_pkt._tpkt._env
#define	seqid		_pkt._tpkt._seqid
#define	udata		_pkt._tpkt._data._ud
#define	ackid		_pkt._tpkt._data._rl._ackid
#define	rdata		_pkt._tpkt._data._rl._rd

/*
 * LDC Map Table Entry (MTE)
 *
 *   6    6                               1    1  1
 *  |3    0|                       psz|   3|   1| 0| 9| 8| 7|6|5|4|      0|
 *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
 *  | rsvd |           PFN            | 0  | 0  |CW|CR|IW|IR|X|W|R| pgszc |
 *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
 *  |                       hv invalidation cookie                        |
 *  +---------------------------------------------------------------------+
 */
typedef union {
	struct {
		uint64_t	_rsvd2:8,	/* <63:56> reserved */
				rpfn:43,	/* <55:13> real pfn */
				_rsvd1:2,	/* <12:11> reserved */
				cw:1,		/* <10> copy write access */
				cr:1,		/* <9> copy read perm */
				iw:1,		/* <8> iommu write perm */
				ir:1,		/* <7> iommu read perm */
				x:1,		/* <6> execute perm */
				w:1,		/* <5> write perm */
				r:1,		/* <4> read perm */
				pgszc:4;	/* <3:0> pgsz code */
	} mte_bit;

	uint64_t 		ll;

} ldc_mte_t;

#define	mte_rpfn	mte_bit.rpfn
#define	mte_cw		mte_bit.cw
#define	mte_cr		mte_bit.cr
#define	mte_iw		mte_bit.iw
#define	mte_ir		mte_bit.ir
#define	mte_x		mte_bit.x
#define	mte_w		mte_bit.w
#define	mte_r		mte_bit.r
#define	mte_pgszc	mte_bit.pgszc

#define	MTE_BSZS_SHIFT(sz)	((sz) * 3)
#define	MTEBYTES(sz)    	(MMU_PAGESIZE << MTE_BSZS_SHIFT(sz))
#define	MTEPAGES(sz)    	(1 << MTE_BSZS_SHIFT(sz))
#define	MTE_PAGE_SHIFT(sz)	(MMU_PAGESHIFT + MTE_BSZS_SHIFT(sz))
#define	MTE_PAGE_OFFSET(sz)	(MTEBYTES(sz) - 1)
#define	MTE_PAGEMASK(sz)	(~MTE_PAGE_OFFSET(sz))
#define	MTE_PFNMASK(sz)		(~(MTE_PAGE_OFFSET(sz) >> MMU_PAGESHIFT))

/*
 * LDC Map Table Slot
 */
typedef struct ldc_mte_slot {
	ldc_mte_t	entry;
	uint64_t	cookie;
} ldc_mte_slot_t;

/*
 * LDC Memory Map Table
 *
 * Each LDC has a memory map table it uses to list all the pages
 * it exporting to its peer over the channel. This structure
 * contains information about the map table and is pointed to
 * by the ldc_chan structure.
 */
typedef struct ldc_mtbl {
	kmutex_t		lock;		/* Table lock */
	size_t			size;		/* Table size (in bytes) */
	uint64_t		next_entry;	/* Next entry to use */
	uint64_t		num_entries;	/* Num entries in table */
	uint64_t		num_avail;	/* Num of available entries */
	boolean_t		contigmem;	/* TRUE=Contig mem alloc'd */
	ldc_mte_slot_t		*table;		/* The table itself */
} ldc_mtbl_t;

/*
 * LDC page and memory segment information
 */
typedef struct ldc_page {
	uintptr_t		raddr;		/* Exported page RA */
	uint64_t		index;		/* Index in map table */
	ldc_mte_slot_t		*mte;		/* Map table entry */
} ldc_page_t;

typedef struct ldc_memseg {
	caddr_t			vaddr;		/* Exported segment VA */
	uintptr_t		raddr;		/* Exported segment VA */
	size_t			size;		/* Exported segment size */
	uint64_t		npages;		/* Number of pages */
	ldc_page_t		*pages;		/* Array of exported pages */
	uint32_t		ncookies;	/* Number of cookies */
	ldc_mem_cookie_t	*cookies;
	uint64_t		next_cookie;	/* Index to next cookie */
} ldc_memseg_t;

/*
 * LDC Cookie address format
 *
 *   6       6          m+n
 *  |3|      0|          |                  m|                  0|
 *  +-+-------+----------+-------------------+-------------------+
 *  |X| pgszc |   rsvd   |      table_idx    |     page_offset   |
 *  +-+-------+----------+-------------------+-------------------+
 */
#define	LDC_COOKIE_PGSZC_MASK	0x7
#define	LDC_COOKIE_PGSZC_SHIFT	60

/*
 * LDC Memory handle
 */
typedef struct ldc_chan ldc_chan_t;

typedef struct ldc_mhdl {
	kmutex_t		lock;		/* Mutex for memory handle */
	ldc_mstatus_t		status;		/* Memory map status */

	uint8_t			mtype;		/* Type of sharing */
	uint8_t			perm;		/* Access permissions */
	boolean_t		myshadow;	/* TRUE=alloc'd shadow mem */

	ldc_chan_t		*ldcp;		/* Pointer to channel struct */
	ldc_memseg_t		*memseg;	/* Bound memory segment */
	struct ldc_mhdl		*next;		/* Next memory handle */
} ldc_mhdl_t;

/*
 * LDC Descriptor rings
 */

typedef struct ldc_dring {
	kmutex_t		lock;		/* Desc ring lock */
	ldc_mstatus_t		status;		/* Desc ring status */

	uint32_t		dsize;		/* Descriptor size */
	uint32_t		length;		/* Descriptor ring length */
	uint64_t		size;		/* Desc ring size (in bytes) */
	caddr_t			base;		/* Descriptor ring base addr */

	ldc_chan_t		*ldcp;		/* Pointer to bound channel */
	ldc_mem_handle_t	mhdl;		/* Mem handle to desc ring */

	struct ldc_dring	*ch_next;	/* Next dring in channel */
	struct ldc_dring 	*next;		/* Next dring overall */

} ldc_dring_t;


/*
 * Channel specific information is kept in a separate
 * structure. These are then stored on a array indexed
 * by the channel number.
 */
struct ldc_chan {
	ldc_chan_t	*next;		/* Next channel */

	kmutex_t	lock;		/* Channel lock */
	uint64_t	id;		/* Channel ID */
	ldc_status_t	status;		/* Channel status */
	uint32_t	tstate;		/* Channel transport state */
	uint32_t	hstate;		/* Channel transport handshake state */

	ldc_dev_t	devclass;	/* Associated device class */
	uint64_t	devinst;	/* Associated device instance */
	ldc_mode_t	mode;		/* Channel mode */

	uint64_t	mtu;		/* Max TU size */

	ldc_ver_t	version;	/* Channel version */
	uint32_t	next_vidx;	/* Next version to match */

	uint_t		(*cb)(uint64_t event, caddr_t arg);
	caddr_t		cb_arg;		/* Channel callback and arg */
	boolean_t	cb_inprogress;	/* Channel callback in progress */
	boolean_t	cb_enabled;	/* Channel callbacks are enabled */

	uint8_t		tx_intr_state;	/* Tx interrupt state */
	uint8_t		rx_intr_state;	/* Rx interrupt state */

	kmutex_t	tx_lock;	/* Transmit lock */
	uint64_t	tx_q_entries;	/* Num entries in transmit queue */
	uint64_t	tx_q_va;	/* Virtual addr of transmit queue */
	uint64_t	tx_q_ra;	/* Real addr of transmit queue */
	uint64_t	tx_head;	/* Tx queue head */
	uint64_t	tx_ackd_head;	/* Tx queue ACKd head (Reliable) */
	uint64_t	tx_tail;	/* Tx queue tail */

	uint64_t	rx_q_entries;	/* Num entries in receive queue */
	uint64_t	rx_q_va;	/* Virtual addr of receive queue */
	uint64_t	rx_q_ra;	/* Real addr of receive queue */

	uint64_t	rx_dq_entries;	/* Num entries in the data queue */
	uint64_t	rx_dq_va;	/* Virtual addr of the data queue */
	uint64_t	rx_dq_head;	/* Receive data queue head */
	uint64_t	rx_dq_tail;	/* Receive data queue tail */
	uint64_t	rx_ack_head;	/* Receive data ACK peek head ptr */

	uint64_t	link_state;	/* Underlying HV channel state */

	ldc_mtbl_t	*mtbl;		/* Memory table used by channel */
	ldc_mhdl_t	*mhdl_list;	/* List of memory handles */
	kmutex_t	mlist_lock;	/* Mem handle list lock */

	ldc_dring_t	*exp_dring_list; /* Exported desc ring list */
	kmutex_t	exp_dlist_lock;	/* Lock for exported desc ring list */
	ldc_dring_t	*imp_dring_list; /* Imported desc ring list */
	kmutex_t	imp_dlist_lock;	/* Lock for imported desc ring list */

	uint8_t		pkt_payload;	/* Size of packet payload */

	uint32_t	last_msg_snt;	/* Seqid of last packet sent */
	uint32_t	last_ack_rcd;	/* Seqid of last ACK recd */
	uint32_t	last_msg_rcd;	/* Seqid of last packet received */

	uint32_t	stream_remains;	/* Number of bytes in stream */
					/* packet buffer */
	uint32_t	stream_offset;	/* Offset into packet buffer for */
					/* next read */
	uint8_t		*stream_bufferp; /* Stream packet buffer */

	int		(*read_p)(ldc_chan_t *ldcp, caddr_t bufferp,
				size_t *sizep);
	int		(*write_p)(ldc_chan_t *ldcp, caddr_t bufferp,
				size_t *sizep);

	uint64_t	(*readq_get_state)(ldc_chan_t *ldcp, uint64_t *head,
				uint64_t *tail, uint64_t *link_state);

	int		(*readq_set_head)(ldc_chan_t *ldcp, uint64_t head);
};


/*
 * LDC module soft state structure
 */
typedef struct ldc_soft_state {
	kmutex_t 	lock;		/* Protects ldc_soft_state_t  */
	ldc_cnex_t	cinfo;		/* channel nexus info */
	uint64_t	channel_count;	/* Number of channels */
	uint64_t	channels_open;	/* Number of open channels */
	ldc_chan_t 	*chan_list;	/* List of LDC endpoints */
	ldc_dring_t	*dring_list;	/* Descriptor rings (for export) */

	kmem_cache_t	*memhdl_cache;	/* Memory handle cache */
	kmem_cache_t	*memseg_cache;	/* Memory segment cache */
} ldc_soft_state_t;


/*
 * Debugging Utilities
 */
#define	DBG_ALL_LDCS	-1
#ifdef	DEBUG
#define	D1		\
if (ldcdbg & 0x01)	\
	ldcdebug
#define	D2		\
if (ldcdbg & 0x02)	\
	ldcdebug
#define	DWARN		\
if (ldcdbg & 0x04)	\
	ldcdebug
#else
#define	D1
#define	D2
#define	DWARN
#endif

#ifdef __cplusplus
}
#endif

#endif /* _LDC_IMPL_H */