comparison usr/src/uts/sun4v/sys/vdc.h @ 10812:c6175c150216

6726533 vdisk failover should handle storage/storage-path failures (a la mpxio)
author Alexandre Chartre <Alexandre.Chartre@Sun.COM>
date Mon, 19 Oct 2009 13:40:30 -0700
parents 68d0fe4c716e
children 2663784ac9bb
comparison
equal deleted inserted replaced
10811:72fec120256a 10812:c6175c150216
82 #define VDC_SEQ_NUM_INVALID -1 /* Error */ 82 #define VDC_SEQ_NUM_INVALID -1 /* Error */
83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ 83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */
84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ 84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */
85 85
86 /* 86 /*
87 * Flags for virtual disk operations.
88 */
89 #define VDC_OP_STATE_RUNNING 0x01 /* do operation in running state */
90 #define VDC_OP_ERRCHK_BACKEND 0x02 /* check backend on error */
91 #define VDC_OP_ERRCHK_CONFLICT 0x04 /* check resv conflict on error */
92
93 #define VDC_OP_ERRCHK (VDC_OP_ERRCHK_BACKEND | VDC_OP_ERRCHK_CONFLICT)
94 #define VDC_OP_NORMAL (VDC_OP_STATE_RUNNING | VDC_OP_ERRCHK)
95
96 /*
87 * Macros to get UNIT and PART number 97 * Macros to get UNIT and PART number
88 */ 98 */
89 #define VDCUNIT_SHIFT 3 99 #define VDCUNIT_SHIFT 3
90 #define VDCPART_MASK 7 100 #define VDCPART_MASK 7
91 101
169 typedef enum vdc_state { 179 typedef enum vdc_state {
170 VDC_STATE_INIT, /* device is initialized */ 180 VDC_STATE_INIT, /* device is initialized */
171 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ 181 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */
172 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ 182 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */
173 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ 183 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */
184 VDC_STATE_FAULTED, /* multipath backend is inaccessible */
185 VDC_STATE_FAILED, /* device is not usable */
174 VDC_STATE_RUNNING, /* running and accepting requests */ 186 VDC_STATE_RUNNING, /* running and accepting requests */
175 VDC_STATE_DETACH, /* detaching */ 187 VDC_STATE_DETACH, /* detaching */
176 VDC_STATE_RESETTING /* resetting connection with vds */ 188 VDC_STATE_RESETTING /* resetting connection with vds */
177 } vdc_state_t; 189 } vdc_state_t;
190
191 /*
192 * States of the service provided by a vds server
193 */
194 typedef enum vdc_service_state {
195 VDC_SERVICE_NONE = -1, /* no state defined */
196 VDC_SERVICE_OFFLINE, /* no connection with the service */
197 VDC_SERVICE_CONNECTED, /* connection established */
198 VDC_SERVICE_ONLINE, /* connection and backend available */
199 VDC_SERVICE_FAILED, /* connection failed */
200 VDC_SERVICE_FAULTED /* connection but backend unavailable */
201 } vdc_service_state_t;
178 202
179 /* 203 /*
180 * The states that the vdc instance can be in. 204 * The states that the vdc instance can be in.
181 */ 205 */
182 typedef enum vdc_lc_state { 206 typedef enum vdc_lc_state {
196 VIO_read_dir, /* read data from server */ 220 VIO_read_dir, /* read data from server */
197 VIO_write_dir, /* write data to server */ 221 VIO_write_dir, /* write data to server */
198 VIO_both_dir /* transfer both in and out in same buffer */ 222 VIO_both_dir /* transfer both in and out in same buffer */
199 } vio_desc_direction_t; 223 } vio_desc_direction_t;
200 224
201 typedef enum {
202 CB_STRATEGY, /* non-blocking strategy call */
203 CB_SYNC /* synchronous operation */
204 } vio_cb_type_t;
205
206 typedef struct vdc_local_desc { 225 typedef struct vdc_local_desc {
207 boolean_t is_free; /* local state - inuse or not */ 226 boolean_t is_free; /* local state - inuse or not */
208 227
209 int operation; /* VD_OP_xxx to be performed */ 228 int operation; /* VD_OP_xxx to be performed */
210 caddr_t addr; /* addr passed in by consumer */ 229 caddr_t addr; /* addr passed in by consumer */
211 int slice; 230 int slice;
212 diskaddr_t offset; /* disk offset */ 231 diskaddr_t offset; /* disk offset */
213 size_t nbytes; 232 size_t nbytes;
214 vio_cb_type_t cb_type; /* operation type blk/nonblk */ 233 struct buf *buf; /* buf of operation */
215 void *cb_arg; /* buf passed to strategy() */
216 vio_desc_direction_t dir; /* direction of transfer */ 234 vio_desc_direction_t dir; /* direction of transfer */
235 int flags; /* flags of operation */
217 236
218 caddr_t align_addr; /* used if addr non-aligned */ 237 caddr_t align_addr; /* used if addr non-aligned */
219 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ 238 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */
220 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ 239 vd_dring_entry_t *dep; /* public Dring Entry Pointer */
221 240
222 } vdc_local_desc_t; 241 } vdc_local_desc_t;
223 242
224 /* 243 /*
225 * I/O queue used by failfast 244 * I/O queue used for checking backend or failfast
226 */ 245 */
227 typedef struct vdc_io { 246 typedef struct vdc_io {
228 struct vdc_io *vio_next; /* next pending I/O in the queue */ 247 struct vdc_io *vio_next; /* next pending I/O in the queue */
229 struct buf *vio_buf; /* buf for CB_STRATEGY I/O */ 248 int vio_index; /* descriptor index */
230 clock_t vio_qtime; /* time the I/O was queued */ 249 clock_t vio_qtime; /* time the I/O was queued */
231 } vdc_io_t; 250 } vdc_io_t;
232 251
233 /* 252 /*
234 * Per vDisk server channel states 253 * Per vDisk server channel states
244 typedef struct vdc_server { 263 typedef struct vdc_server {
245 struct vdc_server *next; /* Next server */ 264 struct vdc_server *next; /* Next server */
246 struct vdc *vdcp; /* Ptr to vdc struct */ 265 struct vdc *vdcp; /* Ptr to vdc struct */
247 uint64_t id; /* Server port id */ 266 uint64_t id; /* Server port id */
248 uint64_t state; /* Server state */ 267 uint64_t state; /* Server state */
268 vdc_service_state_t svc_state; /* Service state */
269 vdc_service_state_t log_state; /* Last state logged */
249 uint64_t ldc_id; /* Server LDC id */ 270 uint64_t ldc_id; /* Server LDC id */
250 ldc_handle_t ldc_handle; /* Server LDC handle */ 271 ldc_handle_t ldc_handle; /* Server LDC handle */
251 ldc_status_t ldc_state; /* Server LDC state */ 272 ldc_status_t ldc_state; /* Server LDC state */
252 uint64_t ctimeout; /* conn tmout (secs) */ 273 uint64_t ctimeout; /* conn tmout (secs) */
253 } vdc_server_t; 274 } vdc_server_t;
260 kmutex_t lock; /* protects next 2 sections of vars */ 281 kmutex_t lock; /* protects next 2 sections of vars */
261 kcondvar_t running_cv; /* signal when upper layers can send */ 282 kcondvar_t running_cv; /* signal when upper layers can send */
262 kcondvar_t initwait_cv; /* signal when ldc conn is up */ 283 kcondvar_t initwait_cv; /* signal when ldc conn is up */
263 kcondvar_t dring_free_cv; /* signal when desc is avail */ 284 kcondvar_t dring_free_cv; /* signal when desc is avail */
264 kcondvar_t membind_cv; /* signal when mem can be bound */ 285 kcondvar_t membind_cv; /* signal when mem can be bound */
265 boolean_t self_reset; 286 boolean_t self_reset; /* self initiated reset */
287 kcondvar_t io_pending_cv; /* signal on pending I/O */
288 boolean_t io_pending; /* pending I/O */
266 289
267 int initialized; /* keeps track of what's init'ed */ 290 int initialized; /* keeps track of what's init'ed */
268 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ 291 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */
269 292
270 int hshake_cnt; /* number of failed handshakes */ 293 int hshake_cnt; /* number of failed handshakes */
283 kmutex_t read_lock; /* lock to protect read */ 306 kmutex_t read_lock; /* lock to protect read */
284 kcondvar_t read_cv; /* cv to wait for READ events */ 307 kcondvar_t read_cv; /* cv to wait for READ events */
285 vdc_rd_state_t read_state; /* current read state */ 308 vdc_rd_state_t read_state; /* current read state */
286 309
287 uint32_t sync_op_cnt; /* num of active sync operations */ 310 uint32_t sync_op_cnt; /* num of active sync operations */
288 boolean_t sync_op_pending; /* sync operation is pending */
289 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ 311 boolean_t sync_op_blocked; /* blocked waiting to do sync op */
290 uint32_t sync_op_status; /* status of sync operation */
291 kcondvar_t sync_pending_cv; /* cv wait for sync op to finish */
292 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ 312 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */
293 313
294 uint64_t session_id; /* common ID sent with all messages */ 314 uint64_t session_id; /* common ID sent with all messages */
295 uint64_t seq_num; /* most recent sequence num generated */ 315 uint64_t seq_num; /* most recent sequence num generated */
296 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ 316 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */
324 int ownership; /* ownership status flags */ 344 int ownership; /* ownership status flags */
325 kthread_t *ownership_thread; /* ownership thread */ 345 kthread_t *ownership_thread; /* ownership thread */
326 kcondvar_t ownership_cv; /* cv for ownership update */ 346 kcondvar_t ownership_cv; /* cv for ownership update */
327 347
328 /* 348 /*
329 * The failfast fields are protected by the lock mutex. 349 * The eio and failfast fields are protected by the lock mutex.
330 */ 350 */
331 kthread_t *failfast_thread; /* failfast thread */ 351 kthread_t *eio_thread; /* error io thread */
352 kcondvar_t eio_cv; /* cv for eio thread update */
353 vdc_io_t *eio_queue; /* error io queue */
332 clock_t failfast_interval; /* interval in microsecs */ 354 clock_t failfast_interval; /* interval in microsecs */
333 kcondvar_t failfast_cv; /* cv for failfast update */
334 kcondvar_t failfast_io_cv; /* cv wait for I/O to finish */
335 vdc_io_t *failfast_io_queue; /* failfast io queue */
336 355
337 /* 356 /*
338 * kstats used to store I/O statistics consumed by iostat(1M). 357 * kstats used to store I/O statistics consumed by iostat(1M).
339 * These are protected by the lock mutex. 358 * These are protected by the lock mutex.
340 */ 359 */