Mercurial > illumos > illumos-gate
comparison usr/src/uts/sun4v/sys/vdc.h @ 10812:c6175c150216
6726533 vdisk failover should handle storage/storage-path failures (a la mpxio)
author | Alexandre Chartre <Alexandre.Chartre@Sun.COM> |
---|---|
date | Mon, 19 Oct 2009 13:40:30 -0700 |
parents | 68d0fe4c716e |
children | 2663784ac9bb |
comparison
equal
deleted
inserted
replaced
10811:72fec120256a | 10812:c6175c150216 |
---|---|
82 #define VDC_SEQ_NUM_INVALID -1 /* Error */ | 82 #define VDC_SEQ_NUM_INVALID -1 /* Error */ |
83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ | 83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ |
84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ | 84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ |
85 | 85 |
86 /* | 86 /* |
87 * Flags for virtual disk operations. | |
88 */ | |
89 #define VDC_OP_STATE_RUNNING 0x01 /* do operation in running state */ | |
90 #define VDC_OP_ERRCHK_BACKEND 0x02 /* check backend on error */ | |
91 #define VDC_OP_ERRCHK_CONFLICT 0x04 /* check resv conflict on error */ | |
92 | |
93 #define VDC_OP_ERRCHK (VDC_OP_ERRCHK_BACKEND | VDC_OP_ERRCHK_CONFLICT) | |
94 #define VDC_OP_NORMAL (VDC_OP_STATE_RUNNING | VDC_OP_ERRCHK) | |
95 | |
96 /* | |
87 * Macros to get UNIT and PART number | 97 * Macros to get UNIT and PART number |
88 */ | 98 */ |
89 #define VDCUNIT_SHIFT 3 | 99 #define VDCUNIT_SHIFT 3 |
90 #define VDCPART_MASK 7 | 100 #define VDCPART_MASK 7 |
91 | 101 |
169 typedef enum vdc_state { | 179 typedef enum vdc_state { |
170 VDC_STATE_INIT, /* device is initialized */ | 180 VDC_STATE_INIT, /* device is initialized */ |
171 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ | 181 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ |
172 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ | 182 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ |
173 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ | 183 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ |
184 VDC_STATE_FAULTED, /* multipath backend is inaccessible */ | |
185 VDC_STATE_FAILED, /* device is not usable */ | |
174 VDC_STATE_RUNNING, /* running and accepting requests */ | 186 VDC_STATE_RUNNING, /* running and accepting requests */ |
175 VDC_STATE_DETACH, /* detaching */ | 187 VDC_STATE_DETACH, /* detaching */ |
176 VDC_STATE_RESETTING /* resetting connection with vds */ | 188 VDC_STATE_RESETTING /* resetting connection with vds */ |
177 } vdc_state_t; | 189 } vdc_state_t; |
190 | |
191 /* | |
192 * States of the service provided by a vds server | |
193 */ | |
194 typedef enum vdc_service_state { | |
195 VDC_SERVICE_NONE = -1, /* no state defined */ | |
196 VDC_SERVICE_OFFLINE, /* no connection with the service */ | |
197 VDC_SERVICE_CONNECTED, /* connection established */ | |
198 VDC_SERVICE_ONLINE, /* connection and backend available */ | |
199 VDC_SERVICE_FAILED, /* connection failed */ | |
200 VDC_SERVICE_FAULTED /* connection but backend unavailable */ | |
201 } vdc_service_state_t; | |
178 | 202 |
179 /* | 203 /* |
180 * The states that the vdc instance can be in. | 204 * The states that the vdc instance can be in. |
181 */ | 205 */ |
182 typedef enum vdc_lc_state { | 206 typedef enum vdc_lc_state { |
196 VIO_read_dir, /* read data from server */ | 220 VIO_read_dir, /* read data from server */ |
197 VIO_write_dir, /* write data to server */ | 221 VIO_write_dir, /* write data to server */ |
198 VIO_both_dir /* transfer both in and out in same buffer */ | 222 VIO_both_dir /* transfer both in and out in same buffer */ |
199 } vio_desc_direction_t; | 223 } vio_desc_direction_t; |
200 | 224 |
201 typedef enum { | |
202 CB_STRATEGY, /* non-blocking strategy call */ | |
203 CB_SYNC /* synchronous operation */ | |
204 } vio_cb_type_t; | |
205 | |
206 typedef struct vdc_local_desc { | 225 typedef struct vdc_local_desc { |
207 boolean_t is_free; /* local state - inuse or not */ | 226 boolean_t is_free; /* local state - inuse or not */ |
208 | 227 |
209 int operation; /* VD_OP_xxx to be performed */ | 228 int operation; /* VD_OP_xxx to be performed */ |
210 caddr_t addr; /* addr passed in by consumer */ | 229 caddr_t addr; /* addr passed in by consumer */ |
211 int slice; | 230 int slice; |
212 diskaddr_t offset; /* disk offset */ | 231 diskaddr_t offset; /* disk offset */ |
213 size_t nbytes; | 232 size_t nbytes; |
214 vio_cb_type_t cb_type; /* operation type blk/nonblk */ | 233 struct buf *buf; /* buf of operation */ |
215 void *cb_arg; /* buf passed to strategy() */ | |
216 vio_desc_direction_t dir; /* direction of transfer */ | 234 vio_desc_direction_t dir; /* direction of transfer */ |
235 int flags; /* flags of operation */ | |
217 | 236 |
218 caddr_t align_addr; /* used if addr non-aligned */ | 237 caddr_t align_addr; /* used if addr non-aligned */ |
219 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ | 238 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ |
220 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ | 239 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ |
221 | 240 |
222 } vdc_local_desc_t; | 241 } vdc_local_desc_t; |
223 | 242 |
224 /* | 243 /* |
225 * I/O queue used by failfast | 244 * I/O queue used for checking backend or failfast |
226 */ | 245 */ |
227 typedef struct vdc_io { | 246 typedef struct vdc_io { |
228 struct vdc_io *vio_next; /* next pending I/O in the queue */ | 247 struct vdc_io *vio_next; /* next pending I/O in the queue */ |
229 struct buf *vio_buf; /* buf for CB_STRATEGY I/O */ | 248 int vio_index; /* descriptor index */ |
230 clock_t vio_qtime; /* time the I/O was queued */ | 249 clock_t vio_qtime; /* time the I/O was queued */ |
231 } vdc_io_t; | 250 } vdc_io_t; |
232 | 251 |
233 /* | 252 /* |
234 * Per vDisk server channel states | 253 * Per vDisk server channel states |
244 typedef struct vdc_server { | 263 typedef struct vdc_server { |
245 struct vdc_server *next; /* Next server */ | 264 struct vdc_server *next; /* Next server */ |
246 struct vdc *vdcp; /* Ptr to vdc struct */ | 265 struct vdc *vdcp; /* Ptr to vdc struct */ |
247 uint64_t id; /* Server port id */ | 266 uint64_t id; /* Server port id */ |
248 uint64_t state; /* Server state */ | 267 uint64_t state; /* Server state */ |
268 vdc_service_state_t svc_state; /* Service state */ | |
269 vdc_service_state_t log_state; /* Last state logged */ | |
249 uint64_t ldc_id; /* Server LDC id */ | 270 uint64_t ldc_id; /* Server LDC id */ |
250 ldc_handle_t ldc_handle; /* Server LDC handle */ | 271 ldc_handle_t ldc_handle; /* Server LDC handle */ |
251 ldc_status_t ldc_state; /* Server LDC state */ | 272 ldc_status_t ldc_state; /* Server LDC state */ |
252 uint64_t ctimeout; /* conn tmout (secs) */ | 273 uint64_t ctimeout; /* conn tmout (secs) */ |
253 } vdc_server_t; | 274 } vdc_server_t; |
260 kmutex_t lock; /* protects next 2 sections of vars */ | 281 kmutex_t lock; /* protects next 2 sections of vars */ |
261 kcondvar_t running_cv; /* signal when upper layers can send */ | 282 kcondvar_t running_cv; /* signal when upper layers can send */ |
262 kcondvar_t initwait_cv; /* signal when ldc conn is up */ | 283 kcondvar_t initwait_cv; /* signal when ldc conn is up */ |
263 kcondvar_t dring_free_cv; /* signal when desc is avail */ | 284 kcondvar_t dring_free_cv; /* signal when desc is avail */ |
264 kcondvar_t membind_cv; /* signal when mem can be bound */ | 285 kcondvar_t membind_cv; /* signal when mem can be bound */ |
265 boolean_t self_reset; | 286 boolean_t self_reset; /* self initiated reset */ |
287 kcondvar_t io_pending_cv; /* signal on pending I/O */ | |
288 boolean_t io_pending; /* pending I/O */ | |
266 | 289 |
267 int initialized; /* keeps track of what's init'ed */ | 290 int initialized; /* keeps track of what's init'ed */ |
268 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ | 291 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ |
269 | 292 |
270 int hshake_cnt; /* number of failed handshakes */ | 293 int hshake_cnt; /* number of failed handshakes */ |
283 kmutex_t read_lock; /* lock to protect read */ | 306 kmutex_t read_lock; /* lock to protect read */ |
284 kcondvar_t read_cv; /* cv to wait for READ events */ | 307 kcondvar_t read_cv; /* cv to wait for READ events */ |
285 vdc_rd_state_t read_state; /* current read state */ | 308 vdc_rd_state_t read_state; /* current read state */ |
286 | 309 |
287 uint32_t sync_op_cnt; /* num of active sync operations */ | 310 uint32_t sync_op_cnt; /* num of active sync operations */ |
288 boolean_t sync_op_pending; /* sync operation is pending */ | |
289 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ | 311 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ |
290 uint32_t sync_op_status; /* status of sync operation */ | |
291 kcondvar_t sync_pending_cv; /* cv wait for sync op to finish */ | |
292 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ | 312 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ |
293 | 313 |
294 uint64_t session_id; /* common ID sent with all messages */ | 314 uint64_t session_id; /* common ID sent with all messages */ |
295 uint64_t seq_num; /* most recent sequence num generated */ | 315 uint64_t seq_num; /* most recent sequence num generated */ |
296 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ | 316 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ |
324 int ownership; /* ownership status flags */ | 344 int ownership; /* ownership status flags */ |
325 kthread_t *ownership_thread; /* ownership thread */ | 345 kthread_t *ownership_thread; /* ownership thread */ |
326 kcondvar_t ownership_cv; /* cv for ownership update */ | 346 kcondvar_t ownership_cv; /* cv for ownership update */ |
327 | 347 |
328 /* | 348 /* |
329 * The failfast fields are protected by the lock mutex. | 349 * The eio and failfast fields are protected by the lock mutex. |
330 */ | 350 */ |
331 kthread_t *failfast_thread; /* failfast thread */ | 351 kthread_t *eio_thread; /* error io thread */ |
352 kcondvar_t eio_cv; /* cv for eio thread update */ | |
353 vdc_io_t *eio_queue; /* error io queue */ | |
332 clock_t failfast_interval; /* interval in microsecs */ | 354 clock_t failfast_interval; /* interval in microsecs */ |
333 kcondvar_t failfast_cv; /* cv for failfast update */ | |
334 kcondvar_t failfast_io_cv; /* cv wait for I/O to finish */ | |
335 vdc_io_t *failfast_io_queue; /* failfast io queue */ | |
336 | 355 |
337 /* | 356 /* |
338 * kstats used to store I/O statistics consumed by iostat(1M). | 357 * kstats used to store I/O statistics consumed by iostat(1M). |
339 * These are protected by the lock mutex. | 358 * These are protected by the lock mutex. |
340 */ | 359 */ |