25465
|
1 /*
|
|
2 * CDDL HEADER START
|
|
3 *
|
|
4 * The contents of this file are subject to the terms of the
|
|
5 * Common Development and Distribution License (the "License").
|
|
6 * You may not use this file except in compliance with the License.
|
|
7 *
|
|
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
9 * or http://www.opensolaris.org/os/licensing.
|
|
10 * See the License for the specific language governing permissions
|
|
11 * and limitations under the License.
|
|
12 *
|
|
13 * When distributing Covered Code, include this CDDL HEADER in each
|
|
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
15 * If applicable, add the following below this CDDL HEADER, with the
|
|
16 * fields enclosed by brackets "[]" replaced with your own identifying
|
|
17 * information: Portions Copyright [yyyy] [name of copyright owner]
|
|
18 *
|
|
19 * CDDL HEADER END
|
|
20 */
|
|
21 /*
|
|
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
23 * Copyright (c) 2019, Joyent, Inc.
|
|
24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
|
|
25 * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
|
|
26 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
|
|
27 * Copyright (c) 2020, George Amanakis. All rights reserved.
|
|
28 */
|
|
29
|
|
30 #ifndef _SYS_ARC_IMPL_H
|
|
31 #define _SYS_ARC_IMPL_H
|
|
32
|
|
33 #include <sys/arc.h>
|
|
34 #include <sys/multilist.h>
|
|
35
|
|
36 #ifdef __cplusplus
|
|
37 extern "C" {
|
|
38 #endif
|
|
39
|
|
40 /*
|
|
41 * Note that buffers can be in one of 6 states:
|
|
42 * ARC_anon - anonymous (discussed below)
|
|
43 * ARC_mru - recently used, currently cached
|
|
44 * ARC_mru_ghost - recently used, no longer in cache
|
|
45 * ARC_mfu - frequently used, currently cached
|
|
46 * ARC_mfu_ghost - frequently used, no longer in cache
|
|
47 * ARC_l2c_only - exists in L2ARC but not other states
|
|
48 * When there are no active references to the buffer, they are
|
|
49 * linked onto a list in one of these arc states. These are
|
|
50 * the only buffers that can be evicted or deleted. Within each
|
|
51 * state there are multiple lists, one for meta-data and one for
|
|
52 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
|
|
53 * etc.) is tracked separately so that it can be managed more
|
|
54 * explicitly: favored over data, limited explicitly.
|
|
55 *
|
|
56 * Anonymous buffers are buffers that are not associated with
|
|
57 * a DVA. These are buffers that hold dirty block copies
|
|
58 * before they are written to stable storage. By definition,
|
|
59 * they are "ref'd" and are considered part of arc_mru
|
|
60 * that cannot be freed. Generally, they will acquire a DVA
|
|
61 * as they are written and migrate onto the arc_mru list.
|
|
62 *
|
|
63 * The ARC_l2c_only state is for buffers that are in the second
|
|
64 * level ARC but no longer in any of the ARC_m* lists. The second
|
|
65 * level ARC itself may also contain buffers that are in any of
|
|
66 * the ARC_m* states - meaning that a buffer can exist in two
|
|
67 * places. The reason for the ARC_l2c_only state is to keep the
|
|
68 * buffer header in the hash table, so that reads that hit the
|
|
69 * second level ARC benefit from these fast lookups.
|
|
70 */
|
|
71
|
|
72 typedef struct arc_state {
|
|
73 /*
|
|
74 * list of evictable buffers
|
|
75 */
|
|
76 multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
|
|
77 /*
|
|
78 * total amount of evictable data in this state
|
|
79 */
|
|
80 zfs_refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
|
|
81 /*
|
|
82 * total amount of data in this state; this includes: evictable,
|
|
83 * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
|
|
84 */
|
|
85 zfs_refcount_t arcs_size;
|
|
86 } arc_state_t;
|
|
87
|
|
88 typedef struct arc_callback arc_callback_t;
|
|
89
|
|
90 struct arc_callback {
|
|
91 void *acb_private;
|
|
92 arc_read_done_func_t *acb_done;
|
|
93 arc_buf_t *acb_buf;
|
|
94 boolean_t acb_encrypted;
|
|
95 boolean_t acb_compressed;
|
|
96 boolean_t acb_noauth;
|
|
97 zbookmark_phys_t acb_zb;
|
|
98 zio_t *acb_zio_dummy;
|
|
99 zio_t *acb_zio_head;
|
|
100 arc_callback_t *acb_next;
|
|
101 };
|
|
102
|
|
103 typedef struct arc_write_callback arc_write_callback_t;
|
|
104
|
|
105 struct arc_write_callback {
|
|
106 void *awcb_private;
|
|
107 arc_write_done_func_t *awcb_ready;
|
|
108 arc_write_done_func_t *awcb_children_ready;
|
|
109 arc_write_done_func_t *awcb_physdone;
|
|
110 arc_write_done_func_t *awcb_done;
|
|
111 arc_buf_t *awcb_buf;
|
|
112 };
|
|
113
|
|
114 /*
|
|
115 * ARC buffers are separated into multiple structs as a memory saving measure:
|
|
116 * - Common fields struct, always defined, and embedded within it:
|
|
117 * - L2-only fields, always allocated but undefined when not in L2ARC
|
|
118 * - L1-only fields, only allocated when in L1ARC
|
|
119 *
|
|
120 * Buffer in L1 Buffer only in L2
|
|
121 * +------------------------+ +------------------------+
|
|
122 * | arc_buf_hdr_t | | arc_buf_hdr_t |
|
|
123 * | | | |
|
|
124 * | | | |
|
|
125 * | | | |
|
|
126 * +------------------------+ +------------------------+
|
|
127 * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
|
|
128 * | (undefined if L1-only) | | |
|
|
129 * +------------------------+ +------------------------+
|
|
130 * | l1arc_buf_hdr_t |
|
|
131 * | |
|
|
132 * | |
|
|
133 * | |
|
|
134 * | |
|
|
135 * +------------------------+
|
|
136 *
|
|
137 * Because it's possible for the L2ARC to become extremely large, we can wind
|
|
138 * up eating a lot of memory in L2ARC buffer headers, so the size of a header
|
|
139 * is minimized by only allocating the fields necessary for an L1-cached buffer
|
|
140 * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
|
|
141 * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
|
|
142 * words in pointers. arc_hdr_realloc() is used to switch a header between
|
|
143 * these two allocation states.
|
|
144 */
|
|
145 typedef struct l1arc_buf_hdr {
|
|
146 kmutex_t b_freeze_lock;
|
|
147 zio_cksum_t *b_freeze_cksum;
|
|
148 #ifdef ZFS_DEBUG
|
|
149 /*
|
|
150 * Used for debugging with kmem_flags - by allocating and freeing
|
|
151 * b_thawed when the buffer is thawed, we get a record of the stack
|
|
152 * trace that thawed it.
|
|
153 */
|
|
154 void *b_thawed;
|
|
155 #endif
|
|
156
|
|
157 arc_buf_t *b_buf;
|
|
158 uint32_t b_bufcnt;
|
|
159 /* for waiting on writes to complete */
|
|
160 kcondvar_t b_cv;
|
|
161 uint8_t b_byteswap;
|
|
162
|
|
163 /* protected by arc state mutex */
|
|
164 arc_state_t *b_state;
|
|
165 multilist_node_t b_arc_node;
|
|
166
|
|
167 /* updated atomically */
|
|
168 clock_t b_arc_access;
|
|
169
|
|
170 /* self protecting */
|
|
171 zfs_refcount_t b_refcnt;
|
|
172
|
|
173 arc_callback_t *b_acb;
|
|
174 abd_t *b_pabd;
|
|
175 } l1arc_buf_hdr_t;
|
|
176
|
|
/*
 * Flag bits for the persistent L2ARC device header (its dh_flags field is
 * annotated as holding l2arc_dev_hdr_flags_t values).
 */
typedef enum l2arc_dev_hdr_flags_t {
	/* bit 0: mirror of l2ad_first */
	L2ARC_DEV_HDR_EVICT_FIRST = 0x1
} l2arc_dev_hdr_flags_t;
|
|
180
|
|
181 /*
|
|
182 * Pointer used in persistent L2ARC (for pointing to log blocks).
|
|
183 */
|
|
184 typedef struct l2arc_log_blkptr {
|
|
185 /*
|
|
186 * Offset of log block within the device, in bytes
|
|
187 */
|
|
188 uint64_t lbp_daddr;
|
|
189 /*
|
|
190 * Aligned payload size (in bytes) of the log block
|
|
191 */
|
|
192 uint64_t lbp_payload_asize;
|
|
193 /*
|
|
194 * Offset in bytes of the first buffer in the payload
|
|
195 */
|
|
196 uint64_t lbp_payload_start;
|
|
197 /*
|
|
198 * lbp_prop has the following format:
|
|
199 * * logical size (in bytes)
|
|
200 * * aligned (after compression) size (in bytes)
|
|
201 * * compression algorithm (we always LZ4-compress l2arc logs)
|
|
202 * * checksum algorithm (used for lbp_cksum)
|
|
203 */
|
|
204 uint64_t lbp_prop;
|
|
205 zio_cksum_t lbp_cksum; /* checksum of log */
|
|
206 } l2arc_log_blkptr_t;
|
|
207
|
|
208 /*
|
|
209 * The persistent L2ARC device header.
|
|
210 * Byte order of magic determines whether 64-bit bswap of fields is necessary.
|
|
211 */
|
|
212 typedef struct l2arc_dev_hdr_phys {
|
|
213 uint64_t dh_magic; /* L2ARC_DEV_HDR_MAGIC */
|
|
214 uint64_t dh_version; /* Persistent L2ARC version */
|
|
215
|
|
216 /*
|
|
217 * Global L2ARC device state and metadata.
|
|
218 */
|
|
219 uint64_t dh_spa_guid;
|
|
220 uint64_t dh_vdev_guid;
|
|
221 uint64_t dh_log_entries; /* mirror of l2ad_log_entries */
|
|
222 uint64_t dh_evict; /* evicted offset in bytes */
|
|
223 uint64_t dh_flags; /* l2arc_dev_hdr_flags_t */
|
|
224 /*
|
|
225 * Used in zdb.c for determining if a log block is valid, in the same
|
|
226 * way that l2arc_rebuild() does.
|
|
227 */
|
|
228 uint64_t dh_start; /* mirror of l2ad_start */
|
|
229 uint64_t dh_end; /* mirror of l2ad_end */
|
|
230 /*
|
|
231 * Start of log block chain. [0] -> newest log, [1] -> one older (used
|
|
232 * for initiating prefetch).
|
|
233 */
|
|
234 l2arc_log_blkptr_t dh_start_lbps[2];
|
|
235 /*
|
|
236 * Aligned size of all log blocks as accounted by vdev_space_update().
|
|
237 */
|
|
238 uint64_t dh_lb_asize; /* mirror of l2ad_lb_asize */
|
|
239 uint64_t dh_lb_count; /* mirror of l2ad_lb_count */
|
|
240 const uint64_t dh_pad[32]; /* pad to 512 bytes */
|
|
241 zio_eck_t dh_tail;
|
|
242 } l2arc_dev_hdr_phys_t;
|
|
243 CTASSERT(sizeof (l2arc_dev_hdr_phys_t) == SPA_MINBLOCKSIZE);
|
|
244
|
|
245 /*
|
|
246 * A single ARC buffer header entry in a l2arc_log_blk_phys_t.
|
|
247 */
|
|
248 typedef struct l2arc_log_ent_phys {
|
|
249 dva_t le_dva; /* dva of buffer */
|
|
250 uint64_t le_birth; /* birth txg of buffer */
|
|
251 /*
|
|
252 * le_prop has the following format:
|
|
253 * * logical size (in bytes)
|
|
254 * * physical (compressed) size (in bytes)
|
|
255 * * compression algorithm
|
|
256 * * object type (used to restore arc_buf_contents_t)
|
|
257 * * protected status (used for encryption)
|
|
258 * * prefetch status (used in l2arc_read_done())
|
|
259 */
|
|
260 uint64_t le_prop;
|
|
261 uint64_t le_daddr; /* buf location on l2dev */
|
|
262 /*
|
|
263 * We pad the size of each entry to a power of 2 so that the size of
|
|
264 * l2arc_log_blk_phys_t is power-of-2 aligned with SPA_MINBLOCKSHIFT,
|
|
265 * because of the L2ARC_SET_*SIZE macros.
|
|
266 */
|
|
267 const uint64_t le_pad[3]; /* pad to 64 bytes */
|
|
268 } l2arc_log_ent_phys_t;
|
|
269
|
|
/*
 * Capacity of the lb_entries[] array in l2arc_log_blk_phys_t.  Must remain
 * an integer constant expression because it is used as an array bound.
 */
#define	L2ARC_LOG_BLK_MAX_ENTRIES	(1022)
|
|
271
|
|
272 /*
|
|
273 * A log block of up to 1022 ARC buffer log entries, chained into the
|
|
274 * persistent L2ARC metadata linked list. Byte order of magic determines
|
|
275 * whether 64-bit bswap of fields is necessary.
|
|
276 */
|
|
277 typedef struct l2arc_log_blk_phys {
|
|
278 uint64_t lb_magic; /* L2ARC_LOG_BLK_MAGIC */
|
|
279 /*
|
|
280 * There are 2 chains (headed by dh_start_lbps[2]), and this field
|
|
281 * points back to the previous block in this chain. We alternate
|
|
282 * which chain we append to, so they are time-wise and offset-wise
|
|
283 * interleaved, but that is an optimization rather than for
|
|
284 * correctness.
|
|
285 */
|
|
286 l2arc_log_blkptr_t lb_prev_lbp; /* pointer to prev log block */
|
|
287 /*
|
|
288 * Pad header section to 128 bytes
|
|
289 */
|
|
290 uint64_t lb_pad[7];
|
|
291 /* Payload */
|
|
292 l2arc_log_ent_phys_t lb_entries[L2ARC_LOG_BLK_MAX_ENTRIES];
|
|
293 } l2arc_log_blk_phys_t; /* 64K total */
|
|
294 /*
|
|
295 * The size of l2arc_log_blk_phys_t has to be power-of-2 aligned with
|
|
296 * SPA_MINBLOCKSHIFT because of L2BLK_SET_*SIZE macros.
|
|
297 */
|
|
298 CTASSERT(IS_P2ALIGNED(sizeof (l2arc_log_blk_phys_t),
|
|
299 1ULL << SPA_MINBLOCKSHIFT));
|
|
300 CTASSERT(sizeof (l2arc_log_blk_phys_t) >= SPA_MINBLOCKSIZE);
|
|
301 CTASSERT(sizeof (l2arc_log_blk_phys_t) <= SPA_MAXBLOCKSIZE);
|
|
302
|
|
303 /*
|
|
304 * These structures hold in-flight abd buffers for log blocks as they're being
|
|
305 * written to the L2ARC device.
|
|
306 */
|
|
307 typedef struct l2arc_lb_abd_buf {
|
|
308 abd_t *abd;
|
|
309 list_node_t node;
|
|
310 } l2arc_lb_abd_buf_t;
|
|
311
|
|
312 /*
|
|
313 * These structures hold pointers to log blocks present on the L2ARC device.
|
|
314 */
|
|
315 typedef struct l2arc_lb_ptr_buf {
|
|
316 l2arc_log_blkptr_t *lb_ptr;
|
|
317 list_node_t node;
|
|
318 } l2arc_lb_ptr_buf_t;
|
|
319
|
|
/*
 * Getters and setters for the fields packed into le_prop and lbp_prop.
 *
 * The numbers below are the position and length arguments handed to the
 * BF64_* helpers (NOTE(review): read here as start bit and width in bits;
 * confirm against the BF64_* definitions):
 *
 *	position  length             field
 *	   0      SPA_LSIZEBITS      LSIZE     (via *_SB, SPA_MINBLOCKSHIFT, 1)
 *	  16      SPA_PSIZEBITS      PSIZE     (via *_SB, SPA_MINBLOCKSHIFT, 1)
 *	  32      SPA_COMPRESSBITS   COMPRESS
 *	  39      1                  PREFETCH
 *	  40      8                  CHECKSUM
 *	  48      8                  TYPE
 *	  56      1                  PROTECTED
 */
#define	L2BLK_GET_LSIZE(field)						\
	BF64_GET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_LSIZE(field, x)					\
	BF64_SET_SB((field), 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	L2BLK_GET_PSIZE(field)						\
	BF64_GET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
#define	L2BLK_SET_PSIZE(field, x)					\
	BF64_SET_SB((field), 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)

#define	L2BLK_GET_COMPRESS(field)					\
	BF64_GET((field), 32, SPA_COMPRESSBITS)
#define	L2BLK_SET_COMPRESS(field, x)					\
	BF64_SET((field), 32, SPA_COMPRESSBITS, x)

#define	L2BLK_GET_PREFETCH(field)	BF64_GET((field), 39, 1)
#define	L2BLK_SET_PREFETCH(field, x)	BF64_SET((field), 39, 1, x)

#define	L2BLK_GET_CHECKSUM(field)	BF64_GET((field), 40, 8)
#define	L2BLK_SET_CHECKSUM(field, x)	BF64_SET((field), 40, 8, x)

#define	L2BLK_GET_TYPE(field)		BF64_GET((field), 48, 8)
#define	L2BLK_SET_TYPE(field, x)	BF64_SET((field), 48, 8, x)

#define	L2BLK_GET_PROTECTED(field)	BF64_GET((field), 56, 1)
#define	L2BLK_SET_PROTECTED(field, x)	BF64_SET((field), 56, 1, x)
|
|
341
|
|
/*
 * Exchange the values of two pointer lvalues.
 *
 * Both arguments must be modifiable lvalues of object-pointer type.  Each
 * argument is evaluated twice, so do not pass expressions with side
 * effects.
 *
 * The temporary carries a deliberately unusual name: with a plain "tmp",
 * a caller writing PTR_SWAP(tmp, other) would have its own variable
 * shadowed by the macro's local and the swap would silently do nothing.
 * The arguments are parenthesized so that they expand as single operands.
 */
#define	PTR_SWAP(x, y)				\
	do {					\
		void *ptr_swap_tmp_ = (x);	\
		(x) = (y);			\
		(y) = ptr_swap_tmp_;		\
	_NOTE(CONSTCOND)			\
	} while (0)
|
|
349
|
|
/*
 * Magic numbers of the persistent L2ARC on-device structures.  Each value
 * is the big-endian packing of an eight-character ASCII tag.
 */
#define	L2ARC_DEV_HDR_MAGIC	0x5a46534341434845ULL	/* "ZFSCACHE" */
#define	L2ARC_LOG_BLK_MAGIC	0x4c4f47424c4b4844ULL	/* "LOGBLKHD" */
|
|
352
|
|
353 /*
|
|
354 * L2ARC Internals
|
|
355 */
|
|
356 typedef struct l2arc_dev {
|
|
357 vdev_t *l2ad_vdev; /* vdev */
|
|
358 spa_t *l2ad_spa; /* spa */
|
|
359 uint64_t l2ad_hand; /* next write location */
|
|
360 uint64_t l2ad_start; /* first addr on device */
|
|
361 uint64_t l2ad_end; /* last addr on device */
|
|
362 boolean_t l2ad_first; /* first sweep through */
|
|
363 boolean_t l2ad_writing; /* currently writing */
|
|
364 kmutex_t l2ad_mtx; /* lock for buffer list */
|
|
365 list_t l2ad_buflist; /* buffer list */
|
|
366 list_node_t l2ad_node; /* device list node */
|
|
367 zfs_refcount_t l2ad_alloc; /* allocated bytes */
|
|
368 /*
|
|
369 * Persistence-related stuff
|
|
370 */
|
|
371 l2arc_dev_hdr_phys_t *l2ad_dev_hdr; /* persistent device header */
|
|
372 uint64_t l2ad_dev_hdr_asize; /* aligned hdr size */
|
|
373 l2arc_log_blk_phys_t l2ad_log_blk; /* currently open log block */
|
|
374 int l2ad_log_ent_idx; /* index into cur log blk */
|
|
375 /* Number of bytes in current log block's payload */
|
|
376 uint64_t l2ad_log_blk_payload_asize;
|
|
377 /*
|
|
378 * Offset (in bytes) of the first buffer in current log block's
|
|
379 * payload.
|
|
380 */
|
|
381 uint64_t l2ad_log_blk_payload_start;
|
|
382 /* Flag indicating whether a rebuild is scheduled or is going on */
|
|
383 boolean_t l2ad_rebuild;
|
|
384 boolean_t l2ad_rebuild_cancel;
|
|
385 boolean_t l2ad_rebuild_began;
|
|
386 uint64_t l2ad_log_entries; /* entries per log blk */
|
|
387 uint64_t l2ad_evict; /* evicted offset in bytes */
|
|
388 /* List of pointers to log blocks present in the L2ARC device */
|
|
389 list_t l2ad_lbptr_list;
|
|
390 /*
|
|
391 * Aligned size of all log blocks as accounted by vdev_space_update().
|
|
392 */
|
|
393 zfs_refcount_t l2ad_lb_asize;
|
|
394 /*
|
|
395 * Number of log blocks present on the device.
|
|
396 */
|
|
397 zfs_refcount_t l2ad_lb_count;
|
|
398 } l2arc_dev_t;
|
|
399
|
|
400 /*
|
|
401 * Encrypted blocks will need to be stored encrypted on the L2ARC
|
|
402 * disk as they appear in the main pool. In order for this to work we
|
|
403 * need to pass around the encryption parameters so they can be used
|
|
404 * to write data to the L2ARC. This struct is only defined in the
|
|
405 * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
|
|
406 * flag set.
|
|
407 */
|
|
408 typedef struct arc_buf_hdr_crypt {
|
|
409 abd_t *b_rabd; /* raw encrypted data */
|
|
410 dmu_object_type_t b_ot; /* object type */
|
|
411 uint32_t b_ebufcnt; /* number or encryped buffers */
|
|
412
|
|
413 /* dsobj for looking up encryption key for l2arc encryption */
|
|
414 uint64_t b_dsobj; /* for looking up key */
|
|
415
|
|
416 /* encryption parameters */
|
|
417 uint8_t b_salt[ZIO_DATA_SALT_LEN];
|
|
418 uint8_t b_iv[ZIO_DATA_IV_LEN];
|
|
419
|
|
420 /*
|
|
421 * Technically this could be removed since we will always be able to
|
|
422 * get the mac from the bp when we need it. However, it is inconvenient
|
|
423 * for callers of arc code to have to pass a bp in all the time. This
|
|
424 * also allows us to assert that L2ARC data is properly encrypted to
|
|
425 * match the data in the main storage pool.
|
|
426 */
|
|
427 uint8_t b_mac[ZIO_DATA_MAC_LEN];
|
|
428 } arc_buf_hdr_crypt_t;
|
|
429
|
|
430 typedef struct l2arc_buf_hdr {
|
|
431 /* protected by arc_buf_hdr mutex */
|
|
432 l2arc_dev_t *b_dev; /* L2ARC device */
|
|
433 uint64_t b_daddr; /* disk address, offset byte */
|
|
434
|
|
435 list_node_t b_l2node;
|
|
436 } l2arc_buf_hdr_t;
|
|
437
|
|
438 typedef struct l2arc_write_callback {
|
|
439 l2arc_dev_t *l2wcb_dev; /* device info */
|
|
440 arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
|
|
441 /* in-flight list of log blocks */
|
|
442 list_t l2wcb_abd_list;
|
|
443 } l2arc_write_callback_t;
|
|
444
|
|
445 struct arc_buf_hdr {
|
|
446 /* protected by hash lock */
|
|
447 dva_t b_dva;
|
|
448 uint64_t b_birth;
|
|
449
|
|
450 arc_buf_contents_t b_type;
|
|
451 arc_buf_hdr_t *b_hash_next;
|
|
452 arc_flags_t b_flags;
|
|
453
|
|
454 /*
|
|
455 * This field stores the size of the data buffer after
|
|
456 * compression, and is set in the arc's zio completion handlers.
|
|
457 * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
|
|
458 *
|
|
459 * While the block pointers can store up to 32MB in their psize
|
|
460 * field, we can only store up to 32MB minus 512B. This is due
|
|
461 * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
|
|
462 * a field of zeros represents 512B in the bp). We can't use a
|
|
463 * bias of 1 since we need to reserve a psize of zero, here, to
|
|
464 * represent holes and embedded blocks.
|
|
465 *
|
|
466 * This isn't a problem in practice, since the maximum size of a
|
|
467 * buffer is limited to 16MB, so we never need to store 32MB in
|
|
468 * this field.
|
|
469 */
|
|
470 uint16_t b_psize;
|
|
471
|
|
472 /*
|
|
473 * This field stores the size of the data buffer before
|
|
474 * compression, and cannot change once set. It is in units
|
|
475 * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
|
|
476 */
|
|
477 uint16_t b_lsize; /* immutable */
|
|
478 uint64_t b_spa; /* immutable */
|
|
479
|
|
480 /* L2ARC fields. Undefined when not in L2ARC. */
|
|
481 l2arc_buf_hdr_t b_l2hdr;
|
|
482 /* L1ARC fields. Undefined when in l2arc_only state */
|
|
483 l1arc_buf_hdr_t b_l1hdr;
|
|
484 /*
|
|
485 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
|
|
486 * is set and the L1 header exists.
|
|
487 */
|
|
488 arc_buf_hdr_crypt_t b_crypt_hdr;
|
|
489 };
|
|
490
|
|
491 typedef struct arc_stats {
|
|
492 kstat_named_t arcstat_hits;
|
|
493 kstat_named_t arcstat_misses;
|
|
494 kstat_named_t arcstat_demand_data_hits;
|
|
495 kstat_named_t arcstat_demand_data_misses;
|
|
496 kstat_named_t arcstat_demand_metadata_hits;
|
|
497 kstat_named_t arcstat_demand_metadata_misses;
|
|
498 kstat_named_t arcstat_prefetch_data_hits;
|
|
499 kstat_named_t arcstat_prefetch_data_misses;
|
|
500 kstat_named_t arcstat_prefetch_metadata_hits;
|
|
501 kstat_named_t arcstat_prefetch_metadata_misses;
|
|
502 kstat_named_t arcstat_mru_hits;
|
|
503 kstat_named_t arcstat_mru_ghost_hits;
|
|
504 kstat_named_t arcstat_mfu_hits;
|
|
505 kstat_named_t arcstat_mfu_ghost_hits;
|
|
506 kstat_named_t arcstat_deleted;
|
|
507 /*
|
|
508 * Number of buffers that could not be evicted because the hash lock
|
|
509 * was held by another thread. The lock may not necessarily be held
|
|
510 * by something using the same buffer, since hash locks are shared
|
|
511 * by multiple buffers.
|
|
512 */
|
|
513 kstat_named_t arcstat_mutex_miss;
|
|
514 /*
|
|
515 * Number of buffers skipped when updating the access state due to the
|
|
516 * header having already been released after acquiring the hash lock.
|
|
517 */
|
|
518 kstat_named_t arcstat_access_skip;
|
|
519 /*
|
|
520 * Number of buffers skipped because they have I/O in progress, are
|
|
521 * indirect prefetch buffers that have not lived long enough, or are
|
|
522 * not from the spa we're trying to evict from.
|
|
523 */
|
|
524 kstat_named_t arcstat_evict_skip;
|
|
525 /*
|
|
526 * Number of times arc_evict_state() was unable to evict enough
|
|
527 * buffers to reach its target amount.
|
|
528 */
|
|
529 kstat_named_t arcstat_evict_not_enough;
|
|
530 kstat_named_t arcstat_evict_l2_cached;
|
|
531 kstat_named_t arcstat_evict_l2_eligible;
|
|
532 kstat_named_t arcstat_evict_l2_ineligible;
|
|
533 kstat_named_t arcstat_evict_l2_skip;
|
|
534 kstat_named_t arcstat_hash_elements;
|
|
535 kstat_named_t arcstat_hash_elements_max;
|
|
536 kstat_named_t arcstat_hash_collisions;
|
|
537 kstat_named_t arcstat_hash_chains;
|
|
538 kstat_named_t arcstat_hash_chain_max;
|
|
539 kstat_named_t arcstat_p;
|
|
540 kstat_named_t arcstat_c;
|
|
541 kstat_named_t arcstat_c_min;
|
|
542 kstat_named_t arcstat_c_max;
|
|
543 /* Not updated directly; only synced in arc_kstat_update. */
|
|
544 kstat_named_t arcstat_size;
|
|
545 /*
|
|
546 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
|
|
547 * Note that the compressed bytes may match the uncompressed bytes
|
|
548 * if the block is either not compressed or compressed arc is disabled.
|
|
549 */
|
|
550 kstat_named_t arcstat_compressed_size;
|
|
551 /*
|
|
552 * Uncompressed size of the data stored in b_pabd. If compressed
|
|
553 * arc is disabled then this value will be identical to the stat
|
|
554 * above.
|
|
555 */
|
|
556 kstat_named_t arcstat_uncompressed_size;
|
|
557 /*
|
|
558 * Number of bytes stored in all the arc_buf_t's. This is classified
|
|
559 * as "overhead" since this data is typically short-lived and will
|
|
560 * be evicted from the arc when it becomes unreferenced unless the
|
|
561 * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
|
|
562 * values have been set (see comment in dbuf.c for more information).
|
|
563 */
|
|
564 kstat_named_t arcstat_overhead_size;
|
|
565 /*
|
|
566 * Number of bytes consumed by internal ARC structures necessary
|
|
567 * for tracking purposes; these structures are not actually
|
|
568 * backed by ARC buffers. This includes arc_buf_hdr_t structures
|
|
569 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
|
|
570 * caches), and arc_buf_t structures (allocated via arc_buf_t
|
|
571 * cache).
|
|
572 * Not updated directly; only synced in arc_kstat_update.
|
|
573 */
|
|
574 kstat_named_t arcstat_hdr_size;
|
|
575 /*
|
|
576 * Number of bytes consumed by ARC buffers of type equal to
|
|
577 * ARC_BUFC_DATA. This is generally consumed by buffers backing
|
|
578 * on disk user data (e.g. plain file contents).
|
|
579 * Not updated directly; only synced in arc_kstat_update.
|
|
580 */
|
|
581 kstat_named_t arcstat_data_size;
|
|
582 /*
|
|
583 * Number of bytes consumed by ARC buffers of type equal to
|
|
584 * ARC_BUFC_METADATA. This is generally consumed by buffers
|
|
585 * backing on disk data that is used for internal ZFS
|
|
586 * structures (e.g. ZAP, dnode, indirect blocks, etc).
|
|
587 * Not updated directly; only synced in arc_kstat_update.
|
|
588 */
|
|
589 kstat_named_t arcstat_metadata_size;
|
|
590 /*
|
|
591 * Number of bytes consumed by various buffers and structures
|
|
592 * not actually backed with ARC buffers. This includes bonus
|
|
593 * buffers (allocated directly via zio_buf_* functions),
|
|
594 * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t
|
|
595 * cache), and dnode_t structures (allocated via dnode_t cache).
|
|
596 * Not updated directly; only synced in arc_kstat_update.
|
|
597 */
|
|
598 kstat_named_t arcstat_other_size;
|
|
599 /*
|
|
600 * Total number of bytes consumed by ARC buffers residing in the
|
|
601 * arc_anon state. This includes *all* buffers in the arc_anon
|
|
602 * state; e.g. data, metadata, evictable, and unevictable buffers
|
|
603 * are all included in this value.
|
|
604 * Not updated directly; only synced in arc_kstat_update.
|
|
605 */
|
|
606 kstat_named_t arcstat_anon_size;
|
|
607 /*
|
|
608 * Number of bytes consumed by ARC buffers that meet the
|
|
609 * following criteria: backing buffers of type ARC_BUFC_DATA,
|
|
610 * residing in the arc_anon state, and are eligible for eviction
|
|
611 * (e.g. have no outstanding holds on the buffer).
|
|
612 * Not updated directly; only synced in arc_kstat_update.
|
|
613 */
|
|
614 kstat_named_t arcstat_anon_evictable_data;
|
|
615 /*
|
|
616 * Number of bytes consumed by ARC buffers that meet the
|
|
617 * following criteria: backing buffers of type ARC_BUFC_METADATA,
|
|
618 * residing in the arc_anon state, and are eligible for eviction
|
|
619 * (e.g. have no outstanding holds on the buffer).
|
|
620 * Not updated directly; only synced in arc_kstat_update.
|
|
621 */
|
|
622 kstat_named_t arcstat_anon_evictable_metadata;
|
|
623 /*
|
|
624 * Total number of bytes consumed by ARC buffers residing in the
|
|
625 * arc_mru state. This includes *all* buffers in the arc_mru
|
|
626 * state; e.g. data, metadata, evictable, and unevictable buffers
|
|
627 * are all included in this value.
|
|
628 * Not updated directly; only synced in arc_kstat_update.
|
|
629 */
|
|
630 kstat_named_t arcstat_mru_size;
|
|
631 /*
|
|
632 * Number of bytes consumed by ARC buffers that meet the
|
|
633 * following criteria: backing buffers of type ARC_BUFC_DATA,
|
|
634 * residing in the arc_mru state, and are eligible for eviction
|
|
635 * (e.g. have no outstanding holds on the buffer).
|
|
636 * Not updated directly; only synced in arc_kstat_update.
|
|
637 */
|
|
638 kstat_named_t arcstat_mru_evictable_data;
|
|
639 /*
|
|
640 * Number of bytes consumed by ARC buffers that meet the
|
|
641 * following criteria: backing buffers of type ARC_BUFC_METADATA,
|
|
642 * residing in the arc_mru state, and are eligible for eviction
|
|
643 * (e.g. have no outstanding holds on the buffer).
|
|
644 * Not updated directly; only synced in arc_kstat_update.
|
|
645 */
|
|
646 kstat_named_t arcstat_mru_evictable_metadata;
|
|
647 /*
|
|
648 * Total number of bytes that *would have been* consumed by ARC
|
|
649 * buffers in the arc_mru_ghost state. The key thing to note
|
|
650 * here, is the fact that this size doesn't actually indicate
|
|
651 * RAM consumption. The ghost lists only consist of headers and
|
|
652 * don't actually have ARC buffers linked off of these headers.
|
|
653 * Thus, *if* the headers had associated ARC buffers, these
|
|
654 * buffers *would have* consumed this number of bytes.
|
|
655 * Not updated directly; only synced in arc_kstat_update.
|
|
656 */
|
|
657 kstat_named_t arcstat_mru_ghost_size;
|
|
658 /*
|
|
659 * Number of bytes that *would have been* consumed by ARC
|
|
660 * buffers that are eligible for eviction, of type
|
|
661 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
|
|
662 * Not updated directly; only synced in arc_kstat_update.
|
|
663 */
|
|
664 kstat_named_t arcstat_mru_ghost_evictable_data;
|
|
665 /*
|
|
666 * Number of bytes that *would have been* consumed by ARC
|
|
667 * buffers that are eligible for eviction, of type
|
|
668 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
|
|
669 * Not updated directly; only synced in arc_kstat_update.
|
|
670 */
|
|
671 kstat_named_t arcstat_mru_ghost_evictable_metadata;
|
|
672 /*
|
|
673 * Total number of bytes consumed by ARC buffers residing in the
|
|
674 * arc_mfu state. This includes *all* buffers in the arc_mfu
|
|
675 * state; e.g. data, metadata, evictable, and unevictable buffers
|
|
676 * are all included in this value.
|
|
677 * Not updated directly; only synced in arc_kstat_update.
|
|
678 */
|
|
679 kstat_named_t arcstat_mfu_size;
|
|
680 /*
|
|
681 * Number of bytes consumed by ARC buffers that are eligible for
|
|
682 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
|
|
683 * state.
|
|
684 * Not updated directly; only synced in arc_kstat_update.
|
|
685 */
|
|
686 kstat_named_t arcstat_mfu_evictable_data;
|
|
687 /*
|
|
688 * Number of bytes consumed by ARC buffers that are eligible for
|
|
689 * eviction, of type ARC_BUFC_METADATA, and reside in the
|
|
690 * arc_mfu state.
|
|
691 * Not updated directly; only synced in arc_kstat_update.
|
|
692 */
|
|
693 kstat_named_t arcstat_mfu_evictable_metadata;
|
|
694 /*
|
|
695 * Total number of bytes that *would have been* consumed by ARC
|
|
696 * buffers in the arc_mfu_ghost state. See the comment above
|
|
697 * arcstat_mru_ghost_size for more details.
|
|
698 * Not updated directly; only synced in arc_kstat_update.
|
|
699 */
|
|
700 kstat_named_t arcstat_mfu_ghost_size;
|
|
701 /*
|
|
702 * Number of bytes that *would have been* consumed by ARC
|
|
703 * buffers that are eligible for eviction, of type
|
|
704 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
|
|
705 * Not updated directly; only synced in arc_kstat_update.
|
|
706 */
|
|
707 kstat_named_t arcstat_mfu_ghost_evictable_data;
|
|
708 /*
|
|
709 * Number of bytes that *would have been* consumed by ARC
|
|
710 * buffers that are eligible for eviction, of type
|
|
711 * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
|
|
712 * Not updated directly; only synced in arc_kstat_update.
|
|
713 */
|
|
714 kstat_named_t arcstat_mfu_ghost_evictable_metadata;
|
|
715 kstat_named_t arcstat_l2_hits;
|
|
716 kstat_named_t arcstat_l2_misses;
|
|
717 kstat_named_t arcstat_l2_feeds;
|
|
718 kstat_named_t arcstat_l2_rw_clash;
|
|
719 kstat_named_t arcstat_l2_read_bytes;
|
|
720 kstat_named_t arcstat_l2_write_bytes;
|
|
721 kstat_named_t arcstat_l2_writes_sent;
|
|
722 kstat_named_t arcstat_l2_writes_done;
|
|
723 kstat_named_t arcstat_l2_writes_error;
|
|
724 kstat_named_t arcstat_l2_writes_lock_retry;
|
|
725 kstat_named_t arcstat_l2_evict_lock_retry;
|
|
726 kstat_named_t arcstat_l2_evict_reading;
|
|
727 kstat_named_t arcstat_l2_evict_l1cached;
|
|
728 kstat_named_t arcstat_l2_free_on_write;
|
|
729 kstat_named_t arcstat_l2_abort_lowmem;
|
|
730 kstat_named_t arcstat_l2_cksum_bad;
|
|
731 kstat_named_t arcstat_l2_io_error;
|
|
732 kstat_named_t arcstat_l2_lsize;
|
|
733 kstat_named_t arcstat_l2_psize;
|
|
734 /* Not updated directly; only synced in arc_kstat_update. */
|
|
735 kstat_named_t arcstat_l2_hdr_size;
|
|
736 /*
|
|
737 * Number of L2ARC log blocks written. These are used for restoring the
|
|
738 * L2ARC. Updated during writing of L2ARC log blocks.
|
|
739 */
|
|
740 kstat_named_t arcstat_l2_log_blk_writes;
|
|
741 /*
|
|
742 * Moving average of the aligned size of the L2ARC log blocks, in
|
|
743 * bytes. Updated during L2ARC rebuild and during writing of L2ARC
|
|
744 * log blocks.
|
|
745 */
|
|
746 kstat_named_t arcstat_l2_log_blk_avg_asize;
|
|
747 /* Aligned size of L2ARC log blocks on L2ARC devices. */
|
|
748 kstat_named_t arcstat_l2_log_blk_asize;
|
|
749 /* Number of L2ARC log blocks present on L2ARC devices. */
|
|
750 kstat_named_t arcstat_l2_log_blk_count;
|
|
751 /*
|
|
752 * Moving average of the aligned size of L2ARC restored data, in bytes,
|
|
753 * to the aligned size of their metadata in L2ARC, in bytes.
|
|
754 * Updated during L2ARC rebuild and during writing of L2ARC log blocks.
|
|
755 */
|
|
756 kstat_named_t arcstat_l2_data_to_meta_ratio;
|
|
757 /*
|
|
758 * Number of times the L2ARC rebuild was successful for an L2ARC device.
|
|
759 */
|
|
760 kstat_named_t arcstat_l2_rebuild_success;
|
|
761 /*
|
|
762 * Number of times the L2ARC rebuild failed because the device header
|
|
763 * was in an unsupported format or corrupted.
|
|
764 */
|
|
765 kstat_named_t arcstat_l2_rebuild_abort_unsupported;
|
|
766 /*
|
|
767 * Number of times the L2ARC rebuild failed because of IO errors
|
|
768 * while reading a log block.
|
|
769 */
|
|
770 kstat_named_t arcstat_l2_rebuild_abort_io_errors;
|
|
771 /*
|
|
772 * Number of times the L2ARC rebuild failed because of IO errors when
|
|
773 * reading the device header.
|
|
774 */
|
|
775 kstat_named_t arcstat_l2_rebuild_abort_dh_errors;
|
|
776 /*
|
|
777 * Number of L2ARC log blocks which failed to be restored due to
|
|
778 * checksum errors.
|
|
779 */
|
|
780 kstat_named_t arcstat_l2_rebuild_abort_cksum_lb_errors;
|
|
781 /*
|
|
782 * Number of times the L2ARC rebuild was aborted due to low system
|
|
783 * memory.
|
|
784 */
|
|
785 kstat_named_t arcstat_l2_rebuild_abort_lowmem;
|
|
786 /* Logical size of L2ARC restored data, in bytes. */
|
|
787 kstat_named_t arcstat_l2_rebuild_size;
|
|
788 /* Aligned size of L2ARC restored data, in bytes. */
|
|
789 kstat_named_t arcstat_l2_rebuild_asize;
|
|
790 /*
|
|
791 * Number of L2ARC log entries (buffers) that were successfully
|
|
792 * restored in ARC.
|
|
793 */
|
|
794 kstat_named_t arcstat_l2_rebuild_bufs;
|
|
795 /*
|
|
796 * Number of L2ARC log entries (buffers) already cached in ARC. These
|
|
797 * were not restored again.
|
|
798 */
|
|
799 kstat_named_t arcstat_l2_rebuild_bufs_precached;
|
|
800 /*
|
|
801 * Number of L2ARC log blocks that were restored successfully. Each
|
|
802 * log block may hold up to L2ARC_LOG_BLK_MAX_ENTRIES buffers.
|
|
803 */
|
|
804 kstat_named_t arcstat_l2_rebuild_log_blks;
|
|
805 kstat_named_t arcstat_memory_throttle_count;
|
|
806 /* Not updated directly; only synced in arc_kstat_update. */
|
|
807 kstat_named_t arcstat_meta_used;
|
|
808 kstat_named_t arcstat_meta_limit;
|
|
809 kstat_named_t arcstat_meta_max;
|
|
810 kstat_named_t arcstat_meta_min;
|
|
811 kstat_named_t arcstat_async_upgrade_sync;
|
|
812 kstat_named_t arcstat_demand_hit_predictive_prefetch;
|
|
813 kstat_named_t arcstat_demand_hit_prescient_prefetch;
|
|
814 } arc_stats_t;
|
|
815
|
|
/*
 * Expands to the `value.ui64` slot of the member `stat` of the global
 * arc_stats structure.  The expansion is a parenthesized lvalue, so it can
 * be both read and assigned.
 */
#define	ARCSTAT(stat)	(arc_stats.stat.value.ui64)
|
|
817
|
|
/*
 * Add `val` to the `value.ui64` slot of the arc_stats member `stat` by
 * calling atomic_add_64().  `val` is parenthesized so that an arbitrary
 * expression may be passed; a negative `val` decrements the counter
 * (this is how ARCSTAT_BUMPDOWN is implemented).
 */
#define	ARCSTAT_INCR(stat, val) \
	atomic_add_64(&arc_stats.stat.value.ui64, (val))

/* Convenience wrappers: adjust the named counter by +1 / -1. */
#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)
|
|
823
|
|
/*
 * There are several ARC variables that are critical to export as kstats --
 * but we don't want to have to grovel around in the kstat whenever we wish to
 * manipulate them.  For these variables, we therefore define them to be in
 * terms of the statistic variable.  This assures that we are not introducing
 * the possibility of inconsistency by having shadow copies of the variables,
 * while still allowing the code to be readable.
 *
 * Each name below expands, via ARCSTAT(), to the corresponding member of
 * arc_stats, so it can be read and assigned like an ordinary variable.
 */
#define	arc_p		ARCSTAT(arcstat_p)	/* target size of MRU */
#define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
#define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
#define	arc_c_max	ARCSTAT(arcstat_c_max)	/* max target cache size */
#define	arc_meta_limit	ARCSTAT(arcstat_meta_limit) /* max size for metadata */
#define	arc_meta_min	ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define	arc_meta_max	ARCSTAT(arcstat_meta_max) /* max size of metadata */
|
|
839
|
|
/*
 * Variable-style aliases for the ARC size statistics; each expands through
 * ARCSTAT() to the matching arc_stats member.
 */
/* compressed size of entire arc */
#define	arc_compressed_size	ARCSTAT(arcstat_compressed_size)
/* uncompressed size of entire arc */
#define	arc_uncompressed_size	ARCSTAT(arcstat_uncompressed_size)
/* number of bytes in the arc from arc_buf_t's */
#define	arc_overhead_size	ARCSTAT(arcstat_overhead_size)
|
|
846
|
|
847 extern arc_stats_t arc_stats;
|
|
848
|
|
849 /* used in zdb.c */
|
|
850 boolean_t l2arc_log_blkptr_valid(l2arc_dev_t *dev,
|
|
851 const l2arc_log_blkptr_t *lbp);
|
|
852
|
|
853 #ifdef __cplusplus
|
|
854 }
|
|
855 #endif
|
|
856
|
|
857 #endif /* _SYS_ARC_IMPL_H */
|