Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/dmu.c @ 2743:632c24f376ff
6461438 zfs send/recv code should live in its own file
author | ahrens |
---|---|
date | Fri, 15 Sep 2006 11:21:46 -0700 |
parents | 12e2ad22b970 |
children | c0259887ebbc |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
1544 | 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
26 #pragma ident "%Z%%M% %I% %E% SMI" | |
27 | |
28 #include <sys/dmu.h> | |
29 #include <sys/dmu_impl.h> | |
30 #include <sys/dmu_tx.h> | |
31 #include <sys/dbuf.h> | |
32 #include <sys/dnode.h> | |
33 #include <sys/zfs_context.h> | |
34 #include <sys/dmu_objset.h> | |
35 #include <sys/dmu_traverse.h> | |
36 #include <sys/dsl_dataset.h> | |
37 #include <sys/dsl_dir.h> | |
38 #include <sys/dsl_pool.h> | |
2199 | 39 #include <sys/dsl_synctask.h> |
789 | 40 #include <sys/dmu_zfetch.h> |
41 #include <sys/zfs_ioctl.h> | |
42 #include <sys/zap.h> | |
1544 | 43 #include <sys/zio_checksum.h> |
2688
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
44 #ifdef _KERNEL |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
45 #include <sys/vmsystm.h> |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
46 #endif |
789 | 47 |
/*
 * Table describing each DMU object type; it has DMU_OT_NUMTYPES slots.
 * Each entry lists, in order: the byteswap routine for that type's data,
 * a boolean flag, and a human-readable name for the type.
 * NOTE(review): the meaning of the boolean is defined by
 * dmu_object_type_info_t, which is not visible here -- presumably it
 * marks metadata types (only the "plain file", "zvol object" and
 * "other uint8[]/uint64[]" entries are FALSE); confirm against the header.
 */
48 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { | |
49 { byteswap_uint8_array, TRUE, "unallocated" }, | |
50 { zap_byteswap, TRUE, "object directory" }, | |
51 { byteswap_uint64_array, TRUE, "object array" }, | |
52 { byteswap_uint8_array, TRUE, "packed nvlist" }, | |
53 { byteswap_uint64_array, TRUE, "packed nvlist size" }, | |
54 { byteswap_uint64_array, TRUE, "bplist" }, | |
55 { byteswap_uint64_array, TRUE, "bplist header" }, | |
56 { byteswap_uint64_array, TRUE, "SPA space map header" }, | |
57 { byteswap_uint64_array, TRUE, "SPA space map" }, | |
58 { byteswap_uint64_array, TRUE, "ZIL intent log" }, | |
59 { dnode_buf_byteswap, TRUE, "DMU dnode" }, | |
60 { dmu_objset_byteswap, TRUE, "DMU objset" }, | |
61 { byteswap_uint64_array, TRUE, "DSL directory" }, | |
62 { zap_byteswap, TRUE, "DSL directory child map"}, | |
63 { zap_byteswap, TRUE, "DSL dataset snap map" }, | |
64 { zap_byteswap, TRUE, "DSL props" }, | |
65 { byteswap_uint64_array, TRUE, "DSL dataset" }, | |
66 { zfs_znode_byteswap, TRUE, "ZFS znode" }, | |
67 { zfs_acl_byteswap, TRUE, "ZFS ACL" }, | |
68 { byteswap_uint8_array, FALSE, "ZFS plain file" }, | |
69 { zap_byteswap, TRUE, "ZFS directory" }, | |
70 { zap_byteswap, TRUE, "ZFS master node" }, | |
71 { zap_byteswap, TRUE, "ZFS delete queue" }, | |
72 { byteswap_uint8_array, FALSE, "zvol object" }, | |
73 { zap_byteswap, TRUE, "zvol prop" }, | |
74 { byteswap_uint8_array, FALSE, "other uint8[]" }, | |
75 { byteswap_uint64_array, FALSE, "other uint64[]" }, | |
76 { zap_byteswap, TRUE, "other ZAP" }, | |
1544 | 77 { zap_byteswap, TRUE, "persistent error log" }, |
789 | 78 }; |
79 | |
80 int | |
1544 | 81 dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, |
82 void *tag, dmu_buf_t **dbp) | |
789 | 83 { |
84 dnode_t *dn; | |
85 uint64_t blkid; | |
86 dmu_buf_impl_t *db; | |
1544 | 87 int err; |
789 | 88 |
1544 | 89 err = dnode_hold(os->os, object, FTAG, &dn); |
90 if (err) | |
91 return (err); | |
789 | 92 blkid = dbuf_whichblock(dn, offset); |
93 rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
1544 | 94 db = dbuf_hold(dn, blkid, tag); |
789 | 95 rw_exit(&dn->dn_struct_rwlock); |
1544 | 96 if (db == NULL) { |
97 err = EIO; | |
98 } else { | |
99 err = dbuf_read(db, NULL, DB_RF_CANFAIL); | |
100 if (err) { | |
101 dbuf_rele(db, tag); | |
102 db = NULL; | |
103 } | |
104 } | |
105 | |
789 | 106 dnode_rele(dn, FTAG); |
1544 | 107 *dbp = &db->db; |
108 return (err); | |
789 | 109 } |
110 | |
111 int | |
112 dmu_bonus_max(void) | |
113 { | |
114 return (DN_MAX_BONUSLEN); | |
115 } | |
116 | |
117 /* | |
1544 | 118 * returns ENOENT, EIO, or 0. |
789 | 119 */ |
1544 | 120 int |
121 dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) | |
789 | 122 { |
1544 | 123 dnode_t *dn; |
124 int err, count; | |
789 | 125 dmu_buf_impl_t *db; |
126 | |
1544 | 127 err = dnode_hold(os->os, object, FTAG, &dn); |
128 if (err) | |
129 return (err); | |
789 | 130 |
1544 | 131 rw_enter(&dn->dn_struct_rwlock, RW_READER); |
132 if (dn->dn_bonus == NULL) { | |
789 | 133 rw_exit(&dn->dn_struct_rwlock); |
1544 | 134 rw_enter(&dn->dn_struct_rwlock, RW_WRITER); |
135 if (dn->dn_bonus == NULL) | |
136 dn->dn_bonus = dbuf_create_bonus(dn); | |
789 | 137 } |
1544 | 138 db = dn->dn_bonus; |
139 rw_exit(&dn->dn_struct_rwlock); | |
140 mutex_enter(&db->db_mtx); | |
141 count = refcount_add(&db->db_holds, tag); | |
142 mutex_exit(&db->db_mtx); | |
143 if (count == 1) | |
144 dnode_add_ref(dn, db); | |
789 | 145 dnode_rele(dn, FTAG); |
1544 | 146 |
147 VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); | |
148 | |
149 *dbp = &db->db; | |
150 return (0); | |
789 | 151 } |
152 | |
2391 | 153 /* |
154 * Note: longer-term, we should modify all of the dmu_buf_*() interfaces | |
155 * to take a held dnode rather than <os, object> -- the lookup is wasteful, | |
156 * and can induce severe lock contention when writing to several files | |
157 * whose dnodes are in the same block. | |
158 */ | |
159 static int | |
160 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, | |
1544 | 161 uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) |
789 | 162 { |
163 dmu_buf_t **dbp; | |
164 uint64_t blkid, nblks, i; | |
1544 | 165 uint32_t flags; |
166 int err; | |
167 zio_t *zio; | |
168 | |
169 ASSERT(length <= DMU_MAX_ACCESS); | |
789 | 170 |
1544 | 171 flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; |
1731
1efa8b3d1296
6402598 'zfs destroy <fs>' can take a long time, stopping up the txg train
bonwick
parents:
1630
diff
changeset
|
172 if (length > zfetch_array_rd_sz) |
1544 | 173 flags |= DB_RF_NOPREFETCH; |
174 | |
789 | 175 rw_enter(&dn->dn_struct_rwlock, RW_READER); |
176 if (dn->dn_datablkshift) { | |
177 int blkshift = dn->dn_datablkshift; | |
178 nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - | |
179 P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; | |
180 } else { | |
181 ASSERT3U(offset + length, <=, dn->dn_datablksz); | |
182 nblks = 1; | |
183 } | |
1544 | 184 dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); |
789 | 185 |
1544 | 186 zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); |
789 | 187 blkid = dbuf_whichblock(dn, offset); |
188 for (i = 0; i < nblks; i++) { | |
1544 | 189 dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); |
190 if (db == NULL) { | |
191 rw_exit(&dn->dn_struct_rwlock); | |
192 dmu_buf_rele_array(dbp, nblks, tag); | |
193 zio_nowait(zio); | |
194 return (EIO); | |
195 } | |
196 /* initiate async i/o */ | |
2391 | 197 if (read) { |
1544 | 198 rw_exit(&dn->dn_struct_rwlock); |
199 (void) dbuf_read(db, zio, flags); | |
200 rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
201 } | |
202 dbp[i] = &db->db; | |
789 | 203 } |
204 rw_exit(&dn->dn_struct_rwlock); | |
205 | |
1544 | 206 /* wait for async i/o */ |
207 err = zio_wait(zio); | |
208 if (err) { | |
209 dmu_buf_rele_array(dbp, nblks, tag); | |
210 return (err); | |
789 | 211 } |
212 | |
1544 | 213 /* wait for other io to complete */ |
214 if (read) { | |
215 for (i = 0; i < nblks; i++) { | |
216 dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; | |
217 mutex_enter(&db->db_mtx); | |
218 while (db->db_state == DB_READ || | |
219 db->db_state == DB_FILL) | |
220 cv_wait(&db->db_changed, &db->db_mtx); | |
221 if (db->db_state == DB_UNCACHED) | |
222 err = EIO; | |
223 mutex_exit(&db->db_mtx); | |
224 if (err) { | |
225 dmu_buf_rele_array(dbp, nblks, tag); | |
226 return (err); | |
227 } | |
228 } | |
229 } | |
789 | 230 |
1544 | 231 *numbufsp = nblks; |
232 *dbpp = dbp; | |
233 return (0); | |
789 | 234 } |
235 | |
2391 | 236 int |
237 dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, | |
238 uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) | |
239 { | |
240 dnode_t *dn; | |
241 int err; | |
242 | |
243 err = dnode_hold(os->os, object, FTAG, &dn); | |
244 if (err) | |
245 return (err); | |
246 | |
247 err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, | |
248 numbufsp, dbpp); | |
249 | |
250 dnode_rele(dn, FTAG); | |
251 | |
252 return (err); | |
253 } | |
254 | |
255 int | |
256 dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, | |
257 uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) | |
258 { | |
259 dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; | |
260 int err; | |
261 | |
262 err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, | |
263 numbufsp, dbpp); | |
264 | |
265 return (err); | |
266 } | |
267 | |
789 | 268 void |
1544 | 269 dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) |
789 | 270 { |
271 int i; | |
272 dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; | |
273 | |
274 if (numbufs == 0) | |
275 return; | |
276 | |
1544 | 277 for (i = 0; i < numbufs; i++) { |
278 if (dbp[i]) | |
279 dbuf_rele(dbp[i], tag); | |
280 } | |
789 | 281 |
282 kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); | |
283 } | |
284 | |
285 void | |
286 dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) | |
287 { | |
288 dnode_t *dn; | |
289 uint64_t blkid; | |
1544 | 290 int nblks, i, err; |
789 | 291 |
292 if (len == 0) { /* they're interested in the bonus buffer */ | |
293 dn = os->os->os_meta_dnode; | |
294 | |
295 if (object == 0 || object >= DN_MAX_OBJECT) | |
296 return; | |
297 | |
298 rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
299 blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); | |
300 dbuf_prefetch(dn, blkid); | |
301 rw_exit(&dn->dn_struct_rwlock); | |
302 return; | |
303 } | |
304 | |
305 /* | |
306 * XXX - Note, if the dnode for the requested object is not | |
307 * already cached, we will do a *synchronous* read in the | |
308 * dnode_hold() call. The same is true for any indirects. | |
309 */ | |
1544 | 310 err = dnode_hold(os->os, object, FTAG, &dn); |
311 if (err != 0) | |
789 | 312 return; |
313 | |
314 rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
315 if (dn->dn_datablkshift) { | |
316 int blkshift = dn->dn_datablkshift; | |
317 nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - | |
318 P2ALIGN(offset, 1<<blkshift)) >> blkshift; | |
319 } else { | |
320 nblks = (offset < dn->dn_datablksz); | |
321 } | |
322 | |
323 if (nblks != 0) { | |
324 blkid = dbuf_whichblock(dn, offset); | |
325 for (i = 0; i < nblks; i++) | |
326 dbuf_prefetch(dn, blkid+i); | |
327 } | |
328 | |
329 rw_exit(&dn->dn_struct_rwlock); | |
330 | |
331 dnode_rele(dn, FTAG); | |
332 } | |
333 | |
1544 | 334 int |
789 | 335 dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, |
336 uint64_t size, dmu_tx_t *tx) | |
337 { | |
1544 | 338 dnode_t *dn; |
339 int err = dnode_hold(os->os, object, FTAG, &dn); | |
340 if (err) | |
341 return (err); | |
789 | 342 ASSERT(offset < UINT64_MAX); |
343 ASSERT(size == -1ULL || size <= UINT64_MAX - offset); | |
344 dnode_free_range(dn, offset, size, tx); | |
345 dnode_rele(dn, FTAG); | |
1544 | 346 return (0); |
789 | 347 } |
348 | |
1544 | 349 int |
350 dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
351 void *buf) | |
789 | 352 { |
353 dnode_t *dn; | |
354 dmu_buf_t **dbp; | |
1544 | 355 int numbufs, i, err; |
789 | 356 |
1544 | 357 /* |
358 * Deal with odd block sizes, where there can't be data past the | |
359 * first block. | |
360 */ | |
361 err = dnode_hold(os->os, object, FTAG, &dn); | |
362 if (err) | |
363 return (err); | |
789 | 364 if (dn->dn_datablkshift == 0) { |
365 int newsz = offset > dn->dn_datablksz ? 0 : | |
366 MIN(size, dn->dn_datablksz - offset); | |
367 bzero((char *)buf + newsz, size - newsz); | |
368 size = newsz; | |
369 } | |
370 dnode_rele(dn, FTAG); | |
371 | |
372 while (size > 0) { | |
373 uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); | |
374 int err; | |
375 | |
376 /* | |
377 * NB: we could do this block-at-a-time, but it's nice | |
378 * to be reading in parallel. | |
379 */ | |
1544 | 380 err = dmu_buf_hold_array(os, object, offset, mylen, |
381 TRUE, FTAG, &numbufs, &dbp); | |
382 if (err) | |
789 | 383 return (err); |
384 | |
385 for (i = 0; i < numbufs; i++) { | |
386 int tocpy; | |
387 int bufoff; | |
388 dmu_buf_t *db = dbp[i]; | |
389 | |
390 ASSERT(size > 0); | |
391 | |
392 bufoff = offset - db->db_offset; | |
393 tocpy = (int)MIN(db->db_size - bufoff, size); | |
394 | |
395 bcopy((char *)db->db_data + bufoff, buf, tocpy); | |
396 | |
397 offset += tocpy; | |
398 size -= tocpy; | |
399 buf = (char *)buf + tocpy; | |
400 } | |
1544 | 401 dmu_buf_rele_array(dbp, numbufs, FTAG); |
789 | 402 } |
403 return (0); | |
404 } | |
405 | |
406 void | |
407 dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
408 const void *buf, dmu_tx_t *tx) | |
409 { | |
410 dmu_buf_t **dbp; | |
411 int numbufs, i; | |
412 | |
2391 | 413 if (size == 0) |
414 return; | |
415 | |
1544 | 416 VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, |
417 FALSE, FTAG, &numbufs, &dbp)); | |
789 | 418 |
419 for (i = 0; i < numbufs; i++) { | |
420 int tocpy; | |
421 int bufoff; | |
422 dmu_buf_t *db = dbp[i]; | |
423 | |
424 ASSERT(size > 0); | |
425 | |
426 bufoff = offset - db->db_offset; | |
427 tocpy = (int)MIN(db->db_size - bufoff, size); | |
428 | |
429 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); | |
430 | |
431 if (tocpy == db->db_size) | |
432 dmu_buf_will_fill(db, tx); | |
433 else | |
434 dmu_buf_will_dirty(db, tx); | |
435 | |
436 bcopy(buf, (char *)db->db_data + bufoff, tocpy); | |
437 | |
438 if (tocpy == db->db_size) | |
439 dmu_buf_fill_done(db, tx); | |
440 | |
441 offset += tocpy; | |
442 size -= tocpy; | |
443 buf = (char *)buf + tocpy; | |
444 } | |
1544 | 445 dmu_buf_rele_array(dbp, numbufs, FTAG); |
789 | 446 } |
447 | |
448 #ifdef _KERNEL | |
449 int | |
450 dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, | |
451 uio_t *uio, dmu_tx_t *tx) | |
452 { | |
453 dmu_buf_t **dbp; | |
454 int numbufs, i; | |
455 int err = 0; | |
456 | |
2391 | 457 if (size == 0) |
458 return (0); | |
459 | |
1544 | 460 err = dmu_buf_hold_array(os, object, offset, size, |
461 FALSE, FTAG, &numbufs, &dbp); | |
462 if (err) | |
463 return (err); | |
789 | 464 |
465 for (i = 0; i < numbufs; i++) { | |
466 int tocpy; | |
467 int bufoff; | |
468 dmu_buf_t *db = dbp[i]; | |
469 | |
470 ASSERT(size > 0); | |
471 | |
472 bufoff = offset - db->db_offset; | |
473 tocpy = (int)MIN(db->db_size - bufoff, size); | |
474 | |
475 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); | |
476 | |
477 if (tocpy == db->db_size) | |
478 dmu_buf_will_fill(db, tx); | |
479 else | |
480 dmu_buf_will_dirty(db, tx); | |
481 | |
482 /* | |
483 * XXX uiomove could block forever (eg. nfs-backed | |
484 * pages). There needs to be a uiolockdown() function | |
485 * to lock the pages in memory, so that uiomove won't | |
486 * block. | |
487 */ | |
488 err = uiomove((char *)db->db_data + bufoff, tocpy, | |
489 UIO_WRITE, uio); | |
490 | |
491 if (tocpy == db->db_size) | |
492 dmu_buf_fill_done(db, tx); | |
493 | |
494 if (err) | |
495 break; | |
496 | |
497 offset += tocpy; | |
498 size -= tocpy; | |
499 } | |
1544 | 500 dmu_buf_rele_array(dbp, numbufs, FTAG); |
789 | 501 return (err); |
502 } | |
2688
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
503 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
504 int |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
505 dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
506 page_t *pp, dmu_tx_t *tx) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
507 { |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
508 dmu_buf_t **dbp; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
509 int numbufs, i; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
510 int err; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
511 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
512 if (size == 0) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
513 return (0); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
514 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
515 err = dmu_buf_hold_array(os, object, offset, size, |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
516 FALSE, FTAG, &numbufs, &dbp); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
517 if (err) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
518 return (err); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
519 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
520 for (i = 0; i < numbufs; i++) { |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
521 int tocpy, copied, thiscpy; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
522 int bufoff; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
523 dmu_buf_t *db = dbp[i]; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
524 caddr_t va; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
525 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
526 ASSERT(size > 0); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
527 ASSERT3U(db->db_size, >=, PAGESIZE); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
528 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
529 bufoff = offset - db->db_offset; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
530 tocpy = (int)MIN(db->db_size - bufoff, size); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
531 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
532 ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
533 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
534 if (tocpy == db->db_size) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
535 dmu_buf_will_fill(db, tx); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
536 else |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
537 dmu_buf_will_dirty(db, tx); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
538 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
539 for (copied = 0; copied < tocpy; copied += PAGESIZE) { |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
540 ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
541 thiscpy = MIN(PAGESIZE, tocpy - copied); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
542 va = ppmapin(pp, PROT_READ, (caddr_t)-1); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
543 bcopy(va, (char *)db->db_data + bufoff, thiscpy); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
544 ppmapout(va); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
545 pp = pp->p_next; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
546 bufoff += PAGESIZE; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
547 } |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
548 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
549 if (tocpy == db->db_size) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
550 dmu_buf_fill_done(db, tx); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
551 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
552 if (err) |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
553 break; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
554 |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
555 offset += tocpy; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
556 size -= tocpy; |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
557 } |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
558 dmu_buf_rele_array(dbp, numbufs, FTAG); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
559 return (err); |
12e2ad22b970
6447701 ZFS hangs when iSCSI Target attempts to initialize its backing store
maybee
parents:
2676
diff
changeset
|
560 } |
789 | 561 #endif |
562 | |
/*
 * Arguments handed from dmu_sync() to its write-completion callback
 * dmu_sync_done(): the txg being synced, the dbuf being written, and the
 * caller's optional completion callback with its argument.
 */
2237 | 563 typedef struct { |
564 uint64_t txg; | |
565 dmu_buf_impl_t *db; | |
566 dmu_sync_cb_t *done; | |
567 void *arg; | |
568 } dmu_sync_cbin_t; | |
569 | |
/*
 * Overlay of the callback input and the block pointer produced by the
 * write.  dmu_sync() allocates sizeof (blkptr_t) bytes for the callback
 * argument, and dmu_sync_done() reads it as a dmu_sync_cbin_t and then
 * overwrites the same memory with the resulting blkptr_t.
 * NOTE(review): this union type is not referenced by name in the code
 * visible here -- confirm whether other code uses it.
 */
570 typedef union { | |
571 dmu_sync_cbin_t data; | |
572 blkptr_t blk; | |
573 } dmu_sync_cbarg_t; | |
574 | |
575 /* ARGSUSED */ | |
576 static void | |
577 dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) | |
578 { | |
579 dmu_sync_cbin_t *in = (dmu_sync_cbin_t *)varg; | |
580 dmu_buf_impl_t *db = in->db; | |
581 uint64_t txg = in->txg; | |
582 dmu_sync_cb_t *done = in->done; | |
583 void *arg = in->arg; | |
584 blkptr_t *blk = (blkptr_t *)varg; | |
585 | |
586 if (!BP_IS_HOLE(zio->io_bp)) { | |
587 zio->io_bp->blk_fill = 1; | |
588 BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type); | |
589 BP_SET_LEVEL(zio->io_bp, 0); | |
590 } | |
591 | |
592 *blk = *zio->io_bp; /* structure assignment */ | |
593 | |
594 mutex_enter(&db->db_mtx); | |
595 ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == IN_DMU_SYNC); | |
596 db->db_d.db_overridden_by[txg&TXG_MASK] = blk; | |
597 cv_broadcast(&db->db_changed); | |
598 mutex_exit(&db->db_mtx); | |
599 | |
600 if (done) | |
601 done(&(db->db), arg); | |
602 } | |
603 | |
789 | 604 /* |
2237 | 605 * Intent log support: sync the block associated with db to disk. |
606 * N.B. and XXX: the caller is responsible for making sure that the | |
607 * data isn't changing while dmu_sync() is writing it. | |
789 | 608 * |
609 * Return values: | |
610 * | |
2237 | 611 * EEXIST: this txg has already been synced, so there's nothing to do. |
789 | 612 * The caller should not log the write. |
613 * | |
614 * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. | |
615 * The caller should not log the write. | |
616 * | |
2237 | 617 * EALREADY: this block is already in the process of being synced. |
618 * The caller should track its progress (somehow). | |
789 | 619 * |
2237 | 620 * EINPROGRESS: the IO has been initiated. |
621 * The caller should log this blkptr in the callback. | |
789 | 622 * |
2237 | 623 * 0: completed. Sets *bp to the blkptr just written. |
624 * The caller should log this blkptr immediately. | |
789 | 625 */ |
/*
 * Arguments:
 *   pio     - parent zio, may be NULL.  With a NULL pio the write is
 *             issued with ARC_WAIT and 0 is returned; otherwise it is
 *             issued with ARC_NOWAIT and EINPROGRESS is returned.
 *   db_fake - the buffer to sync.
 *   bp      - out: block pointer; asserted to be a hole on entry.
 *   txg     - the txg the buffer was dirtied in; asserted to be nonzero.
 *   done    - optional callback, invoked with `arg' by dmu_sync_done()
 *             once the write has completed.
 */
626 int |
2237 | 627 dmu_sync(zio_t *pio, dmu_buf_t *db_fake, |
628 blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) | |
789 | 629 { |
2237 | 630 dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; |
631 objset_impl_t *os = db->db_objset; | |
632 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; | |
789 | 633 tx_state_t *tx = &dp->dp_tx; |
2237 | 634 dmu_sync_cbin_t *in; |
789 | 635 blkptr_t *blk; |
2237 | 636 zbookmark_t zb; |
637 uint32_t arc_flag; | |
789 | 638 int err; |
639 | |
640 ASSERT(BP_IS_HOLE(bp)); | |
641 ASSERT(txg != 0); | |
642 | |
2237 | 643 |
789 | 644 dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", |
645 txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); | |
646 | |
647 /* | |
2237 | 648 * XXX - would be nice if we could do this without suspending... |
1544 | 649 */ |
2237 | 650 txg_suspend(dp); |
1544 | 651 |
652 /* | |
789 | 653 * If this txg already synced, there's nothing to do. |
654 */ | |
655 if (txg <= tx->tx_synced_txg) { | |
2237 | 656 txg_resume(dp); |
789 | 657 /* |
658 * If we're running ziltest, we need the blkptr regardless. | |
659 */ | |
660 if (txg > spa_freeze_txg(dp->dp_spa)) { | |
661 /* if db_blkptr == NULL, this was an empty write */ | |
662 if (db->db_blkptr) | |
663 *bp = *db->db_blkptr; /* structure assignment */ | |
664 return (0); | |
665 } | |
2237 | 666 return (EEXIST); |
789 | 667 } |
668 | |
/*
 * From here on the per-txg override slot, db_data_pending and db_blkptr
 * are examined with db_mtx held.  Every return path below drops db_mtx
 * and then calls txg_resume().
 */
669 mutex_enter(&db->db_mtx); | |
670 | |
2237 | 671 blk = db->db_d.db_overridden_by[txg&TXG_MASK]; |
672 if (blk == IN_DMU_SYNC) { | |
673 /* | |
674 * We have already issued a sync write for this buffer. | |
675 */ | |
676 mutex_exit(&db->db_mtx); | |
677 txg_resume(dp); | |
678 return (EALREADY); | |
679 } else if (blk != NULL) { | |
680 /* | |
681 * This buffer had already been synced. It could not | |
682 * have been dirtied since, or we would have cleared blk. | |
683 */ | |
684 *bp = *blk; /* structure assignment */ | |
789 | 685 mutex_exit(&db->db_mtx); |
2237 | 686 txg_resume(dp); |
687 return (0); | |
688 } | |
689 | |
690 if (txg == tx->tx_syncing_txg) { | |
691 while (db->db_data_pending) { | |
692 /* | |
693 * IO is in-progress. Wait for it to finish. | |
694 * XXX - would be nice to be able to somehow "attach" | |
695 * this zio to the parent zio passed in. | |
696 */ | |
697 cv_wait(&db->db_changed, &db->db_mtx); | |
2391 | 698 if (!db->db_data_pending && |
699 db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { | |
700 /* | |
701 * IO was compressed away | |
702 */ | |
703 *bp = *db->db_blkptr; /* structure assignment */ | |
704 mutex_exit(&db->db_mtx); | |
705 txg_resume(dp); | |
706 return (0); | |
707 } | |
2237 | 708 ASSERT(db->db_data_pending || |
709 (db->db_blkptr && db->db_blkptr->blk_birth == txg)); | |
710 } | |
711 | |
712 if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { | |
713 /* | |
714 * IO is already completed. | |
715 */ | |
716 *bp = *db->db_blkptr; /* structure assignment */ | |
717 mutex_exit(&db->db_mtx); | |
718 txg_resume(dp); | |
719 return (0); | |
720 } | |
721 } | |
722 | |
723 if (db->db_d.db_data_old[txg&TXG_MASK] == NULL) { | |
724 /* | |
725 * This dbuf isn't dirty, must have been free_range'd. | |
726 * There's no need to log writes to freed blocks, so we're done. | |
727 */ | |
728 mutex_exit(&db->db_mtx); | |
729 txg_resume(dp); | |
789 | 730 return (ENOENT); |
731 } | |
732 | |
/*
 * Claim the slot with the IN_DMU_SYNC marker so that another dmu_sync()
 * call for this buffer and txg returns EALREADY; dmu_sync_done() later
 * replaces the marker with the block pointer that was written.
 */
2237 | 733 ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == NULL); |
734 db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; | |
789 | 735 /* |
2237 | 736 * XXX - a little ugly to stash the blkptr in the callback |
737 * buffer. We always need to make sure the following is true: | |
738 * ASSERT(sizeof(blkptr_t) >= sizeof(dmu_sync_cbin_t)); | |
789 | 739 */ |
2237 | 740 in = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); |
741 in->db = db; | |
742 in->txg = txg; | |
743 in->done = done; | |
744 in->arg = arg; | |
745 mutex_exit(&db->db_mtx); | |
746 txg_resume(dp); | |
789 | 747 |
/*
 * Issue the write of this txg's old data.  The ARC flag, and therefore
 * the return value, depends only on whether a parent zio was supplied.
 */
2237 | 748 arc_flag = pio == NULL ? ARC_WAIT : ARC_NOWAIT; |
749 zb.zb_objset = os->os_dsl_dataset->ds_object; | |
1544 | 750 zb.zb_object = db->db.db_object; |
751 zb.zb_level = db->db_level; | |
752 zb.zb_blkid = db->db_blkid; | |
2237 | 753 err = arc_write(pio, os->os_spa, |
754 zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum), | |
755 zio_compress_select(db->db_dnode->dn_compress, os->os_compress), | |
756 dmu_get_replication_level(os->os_spa, &zb, db->db_dnode->dn_type), | |
757 txg, bp, db->db_d.db_data_old[txg&TXG_MASK], dmu_sync_done, in, | |
758 ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, arc_flag, &zb); | |
789 | 759 ASSERT(err == 0); |
760 | |
2237 | 761 return (arc_flag == ARC_NOWAIT ? EINPROGRESS : 0); |
789 | 762 } |
763 | |
764 uint64_t | |
765 dmu_object_max_nonzero_offset(objset_t *os, uint64_t object) | |
766 { | |
1544 | 767 dnode_t *dn; |
768 | |
769 /* XXX assumes dnode_hold will not get an i/o error */ | |
770 (void) dnode_hold(os->os, object, FTAG, &dn); | |
789 | 771 uint64_t rv = dnode_max_nonzero_offset(dn); |
772 dnode_rele(dn, FTAG); | |
773 return (rv); | |
774 } | |
775 | |
776 int | |
777 dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, | |
778 dmu_tx_t *tx) | |
779 { | |
1544 | 780 dnode_t *dn; |
781 int err; | |
782 | |
783 err = dnode_hold(os->os, object, FTAG, &dn); | |
784 if (err) | |
785 return (err); | |
786 err = dnode_set_blksz(dn, size, ibs, tx); | |
789 | 787 dnode_rele(dn, FTAG); |
788 return (err); | |
789 } | |
790 | |
791 void | |
792 dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, | |
793 dmu_tx_t *tx) | |
794 { | |
1544 | 795 dnode_t *dn; |
796 | |
797 /* XXX assumes dnode_hold will not get an i/o error */ | |
798 (void) dnode_hold(os->os, object, FTAG, &dn); | |
789 | 799 ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); |
800 dn->dn_checksum = checksum; | |
801 dnode_setdirty(dn, tx); | |
802 dnode_rele(dn, FTAG); | |
803 } | |
804 | |
805 void | |
806 dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, | |
807 dmu_tx_t *tx) | |
808 { | |
1544 | 809 dnode_t *dn; |
810 | |
811 /* XXX assumes dnode_hold will not get an i/o error */ | |
812 (void) dnode_hold(os->os, object, FTAG, &dn); | |
789 | 813 ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); |
814 dn->dn_compress = compress; | |
815 dnode_setdirty(dn, tx); | |
816 dnode_rele(dn, FTAG); | |
817 } | |
818 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
819 /* |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
820 * XXX - eventually, this should take into account per-dataset (or |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
821 * even per-object?) user requests for higher levels of replication. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
822 */ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
823 int |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
824 dmu_get_replication_level(spa_t *spa, zbookmark_t *zb, dmu_object_type_t ot) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
825 { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
826 int ncopies = 1; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
827 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
828 if (dmu_ot[ot].ot_metadata) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
829 ncopies++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
830 if (zb->zb_level != 0) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
831 ncopies++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
832 if (zb->zb_objset == 0 && zb->zb_object == 0) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
833 ncopies++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
834 return (MIN(ncopies, spa_max_replication(spa))); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
835 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1731
diff
changeset
|
836 |
789 | 837 int |
838 dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) | |
839 { | |
840 dnode_t *dn; | |
841 int i, err; | |
842 | |
1544 | 843 err = dnode_hold(os->os, object, FTAG, &dn); |
844 if (err) | |
845 return (err); | |
789 | 846 /* |
847 * Sync any current changes before | |
848 * we go trundling through the block pointers. | |
849 */ | |
850 for (i = 0; i < TXG_SIZE; i++) { | |
1596
2e2377ccbf85
6395371 ASSERT in dmu_tx_count_free: blkid + i < dn->dn_phys->dn_nblkptr
ahrens
parents:
1544
diff
changeset
|
851 if (list_link_active(&dn->dn_dirty_link[i])) |
789 | 852 break; |
853 } | |
854 if (i != TXG_SIZE) { | |
855 dnode_rele(dn, FTAG); | |
856 txg_wait_synced(dmu_objset_pool(os), 0); | |
1544 | 857 err = dnode_hold(os->os, object, FTAG, &dn); |
858 if (err) | |
859 return (err); | |
789 | 860 } |
861 | |
862 err = dnode_next_offset(dn, hole, off, 1, 1); | |
863 dnode_rele(dn, FTAG); | |
864 | |
865 return (err); | |
866 } | |
867 | |
868 void | |
869 dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) | |
870 { | |
871 rw_enter(&dn->dn_struct_rwlock, RW_READER); | |
872 mutex_enter(&dn->dn_mtx); | |
873 | |
874 doi->doi_data_block_size = dn->dn_datablksz; | |
875 doi->doi_metadata_block_size = dn->dn_indblkshift ? | |
876 1ULL << dn->dn_indblkshift : 0; | |
877 doi->doi_indirection = dn->dn_nlevels; | |
878 doi->doi_checksum = dn->dn_checksum; | |
879 doi->doi_compress = dn->dn_compress; | |
2082 | 880 doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + |
881 SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; | |
789 | 882 doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; |
883 doi->doi_type = dn->dn_type; | |
884 doi->doi_bonus_size = dn->dn_bonuslen; | |
885 doi->doi_bonus_type = dn->dn_bonustype; | |
886 | |
887 mutex_exit(&dn->dn_mtx); | |
888 rw_exit(&dn->dn_struct_rwlock); | |
889 } | |
890 | |
891 /* | |
892 * Get information on a DMU object. | |
893 * If doi is NULL, just indicates whether the object exists. | |
894 */ | |
895 int | |
896 dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) | |
897 { | |
1544 | 898 dnode_t *dn; |
899 int err = dnode_hold(os->os, object, FTAG, &dn); | |
789 | 900 |
1544 | 901 if (err) |
902 return (err); | |
789 | 903 |
904 if (doi != NULL) | |
905 dmu_object_info_from_dnode(dn, doi); | |
906 | |
907 dnode_rele(dn, FTAG); | |
908 return (0); | |
909 } | |
910 | |
911 /* | |
912 * As above, but faster; can be used when you have a held dbuf in hand. | |
913 */ | |
914 void | |
915 dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) | |
916 { | |
917 dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); | |
918 } | |
919 | |
920 /* | |
921 * Faster still when you only care about the size. | |
922 * This is specifically optimized for zfs_getattr(). | |
923 */ | |
924 void | |
925 dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) | |
926 { | |
927 dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; | |
928 | |
929 *blksize = dn->dn_datablksz; | |
2082 | 930 /* add 1 for dnode space */ |
931 *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> | |
932 SPA_MINBLOCKSHIFT) + 1; | |
789 | 933 } |
934 | |
1544 | 935 /* |
936 * Given a bookmark, return the name of the dataset, object, and range in | |
937 * human-readable format. | |
938 */ | |
939 int | |
2676 | 940 spa_bookmark_name(spa_t *spa, zbookmark_t *zb, nvlist_t *nvl) |
1544 | 941 { |
942 dsl_pool_t *dp; | |
943 dsl_dataset_t *ds = NULL; | |
944 objset_t *os = NULL; | |
945 dnode_t *dn = NULL; | |
946 int err, shift; | |
2676 | 947 char dsname[MAXNAMELEN]; |
948 char objname[32]; | |
949 char range[64]; | |
1544 | 950 |
951 dp = spa_get_dsl(spa); | |
952 if (zb->zb_objset != 0) { | |
953 rw_enter(&dp->dp_config_rwlock, RW_READER); | |
954 err = dsl_dataset_open_obj(dp, zb->zb_objset, | |
955 NULL, DS_MODE_NONE, FTAG, &ds); | |
956 if (err) { | |
957 rw_exit(&dp->dp_config_rwlock); | |
958 return (err); | |
959 } | |
960 dsl_dataset_name(ds, dsname); | |
961 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); | |
962 rw_exit(&dp->dp_config_rwlock); | |
963 | |
964 err = dmu_objset_open(dsname, DMU_OST_ANY, DS_MODE_NONE, &os); | |
965 if (err) | |
966 goto out; | |
967 | |
968 } else { | |
969 dsl_dataset_name(NULL, dsname); | |
970 os = dp->dp_meta_objset; | |
971 } | |
972 | |
973 | |
974 if (zb->zb_object == DMU_META_DNODE_OBJECT) { | |
2676 | 975 (void) strncpy(objname, "mdn", sizeof (objname)); |
1544 | 976 } else { |
2676 | 977 (void) snprintf(objname, sizeof (objname), "%lld", |
1544 | 978 (longlong_t)zb->zb_object); |
979 } | |
980 | |
981 err = dnode_hold(os->os, zb->zb_object, FTAG, &dn); | |
982 if (err) | |
983 goto out; | |
984 | |
985 shift = (dn->dn_datablkshift?dn->dn_datablkshift:SPA_MAXBLOCKSHIFT) + | |
986 zb->zb_level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT); | |
2676 | 987 (void) snprintf(range, sizeof (range), "%llu-%llu", |
1544 | 988 (u_longlong_t)(zb->zb_blkid << shift), |
989 (u_longlong_t)((zb->zb_blkid+1) << shift)); | |
990 | |
2676 | 991 if ((err = nvlist_add_string(nvl, ZPOOL_ERR_DATASET, dsname)) != 0 || |
992 (err = nvlist_add_string(nvl, ZPOOL_ERR_OBJECT, objname)) != 0 || | |
993 (err = nvlist_add_string(nvl, ZPOOL_ERR_RANGE, range)) != 0) | |
994 goto out; | |
995 | |
1544 | 996 out: |
997 if (dn) | |
998 dnode_rele(dn, FTAG); | |
999 if (os && os != dp->dp_meta_objset) | |
1000 dmu_objset_close(os); | |
1001 return (err); | |
1002 } | |
1003 | |
/*
 * Byteswap, in place, every 64-bit word of the buffer at vbuf.
 * size is the buffer length in bytes and is ASSERTed to be a multiple
 * of 8.
 */
void
byteswap_uint64_array(void *vbuf, size_t size)
{
	uint64_t *buf = vbuf;
	size_t count = size >> 3;
	size_t i;

	ASSERT((size & 7) == 0);

	/*
	 * The index is a size_t like count, so the loop condition is not a
	 * mixed signed/unsigned comparison and the index cannot overflow
	 * before it reaches count.
	 */
	for (i = 0; i < count; i++)
		buf[i] = BSWAP_64(buf[i]);
}
1016 | |
/*
 * Byteswap, in place, every 32-bit word of the buffer at vbuf.
 * size is the buffer length in bytes and is ASSERTed to be a multiple
 * of 4.
 */
void
byteswap_uint32_array(void *vbuf, size_t size)
{
	uint32_t *buf = vbuf;
	size_t count = size >> 2;
	size_t i;

	ASSERT((size & 3) == 0);

	/*
	 * The index is a size_t like count, so the loop condition is not a
	 * mixed signed/unsigned comparison and the index cannot overflow
	 * before it reaches count.
	 */
	for (i = 0; i < count; i++)
		buf[i] = BSWAP_32(buf[i]);
}
1029 | |
/*
 * Byteswap, in place, every 16-bit word of the buffer at vbuf.
 * size is the buffer length in bytes and is ASSERTed to be a multiple
 * of 2.
 */
void
byteswap_uint16_array(void *vbuf, size_t size)
{
	uint16_t *buf = vbuf;
	size_t count = size >> 1;
	size_t i;

	ASSERT((size & 1) == 0);

	/*
	 * The index is a size_t like count, so the loop condition is not a
	 * mixed signed/unsigned comparison and the index cannot overflow
	 * before it reaches count.
	 */
	for (i = 0; i < count; i++)
		buf[i] = BSWAP_16(buf[i]);
}
1042 | |
/*
 * Byte-sized counterpart of the other byteswap_*_array() routines.
 * Single bytes have no byte order, so the buffer is left untouched and
 * both arguments are unused.
 */
/* ARGSUSED */
void
byteswap_uint8_array(void *vbuf, size_t size)
{
	/* Intentionally empty. */
}
1048 | |
/*
 * Initialize the subsystems the DMU sits on by calling, in this order,
 * dbuf_init(), dnode_init() and arc_init().
 */
void
dmu_init(void)
{
	dbuf_init();
	dnode_init();
	arc_init();
}
1056 | |
/*
 * Tear down the subsystems set up by dmu_init(), in the reverse of the
 * order used there: arc_fini(), dnode_fini(), then dbuf_fini().
 */
void
dmu_fini(void)
{
	arc_fini();
	dnode_fini();
	dbuf_fini();
}