Mercurial > illumos > illumos-gate
comparison usr/src/uts/common/fs/zfs/dmu.c @ 14164:dceb17481b99
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Ned Bass <bass6@llnl.gov>
Reviewed by: Brendan Gregg <brendan.gregg@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
author | Matthew Ahrens <mahrens@delphix.com> |
---|---|
date | Mon, 26 Aug 2013 13:13:26 -0800 |
parents | dc75c925d8aa |
children | be36a38bac3d |
comparison
Legend: equal, deleted, inserted, replaced
14163:712ede127bb4 | 14164:dceb17481b99 |
---|---|
369 */ | 369 */ |
370 static int | 370 static int |
371 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, | 371 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, |
372 int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) | 372 int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) |
373 { | 373 { |
374 dsl_pool_t *dp = NULL; | |
375 dmu_buf_t **dbp; | 374 dmu_buf_t **dbp; |
376 uint64_t blkid, nblks, i; | 375 uint64_t blkid, nblks, i; |
377 uint32_t dbuf_flags; | 376 uint32_t dbuf_flags; |
378 int err; | 377 int err; |
379 zio_t *zio; | 378 zio_t *zio; |
380 hrtime_t start; | |
381 | 379 |
382 ASSERT(length <= DMU_MAX_ACCESS); | 380 ASSERT(length <= DMU_MAX_ACCESS); |
383 | 381 |
384 dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; | 382 dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; |
385 if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) | 383 if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) |
403 } | 401 } |
404 nblks = 1; | 402 nblks = 1; |
405 } | 403 } |
406 dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); | 404 dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); |
407 | 405 |
408 if (dn->dn_objset->os_dsl_dataset) | |
409 dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool; | |
410 start = gethrtime(); | |
411 zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); | 406 zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); |
412 blkid = dbuf_whichblock(dn, offset); | 407 blkid = dbuf_whichblock(dn, offset); |
413 for (i = 0; i < nblks; i++) { | 408 for (i = 0; i < nblks; i++) { |
414 dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); | 409 dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); |
415 if (db == NULL) { | 410 if (db == NULL) { |
426 } | 421 } |
427 rw_exit(&dn->dn_struct_rwlock); | 422 rw_exit(&dn->dn_struct_rwlock); |
428 | 423 |
429 /* wait for async i/o */ | 424 /* wait for async i/o */ |
430 err = zio_wait(zio); | 425 err = zio_wait(zio); |
431 /* track read overhead when we are in sync context */ | |
432 if (dp && dsl_pool_sync_context(dp)) | |
433 dp->dp_read_overhead += gethrtime() - start; | |
434 if (err) { | 426 if (err) { |
435 dmu_buf_rele_array(dbp, nblks, tag); | 427 dmu_buf_rele_array(dbp, nblks, tag); |
436 return (err); | 428 return (err); |
437 } | 429 } |
438 | 430 |
510 } | 502 } |
511 | 503 |
512 kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); | 504 kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); |
513 } | 505 } |
514 | 506 |
507 /* | |
508 * Issue prefetch i/os for the given blocks. | |
509 * | |
510 * Note: The assumption is that we *know* these blocks will be needed | |
511 * almost immediately. Therefore, the prefetch i/os will be issued at | |
512 * ZIO_PRIORITY_SYNC_READ | |
513 * | |
514 * Note: indirect blocks and other metadata will be read synchronously, | |
515 * causing this function to block if they are not already cached. | |
516 */ | |
515 void | 517 void |
516 dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) | 518 dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) |
517 { | 519 { |
518 dnode_t *dn; | 520 dnode_t *dn; |
519 uint64_t blkid; | 521 uint64_t blkid; |
520 int nblks, i, err; | 522 int nblks, err; |
521 | 523 |
522 if (zfs_prefetch_disable) | 524 if (zfs_prefetch_disable) |
523 return; | 525 return; |
524 | 526 |
525 if (len == 0) { /* they're interested in the bonus buffer */ | 527 if (len == 0) { /* they're interested in the bonus buffer */ |
528 if (object == 0 || object >= DN_MAX_OBJECT) | 530 if (object == 0 || object >= DN_MAX_OBJECT) |
529 return; | 531 return; |
530 | 532 |
531 rw_enter(&dn->dn_struct_rwlock, RW_READER); | 533 rw_enter(&dn->dn_struct_rwlock, RW_READER); |
532 blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); | 534 blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); |
533 dbuf_prefetch(dn, blkid); | 535 dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ); |
534 rw_exit(&dn->dn_struct_rwlock); | 536 rw_exit(&dn->dn_struct_rwlock); |
535 return; | 537 return; |
536 } | 538 } |
537 | 539 |
538 /* | 540 /* |
545 return; | 547 return; |
546 | 548 |
547 rw_enter(&dn->dn_struct_rwlock, RW_READER); | 549 rw_enter(&dn->dn_struct_rwlock, RW_READER); |
548 if (dn->dn_datablkshift) { | 550 if (dn->dn_datablkshift) { |
549 int blkshift = dn->dn_datablkshift; | 551 int blkshift = dn->dn_datablkshift; |
550 nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - | 552 nblks = (P2ROUNDUP(offset + len, 1 << blkshift) - |
551 P2ALIGN(offset, 1<<blkshift)) >> blkshift; | 553 P2ALIGN(offset, 1 << blkshift)) >> blkshift; |
552 } else { | 554 } else { |
553 nblks = (offset < dn->dn_datablksz); | 555 nblks = (offset < dn->dn_datablksz); |
554 } | 556 } |
555 | 557 |
556 if (nblks != 0) { | 558 if (nblks != 0) { |
557 blkid = dbuf_whichblock(dn, offset); | 559 blkid = dbuf_whichblock(dn, offset); |
558 for (i = 0; i < nblks; i++) | 560 for (int i = 0; i < nblks; i++) |
559 dbuf_prefetch(dn, blkid+i); | 561 dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ); |
560 } | 562 } |
561 | 563 |
562 rw_exit(&dn->dn_struct_rwlock); | 564 rw_exit(&dn->dn_struct_rwlock); |
563 | 565 |
564 dnode_rele(dn, FTAG); | 566 dnode_rele(dn, FTAG); |
1354 dsa->dsa_zgd = zgd; | 1356 dsa->dsa_zgd = zgd; |
1355 dsa->dsa_tx = tx; | 1357 dsa->dsa_tx = tx; |
1356 | 1358 |
1357 zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, | 1359 zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, |
1358 zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, | 1360 zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, |
1359 dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa, | 1361 dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa, |
1360 ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); | 1362 ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); |
1361 | 1363 |
1362 return (0); | 1364 return (0); |
1363 } | 1365 } |
1364 | 1366 |
1494 dsa->dsa_zgd = zgd; | 1496 dsa->dsa_zgd = zgd; |
1495 dsa->dsa_tx = NULL; | 1497 dsa->dsa_tx = NULL; |
1496 | 1498 |
1497 zio_nowait(arc_write(pio, os->os_spa, txg, | 1499 zio_nowait(arc_write(pio, os->os_spa, txg, |
1498 bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), | 1500 bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), |
1499 DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, dmu_sync_done, | 1501 DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, |
1500 dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); | 1502 NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, |
1503 ZIO_FLAG_CANFAIL, &zb)); | |
1501 | 1504 |
1502 return (0); | 1505 return (0); |
1503 } | 1506 } |
1504 | 1507 |
1505 int | 1508 int |