comparison usr/src/uts/common/fs/zfs/dmu.c @ 14164:dceb17481b99

4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Ned Bass <bass6@llnl.gov>
Reviewed by: Brendan Gregg <brendan.gregg@joyent.com>
Approved by: Robert Mustacchi <rm@joyent.com>
author Matthew Ahrens <mahrens@delphix.com>
date Mon, 26 Aug 2013 13:13:26 -0800
parents dc75c925d8aa
children be36a38bac3d
comparison
equal deleted inserted replaced
14163:712ede127bb4 14164:dceb17481b99
369 */ 369 */
370 static int 370 static int
371 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, 371 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
372 int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) 372 int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
373 { 373 {
374 dsl_pool_t *dp = NULL;
375 dmu_buf_t **dbp; 374 dmu_buf_t **dbp;
376 uint64_t blkid, nblks, i; 375 uint64_t blkid, nblks, i;
377 uint32_t dbuf_flags; 376 uint32_t dbuf_flags;
378 int err; 377 int err;
379 zio_t *zio; 378 zio_t *zio;
380 hrtime_t start;
381 379
382 ASSERT(length <= DMU_MAX_ACCESS); 380 ASSERT(length <= DMU_MAX_ACCESS);
383 381
384 dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; 382 dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
385 if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) 383 if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
403 } 401 }
404 nblks = 1; 402 nblks = 1;
405 } 403 }
406 dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 404 dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
407 405
408 if (dn->dn_objset->os_dsl_dataset)
409 dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
410 start = gethrtime();
411 zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); 406 zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
412 blkid = dbuf_whichblock(dn, offset); 407 blkid = dbuf_whichblock(dn, offset);
413 for (i = 0; i < nblks; i++) { 408 for (i = 0; i < nblks; i++) {
414 dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 409 dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
415 if (db == NULL) { 410 if (db == NULL) {
426 } 421 }
427 rw_exit(&dn->dn_struct_rwlock); 422 rw_exit(&dn->dn_struct_rwlock);
428 423
429 /* wait for async i/o */ 424 /* wait for async i/o */
430 err = zio_wait(zio); 425 err = zio_wait(zio);
431 /* track read overhead when we are in sync context */
432 if (dp && dsl_pool_sync_context(dp))
433 dp->dp_read_overhead += gethrtime() - start;
434 if (err) { 426 if (err) {
435 dmu_buf_rele_array(dbp, nblks, tag); 427 dmu_buf_rele_array(dbp, nblks, tag);
436 return (err); 428 return (err);
437 } 429 }
438 430
510 } 502 }
511 503
512 kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 504 kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs);
513 } 505 }
514 506
507 /*
508 * Issue prefetch i/os for the given blocks.
509 *
510 * Note: The assumption is that we *know* these blocks will be needed
511 * almost immediately. Therefore, the prefetch i/os will be issued at
512 * ZIO_PRIORITY_SYNC_READ.
513 *
514 * Note: indirect blocks and other metadata will be read synchronously,
515 * causing this function to block if they are not already cached.
516 */
515 void 517 void
516 dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 518 dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
517 { 519 {
518 dnode_t *dn; 520 dnode_t *dn;
519 uint64_t blkid; 521 uint64_t blkid;
520 int nblks, i, err; 522 int nblks, err;
521 523
522 if (zfs_prefetch_disable) 524 if (zfs_prefetch_disable)
523 return; 525 return;
524 526
525 if (len == 0) { /* they're interested in the bonus buffer */ 527 if (len == 0) { /* they're interested in the bonus buffer */
528 if (object == 0 || object >= DN_MAX_OBJECT) 530 if (object == 0 || object >= DN_MAX_OBJECT)
529 return; 531 return;
530 532
531 rw_enter(&dn->dn_struct_rwlock, RW_READER); 533 rw_enter(&dn->dn_struct_rwlock, RW_READER);
532 blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 534 blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
533 dbuf_prefetch(dn, blkid); 535 dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ);
534 rw_exit(&dn->dn_struct_rwlock); 536 rw_exit(&dn->dn_struct_rwlock);
535 return; 537 return;
536 } 538 }
537 539
538 /* 540 /*
545 return; 547 return;
546 548
547 rw_enter(&dn->dn_struct_rwlock, RW_READER); 549 rw_enter(&dn->dn_struct_rwlock, RW_READER);
548 if (dn->dn_datablkshift) { 550 if (dn->dn_datablkshift) {
549 int blkshift = dn->dn_datablkshift; 551 int blkshift = dn->dn_datablkshift;
550 nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 552 nblks = (P2ROUNDUP(offset + len, 1 << blkshift) -
551 P2ALIGN(offset, 1<<blkshift)) >> blkshift; 553 P2ALIGN(offset, 1 << blkshift)) >> blkshift;
552 } else { 554 } else {
553 nblks = (offset < dn->dn_datablksz); 555 nblks = (offset < dn->dn_datablksz);
554 } 556 }
555 557
556 if (nblks != 0) { 558 if (nblks != 0) {
557 blkid = dbuf_whichblock(dn, offset); 559 blkid = dbuf_whichblock(dn, offset);
558 for (i = 0; i < nblks; i++) 560 for (int i = 0; i < nblks; i++)
559 dbuf_prefetch(dn, blkid+i); 561 dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ);
560 } 562 }
561 563
562 rw_exit(&dn->dn_struct_rwlock); 564 rw_exit(&dn->dn_struct_rwlock);
563 565
564 dnode_rele(dn, FTAG); 566 dnode_rele(dn, FTAG);
1354 dsa->dsa_zgd = zgd; 1356 dsa->dsa_zgd = zgd;
1355 dsa->dsa_tx = tx; 1357 dsa->dsa_tx = tx;
1356 1358
1357 zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, 1359 zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
1358 zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, 1360 zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
1359 dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa, 1361 dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
1360 ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); 1362 ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
1361 1363
1362 return (0); 1364 return (0);
1363 } 1365 }
1364 1366
1494 dsa->dsa_zgd = zgd; 1496 dsa->dsa_zgd = zgd;
1495 dsa->dsa_tx = NULL; 1497 dsa->dsa_tx = NULL;
1496 1498
1497 zio_nowait(arc_write(pio, os->os_spa, txg, 1499 zio_nowait(arc_write(pio, os->os_spa, txg,
1498 bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), 1500 bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
1499 DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, dmu_sync_done, 1501 DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
1500 dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); 1502 NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
1503 ZIO_FLAG_CANFAIL, &zb));
1501 1504
1502 return (0); 1505 return (0);
1503 } 1506 }
1504 1507
1505 int 1508 int