Mercurial > illumos > illumos-gate
changeset 6987:877c018eb06c
6628282 arc_stats.arcstat_l2_writes_sent.value.ui64) == (arc_stats.arcstat_l2_writes_done.value.ui64)
6672464 L2ARC doesn't need startup grace interval
6693918 l2arc_fini() list->list_head.list_next == node, file: ../../common/os/list.c, line: 77
6701480 zpool command hang while add or remove cache from storage pool
6703384 assertion failed: lold->list_next != 0L, file: ../../common/os/list.c, line: 116
6709301 An empty L2ARC cache device is slow to warm up
6710331 L2ARC can hang during heavy error load
author | brendan |
---|---|
date | Tue, 01 Jul 2008 10:31:58 -0700 |
parents | 1e7638a44ce6 |
children | 6e7ccf1f1466 |
files | usr/src/uts/common/fs/zfs/arc.c usr/src/uts/common/fs/zfs/sys/spa.h usr/src/uts/common/fs/zfs/sys/spa_impl.h |
diffstat | 3 files changed, 219 insertions(+), 129 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/arc.c Tue Jul 01 10:11:12 2008 -0700 +++ b/usr/src/uts/common/fs/zfs/arc.c Tue Jul 01 10:31:58 2008 -0700 @@ -163,6 +163,11 @@ static int arc_dead; /* + * The arc has filled available memory and has now warmed up. + */ +static boolean_t arc_warm; + +/* * These tunables are for performance analysis. */ uint64_t zfs_arc_max; @@ -466,10 +471,9 @@ #define ARC_INDIRECT (1 << 14) /* this is an indirect block */ #define ARC_FREE_IN_PROGRESS (1 << 15) /* hdr about to be freed */ #define ARC_DONT_L2CACHE (1 << 16) /* originated by prefetch */ -#define ARC_L2_READING (1 << 17) /* L2ARC read in progress */ -#define ARC_L2_WRITING (1 << 18) /* L2ARC write in progress */ -#define ARC_L2_EVICTED (1 << 19) /* evicted during I/O */ -#define ARC_L2_WRITE_HEAD (1 << 20) /* head of write list */ +#define ARC_L2_WRITING (1 << 17) /* L2ARC write in progress */ +#define ARC_L2_EVICTED (1 << 18) /* evicted during I/O */ +#define ARC_L2_WRITE_HEAD (1 << 19) /* head of write list */ #define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) #define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) @@ -478,7 +482,8 @@ #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_BUF_AVAILABLE) #define HDR_FREE_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FREE_IN_PROGRESS) #define HDR_DONT_L2CACHE(hdr) ((hdr)->b_flags & ARC_DONT_L2CACHE) -#define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_L2_READING) +#define HDR_L2_READING(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS && \ + (hdr)->b_l2hdr != NULL) #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD) @@ -527,7 +532,6 @@ #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ #define L2ARC_HEADROOM 4 /* num of writes */ -#define L2ARC_FEED_DELAY 180 /* starting grace */ #define L2ARC_FEED_SECS 1 /* caching interval */ #define l2arc_writes_sent 
ARCSTAT(arcstat_l2_writes_sent) @@ -537,6 +541,7 @@ * L2ARC Performance Tunables */ uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ +uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ @@ -549,6 +554,7 @@ spa_t *l2ad_spa; /* spa */ uint64_t l2ad_hand; /* next write location */ uint64_t l2ad_write; /* desired write size, bytes */ + uint64_t l2ad_boost; /* warmup write boost, bytes */ uint64_t l2ad_start; /* first addr on device */ uint64_t l2ad_end; /* last addr on device */ uint64_t l2ad_evict; /* last addr eviction reached */ @@ -1885,6 +1891,7 @@ growtime = lbolt + (arc_grow_retry * hz); arc_kmem_reap_now(last_reclaim); + arc_warm = B_TRUE; } else if (arc_no_grow && lbolt >= growtime) { arc_no_grow = FALSE; @@ -2282,7 +2289,7 @@ (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) || (found == hdr && HDR_L2_READING(hdr))); - hdr->b_flags &= ~(ARC_L2_READING|ARC_L2_EVICTED); + hdr->b_flags &= ~ARC_L2_EVICTED; if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH)) hdr->b_flags |= ARC_DONT_L2CACHE; @@ -2471,6 +2478,8 @@ } else { uint64_t size = BP_GET_LSIZE(bp); arc_callback_t *acb; + vdev_t *vd = NULL; + daddr_t addr; if (hdr == NULL) { /* this block is not in the cache */ @@ -2544,6 +2553,13 @@ if (GHOST_STATE(hdr->b_state)) arc_access(hdr, hash_lock); + if (hdr->b_l2hdr != NULL) { + vd = hdr->b_l2hdr->b_dev->l2ad_vdev; + addr = hdr->b_l2hdr->b_daddr; + } + + mutex_exit(hash_lock); + ASSERT3U(hdr->b_size, ==, size); DTRACE_PROBE3(arc__miss, blkptr_t *, bp, uint64_t, size, zbookmark_t *, zb); @@ -2554,20 +2570,24 @@ if (l2arc_ndev != 0) { /* - * Read from the L2ARC if the following are true: - * 1. This buffer has L2ARC metadata. - * 2. This buffer isn't currently writing to the L2ARC. 
+ * Lock out device removal. */ - if (hdr->b_l2hdr != NULL && !HDR_L2_WRITING(hdr)) { - vdev_t *vd = hdr->b_l2hdr->b_dev->l2ad_vdev; - daddr_t addr = hdr->b_l2hdr->b_daddr; + spa_config_enter(spa, RW_READER, FTAG); + + /* + * Read from the L2ARC if the following are true: + * 1. The L2ARC vdev was previously cached. + * 2. This buffer still has L2ARC metadata. + * 3. This buffer isn't currently writing to the L2ARC. + * 4. The L2ARC entry wasn't evicted, which may + * also have invalidated the vdev. + */ + if (vd != NULL && hdr->b_l2hdr != NULL && + !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr)) { l2arc_read_callback_t *cb; if (vdev_is_dead(vd)) - goto skip_l2arc; - - hdr->b_flags |= ARC_L2_READING; - mutex_exit(hash_lock); + goto l2skip; DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_hits); @@ -2585,29 +2605,34 @@ */ rzio = zio_read_phys(pio, vd, addr, size, buf->b_data, ZIO_CHECKSUM_OFF, - l2arc_read_done, cb, priority, - flags | ZIO_FLAG_DONT_CACHE, B_FALSE); + l2arc_read_done, cb, priority, flags | + ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL, + B_FALSE); DTRACE_PROBE2(l2arc__read, vdev_t *, vd, zio_t *, rzio); - - if (*arc_flags & ARC_WAIT) - return (zio_wait(rzio)); - - ASSERT(*arc_flags & ARC_NOWAIT); - zio_nowait(rzio); - return (0); + spa_config_exit(spa, FTAG); + + if (*arc_flags & ARC_NOWAIT) { + zio_nowait(rzio); + return (0); + } + + ASSERT(*arc_flags & ARC_WAIT); + if (zio_wait(rzio) == 0) + return (0); + + /* l2arc read error; goto zio_read() */ } else { DTRACE_PROBE1(l2arc__miss, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_misses); if (HDR_L2_WRITING(hdr)) ARCSTAT_BUMP(arcstat_l2_rw_clash); +l2skip: + spa_config_exit(spa, FTAG); } } -skip_l2arc: - mutex_exit(hash_lock); - rzio = zio_read(pio, spa, bp, buf->b_data, size, arc_read_done, buf, priority, flags, zb); @@ -3326,6 +3351,7 @@ TS_RUN, minclsyspri); arc_dead = FALSE; + arc_warm = B_FALSE; if (zfs_write_limit_max == 0) zfs_write_limit_max = physmem * PAGESIZE >> @@ 
-3466,21 +3492,33 @@ * the potential for the L2ARC to churn if it attempts to cache content too * quickly, such as during backups of the entire pool. * - * 5. Writes to the L2ARC devices are grouped and sent in-sequence, so that + * 5. After system boot and before the ARC has filled main memory, there are + * no evictions from the ARC and so the tails of the ARC_mfu and ARC_mru + * lists can remain mostly static. Instead of searching from tail of these + * lists as pictured, the l2arc_feed_thread() will search from the list heads + * for eligible buffers, greatly increasing its chance of finding them. + * + * The L2ARC device write speed is also boosted during this time so that + * the L2ARC warms up faster. Since there have been no ARC evictions yet, + * there are no L2ARC reads, and no fear of degrading read performance + * through increased writes. + * + * 6. Writes to the L2ARC devices are grouped and sent in-sequence, so that * the vdev queue can aggregate them into larger and fewer writes. Each * device is written to in a rotor fashion, sweeping writes through * available space then repeating. * - * 6. The L2ARC does not store dirty content. It never needs to flush + * 7. The L2ARC does not store dirty content. It never needs to flush * write buffers back to disk based storage. * - * 7. If an ARC buffer is written (and dirtied) which also exists in the + * 8. If an ARC buffer is written (and dirtied) which also exists in the * L2ARC, the now stale L2ARC buffer is immediately dropped. * * The performance of the L2ARC can be tweaked by a number of tunables, which * may be necessary for different workloads: * * l2arc_write_max max write bytes per interval + * l2arc_write_boost extra write bytes during device warmup * l2arc_noprefetch skip caching prefetched buffers * l2arc_headroom number of max device writes to precache * l2arc_feed_secs seconds between L2ARC writing @@ -3505,16 +3543,24 @@ /* * Cycle through L2ARC devices. This is how L2ARC load balances. 
- * This is called with l2arc_dev_mtx held, which also locks out spa removal. + * If a device is returned, this also returns holding the spa config lock. */ static l2arc_dev_t * l2arc_dev_get_next(void) { - l2arc_dev_t *next, *first; + l2arc_dev_t *first, *next = NULL; + + /* + * Lock out the removal of spas (spa_namespace_lock), then removal + * of cache devices (l2arc_dev_mtx). Once a device has been selected, + * both locks will be dropped and a spa config lock held instead. + */ + mutex_enter(&spa_namespace_lock); + mutex_enter(&l2arc_dev_mtx); /* if there are no vdevs, there is nothing to do */ if (l2arc_ndev == 0) - return (NULL); + goto out; first = NULL; next = l2arc_dev_last; @@ -3538,14 +3584,49 @@ /* if we were unable to find any usable vdevs, return NULL */ if (vdev_is_dead(next->l2ad_vdev)) - return (NULL); + next = NULL; l2arc_dev_last = next; +out: + mutex_exit(&l2arc_dev_mtx); + + /* + * Grab the config lock to prevent the 'next' device from being + * removed while we are writing to it. + */ + if (next != NULL) + spa_config_enter(next->l2ad_spa, RW_READER, next); + mutex_exit(&spa_namespace_lock); + return (next); } /* + * Free buffers that were tagged for destruction. + */ +static void +l2arc_do_free_on_write() +{ + list_t *buflist; + l2arc_data_free_t *df, *df_prev; + + mutex_enter(&l2arc_free_on_write_mtx); + buflist = l2arc_free_on_write; + + for (df = list_tail(buflist); df; df = df_prev) { + df_prev = list_prev(buflist, df); + ASSERT(df->l2df_data != NULL); + ASSERT(df->l2df_func != NULL); + df->l2df_func(df->l2df_data, df->l2df_size); + list_remove(buflist, df); + kmem_free(df, sizeof (l2arc_data_free_t)); + } + + mutex_exit(&l2arc_free_on_write_mtx); +} + +/* * A write to a cache device has completed. Update all headers to allow * reads from these buffers to begin. 
*/ @@ -3555,8 +3636,8 @@ l2arc_write_callback_t *cb; l2arc_dev_t *dev; list_t *buflist; - l2arc_data_free_t *df, *df_prev; arc_buf_hdr_t *head, *ab, *ab_prev; + l2arc_buf_hdr_t *abl2; kmutex_t *hash_lock; cb = zio->io_private; @@ -3594,9 +3675,13 @@ if (zio->io_error != 0) { /* - * Error - invalidate L2ARC entry. + * Error - drop L2ARC entry. */ + list_remove(buflist, ab); + abl2 = ab->b_l2hdr; ab->b_l2hdr = NULL; + kmem_free(abl2, sizeof (l2arc_buf_hdr_t)); + ARCSTAT_INCR(arcstat_l2_size, -ab->b_size); } /* @@ -3612,20 +3697,7 @@ kmem_cache_free(hdr_cache, head); mutex_exit(&l2arc_buflist_mtx); - /* - * Free buffers that were tagged for destruction. - */ - mutex_enter(&l2arc_free_on_write_mtx); - buflist = l2arc_free_on_write; - for (df = list_tail(buflist); df; df = df_prev) { - df_prev = list_prev(buflist, df); - ASSERT(df->l2df_data != NULL); - ASSERT(df->l2df_func != NULL); - df->l2df_func(df->l2df_data, df->l2df_size); - list_remove(buflist, df); - kmem_free(df, sizeof (l2arc_data_free_t)); - } - mutex_exit(&l2arc_free_on_write_mtx); + l2arc_do_free_on_write(); kmem_free(cb, sizeof (l2arc_write_callback_t)); } @@ -3642,7 +3714,7 @@ arc_buf_t *buf; zio_t *rzio; kmutex_t *hash_lock; - int equal, err = 0; + int equal; cb = zio->io_private; ASSERT(cb != NULL); @@ -3668,29 +3740,27 @@ * Buffer didn't survive caching. Increment stats and * reissue to the original storage device. */ - if (zio->io_error != 0) + if (zio->io_error != 0) { ARCSTAT_BUMP(arcstat_l2_io_error); + } else { + zio->io_error = EIO; + } if (!equal) ARCSTAT_BUMP(arcstat_l2_cksum_bad); - zio->io_flags &= ~ZIO_FLAG_DONT_CACHE; - rzio = zio_read(NULL, cb->l2rcb_spa, &cb->l2rcb_bp, - buf->b_data, zio->io_size, arc_read_done, buf, - zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb); - - /* - * Since this is a seperate thread, we can wait on this - * I/O whether there is an io_waiter or not. - */ - err = zio_wait(rzio); - - /* - * Let the resent I/O call arc_read_done() instead. 
- * io_error is set to the reissued I/O error status. - */ - zio->io_done = NULL; - zio->io_waiter = NULL; - zio->io_error = err; + if (zio->io_waiter == NULL) { + /* + * Let the resent I/O call arc_read_done() instead. + */ + zio->io_done = NULL; + zio->io_flags &= ~ZIO_FLAG_DONT_CACHE; + + rzio = zio_read(NULL, cb->l2rcb_spa, &cb->l2rcb_bp, + buf->b_data, zio->io_size, arc_read_done, buf, + zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb); + + (void) zio_nowait(rzio); + } } kmem_free(cb, sizeof (l2arc_read_callback_t)); @@ -3752,8 +3822,6 @@ kmutex_t *hash_lock; uint64_t taddr; - ASSERT(MUTEX_HELD(&l2arc_dev_mtx)); - buflist = dev->l2ad_buflist; if (buflist == NULL) @@ -3767,7 +3835,7 @@ return; } - if (dev->l2ad_hand >= (dev->l2ad_end - (2 * dev->l2ad_write))) { + if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) { /* * When nearing the end of the device, evict to the end * before the device write hand jumps to the start. @@ -3836,6 +3904,16 @@ arc_hdr_destroy(ab); } else { /* + * Invalidate issued or about to be issued + * reads, since we may be about to write + * over this location. + */ + if (HDR_L2_READING(ab)) { + ARCSTAT_BUMP(arcstat_l2_evict_reading); + ab->b_flags |= ARC_L2_EVICTED; + } + + /* * Tell ARC this no longer exists in L2ARC. */ if (ab->b_l2hdr != NULL) { @@ -3851,16 +3929,6 @@ * failed write. */ ab->b_flags &= ~ARC_L2_WRITING; - - /* - * Invalidate issued or about to be issued - * reads, since we may be about to write - * over this location. - */ - if (HDR_L2_READING(ab)) { - ARCSTAT_BUMP(arcstat_l2_evict_reading); - ab->b_flags |= ARC_L2_EVICTED; - } } mutex_exit(hash_lock); } @@ -3877,21 +3945,18 @@ * for reading until they have completed writing. 
*/ static void -l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev) +l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) { arc_buf_hdr_t *ab, *ab_prev, *head; l2arc_buf_hdr_t *hdrl2; list_t *list; - uint64_t passed_sz, write_sz, buf_sz; - uint64_t target_sz = dev->l2ad_write; - uint64_t headroom = dev->l2ad_write * l2arc_headroom; + uint64_t passed_sz, write_sz, buf_sz, headroom; void *buf_data; kmutex_t *hash_lock, *list_lock; boolean_t have_lock, full; l2arc_write_callback_t *cb; zio_t *pio, *wzio; - ASSERT(MUTEX_HELD(&l2arc_dev_mtx)); ASSERT(dev->l2ad_vdev != NULL); pio = NULL; @@ -3908,8 +3973,23 @@ list = l2arc_list_locked(try, &list_lock); passed_sz = 0; - for (ab = list_tail(list); ab; ab = ab_prev) { - ab_prev = list_prev(list, ab); + /* + * L2ARC fast warmup. + * + * Until the ARC is warm and starts to evict, read from the + * head of the ARC lists rather than the tail. + */ + headroom = target_sz * l2arc_headroom; + if (arc_warm == B_FALSE) + ab = list_head(list); + else + ab = list_tail(list); + + for (; ab; ab = ab_prev) { + if (arc_warm == B_FALSE) + ab_prev = list_next(list, ab); + else + ab_prev = list_prev(list, ab); hash_lock = HDR_LOCK(ab); have_lock = MUTEX_HELD(hash_lock); @@ -4032,7 +4112,7 @@ * Bump device hand to the device start if it is approaching the end. * l2arc_evict() will already have evicted ahead for this case. 
*/ - if (dev->l2ad_hand >= (dev->l2ad_end - dev->l2ad_write)) { + if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { spa_l2cache_space_update(dev->l2ad_vdev, 0, dev->l2ad_end - dev->l2ad_hand); dev->l2ad_hand = dev->l2ad_start; @@ -4053,8 +4133,7 @@ callb_cpr_t cpr; l2arc_dev_t *dev; spa_t *spa; - int interval; - boolean_t startup = B_TRUE; + uint64_t size; CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); @@ -4062,57 +4141,64 @@ while (l2arc_thread_exit == 0) { /* - * Initially pause for L2ARC_FEED_DELAY seconds as a grace - * interval during boot, followed by l2arc_feed_secs seconds - * thereafter. + * Pause for l2arc_feed_secs seconds between writes. */ CALLB_CPR_SAFE_BEGIN(&cpr); - if (startup) { - interval = L2ARC_FEED_DELAY; - startup = B_FALSE; - } else { - interval = l2arc_feed_secs; + (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, + lbolt + (hz * l2arc_feed_secs)); + CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); + + /* + * Quick check for L2ARC devices. + */ + mutex_enter(&l2arc_dev_mtx); + if (l2arc_ndev == 0) { + mutex_exit(&l2arc_dev_mtx); + continue; } - (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, - lbolt + (hz * interval)); - CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); - - mutex_enter(&l2arc_dev_mtx); + mutex_exit(&l2arc_dev_mtx); /* * This selects the next l2arc device to write to, and in * doing so the next spa to feed from: dev->l2ad_spa. This - * will return NULL if there are no l2arc devices or if they - * are all faulted. + * will return NULL if there are now no l2arc devices or if + * they are all faulted. + * + * If a device is returned, its spa's config lock is also + * held to prevent device removal. l2arc_dev_get_next() + * will grab and release l2arc_dev_mtx. 
*/ - if ((dev = l2arc_dev_get_next()) == NULL) { - mutex_exit(&l2arc_dev_mtx); + if ((dev = l2arc_dev_get_next()) == NULL) continue; - } + + spa = dev->l2ad_spa; + ASSERT(spa != NULL); /* * Avoid contributing to memory pressure. */ if (arc_reclaim_needed()) { ARCSTAT_BUMP(arcstat_l2_abort_lowmem); - mutex_exit(&l2arc_dev_mtx); + spa_config_exit(spa, dev); continue; } - spa = dev->l2ad_spa; - ASSERT(spa != NULL); ARCSTAT_BUMP(arcstat_l2_feeds); + size = dev->l2ad_write; + if (arc_warm == B_FALSE) + size += dev->l2ad_boost; + /* * Evict L2ARC buffers that will be overwritten. */ - l2arc_evict(dev, dev->l2ad_write, B_FALSE); + l2arc_evict(dev, size, B_FALSE); /* * Write ARC buffers. */ - l2arc_write_buffers(spa, dev); - mutex_exit(&l2arc_dev_mtx); + l2arc_write_buffers(spa, dev, size); + spa_config_exit(spa, dev); } l2arc_thread_exit = 0; @@ -4155,6 +4241,7 @@ adddev->l2ad_spa = spa; adddev->l2ad_vdev = vd; adddev->l2ad_write = l2arc_write_max; + adddev->l2ad_boost = l2arc_write_boost; adddev->l2ad_start = start; adddev->l2ad_end = end; adddev->l2ad_hand = adddev->l2ad_start; @@ -4190,12 +4277,6 @@ l2arc_dev_t *dev, *nextdev, *remdev = NULL; /* - * We can only grab the spa config lock when cache device writes - * complete. - */ - ASSERT3U(l2arc_writes_sent, ==, l2arc_writes_done); - - /* * Find the device by vdev */ mutex_enter(&l2arc_dev_mtx); @@ -4213,6 +4294,8 @@ */ list_remove(l2arc_dev_list, remdev); l2arc_dev_last = NULL; /* may have been invalidated */ + atomic_dec_64(&l2arc_ndev); + mutex_exit(&l2arc_dev_mtx); /* * Clear all buflists and ARC references. L2ARC device flush. 
@@ -4221,9 +4304,6 @@ list_destroy(remdev->l2ad_buflist); kmem_free(remdev->l2ad_buflist, sizeof (list_t)); kmem_free(remdev, sizeof (l2arc_dev_t)); - - atomic_dec_64(&l2arc_ndev); - mutex_exit(&l2arc_dev_mtx); } void @@ -4254,6 +4334,12 @@ void l2arc_fini() { + /* + * This is called from dmu_fini(), which is called from spa_fini(); + * Because of this, we can assume that all l2arc devices have + * already been removed when the pools themselves were removed. + */ + mutex_enter(&l2arc_feed_thr_lock); cv_signal(&l2arc_feed_thr_cv); /* kick thread out of startup */ l2arc_thread_exit = 1; @@ -4261,6 +4347,8 @@ cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock); mutex_exit(&l2arc_feed_thr_lock); + l2arc_do_free_on_write(); + mutex_destroy(&l2arc_feed_thr_lock); cv_destroy(&l2arc_feed_thr_cv); mutex_destroy(&l2arc_dev_mtx);
--- a/usr/src/uts/common/fs/zfs/sys/spa.h Tue Jul 01 10:11:12 2008 -0700 +++ b/usr/src/uts/common/fs/zfs/sys/spa.h Tue Jul 01 10:31:58 2008 -0700 @@ -380,6 +380,9 @@ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ extern void spa_sync_allpools(void); +/* spa namespace global mutex */ +extern kmutex_t spa_namespace_lock; + /* * SPA configuration functions in spa_config.c */