annotate usr/src/uts/common/fs/zfs/metaslab.c @ 13966:0e1d84ebb004

3578 transferring the freed map to the defer map should be constant time 3579 ztest trips assertion in metaslab_weight() Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Richard Elling <richard.elling@dey-sys.com> Approved by: Dan McDonald <danmcd@nexenta.com>
author George Wilson <george.wilson@delphix.com>
date Wed, 20 Feb 2013 13:30:36 -0800
parents e03e14ddfb4c
children e4988c7d0403
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
2 * CDDL HEADER START
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
3 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
4 * The contents of this file are subject to the terms of the
1544
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
5 * Common Development and Distribution License (the "License").
938876158511 PSARC 2006/077 zpool clear
eschrock
parents: 789
diff changeset
6 * You may not use this file except in compliance with the License.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
7 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
9 * or http://www.opensolaris.org/os/licensing.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
10 * See the License for the specific language governing permissions
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
11 * and limitations under the License.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
12 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
13 * When distributing Covered Code, include this CDDL HEADER in each
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
15 * If applicable, add the following below this CDDL HEADER, with the
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
16 * fields enclosed by brackets "[]" replaced with your own identifying
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
17 * information: Portions Copyright [yyyy] [name of copyright owner]
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
18 *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
19 * CDDL HEADER END
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
20 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
21 /*
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
13572
85c66b89d5f2 1909 disk sync write perf regression when slog is used post oi_148
George Wilson <george.wilson@delphix.com>
parents: 13379
diff changeset
23 * Copyright (c) 2012 by Delphix. All rights reserved.
13945
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
25 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
26
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
27 #include <sys/zfs_context.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
28 #include <sys/dmu.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
29 #include <sys/dmu_tx.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
30 #include <sys/space_map.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
31 #include <sys/metaslab_impl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
32 #include <sys/vdev_impl.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
33 #include <sys/zio.h>
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
34
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
35 /*
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
36 * Allow allocations to switch to gang blocks quickly. We do this to
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
37 * avoid having to load lots of space_maps in a given txg. There are,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
38 * however, some cases where we want to avoid "fast" ganging and instead
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
39 * we want to do an exhaustive search of all metaslabs on this device.
13572
85c66b89d5f2 1909 disk sync write perf regression when slog is used post oi_148
George Wilson <george.wilson@delphix.com>
parents: 13379
diff changeset
40 * Currently we don't allow any gang, zil, or dump device related allocations
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
41 * to "fast" gang.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
42 */
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
43 #define CAN_FASTGANG(flags) \
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
44 (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
45 METASLAB_GANG_AVOID)))
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
46
2391
2fa3fd1db808 6447377 ZFS prefetch is inconsistant
maybee
parents: 1807
diff changeset
47 uint64_t metaslab_aliquot = 512ULL << 10;
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
48 uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
2391
2fa3fd1db808 6447377 ZFS prefetch is inconsistant
maybee
parents: 1807
diff changeset
49
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
50 /*
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
51 * The in-core space map representation is more compact than its on-disk form.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
52 * The zfs_condense_pct determines how much more compact the in-core
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
53 * space_map representation must be before we compact it on-disk.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
54 * Values should be greater than or equal to 100.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
55 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
56 int zfs_condense_pct = 200;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
57
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
58 /*
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
59 * This value defines the number of allowed allocation failures per vdev.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
60 * If a device reaches this threshold in a given txg then we consider skipping
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
61 * allocations on that device.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
62 */
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
63 int zfs_mg_alloc_failures;
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
64
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
65 /*
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
66 * Metaslab debugging: when set, keeps all space maps in core to verify frees.
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
67 */
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
68 static int metaslab_debug = 0;
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
69
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
70 /*
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
71 * Minimum size which forces the dynamic allocator to change
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
72 * its allocation strategy. Once the space map cannot satisfy
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
73 * an allocation of this size then it switches to using a more
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
74 * aggressive strategy (i.e., search by size rather than offset).
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
75 */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
76 uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
77
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
78 /*
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
79 * The minimum free space, in percent, which must be available
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
80 * in a space map to continue allocations in a first-fit fashion.
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
81 * Once the space_map's free space drops below this level we dynamically
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
82 * switch to using best-fit allocations.
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
83 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
84 int metaslab_df_free_pct = 4;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
85
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
86 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
87 * A metaslab is considered "free" if it contains a contiguous
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
88 * segment which is greater than metaslab_min_alloc_size.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
89 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
90 uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
91
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
92 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
93 * Max number of space_maps to prefetch.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
94 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
95 int metaslab_prefetch_limit = SPA_DVAS_PER_BP;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
96
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
97 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
98 * Percentage bonus multiplier for metaslabs that are in the bonus area.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
99 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
100 int metaslab_smo_bonus_pct = 150;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
101
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
102 /*
13945
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
103 * Should we be willing to write data to degraded vdevs?
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
104 */
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
105 boolean_t zfs_write_to_degraded = B_FALSE;
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
106
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
107 /*
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
108 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
109 * Metaslab classes
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
110 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
111 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
112 metaslab_class_t *
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
113 metaslab_class_create(spa_t *spa, space_map_ops_t *ops)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
114 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
115 metaslab_class_t *mc;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
116
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
117 mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
118
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
119 mc->mc_spa = spa;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
120 mc->mc_rotor = NULL;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
121 mc->mc_ops = ops;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
122
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
123 return (mc);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
124 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
125
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
126 void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
127 metaslab_class_destroy(metaslab_class_t *mc)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
128 {
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
129 ASSERT(mc->mc_rotor == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
130 ASSERT(mc->mc_alloc == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
131 ASSERT(mc->mc_deferred == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
132 ASSERT(mc->mc_space == 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
133 ASSERT(mc->mc_dspace == 0);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
134
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
135 kmem_free(mc, sizeof (metaslab_class_t));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
136 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
137
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
138 int
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
139 metaslab_class_validate(metaslab_class_t *mc)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
140 {
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
141 metaslab_group_t *mg;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
142 vdev_t *vd;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
143
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
144 /*
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
145 * Must hold one of the spa_config locks.
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
146 */
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
147 ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) ||
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
148 spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER));
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
149
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
150 if ((mg = mc->mc_rotor) == NULL)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
151 return (0);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
152
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
153 do {
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
154 vd = mg->mg_vd;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
155 ASSERT(vd->vdev_mg != NULL);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
156 ASSERT3P(vd->vdev_top, ==, vd);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
157 ASSERT3P(mg->mg_class, ==, mc);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
158 ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
159 } while ((mg = mg->mg_next) != mc->mc_rotor);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
160
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
161 return (0);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
162 }
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
163
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
164 void
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
165 metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta,
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
166 int64_t defer_delta, int64_t space_delta, int64_t dspace_delta)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
167 {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
168 atomic_add_64(&mc->mc_alloc, alloc_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
169 atomic_add_64(&mc->mc_deferred, defer_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
170 atomic_add_64(&mc->mc_space, space_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
171 atomic_add_64(&mc->mc_dspace, dspace_delta);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
172 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
173
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
174 uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
175 metaslab_class_get_alloc(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
176 {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
177 return (mc->mc_alloc);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
178 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
179
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
180 uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
181 metaslab_class_get_deferred(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
182 {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
183 return (mc->mc_deferred);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
184 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
185
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
186 uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
187 metaslab_class_get_space(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
188 {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
189 return (mc->mc_space);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
190 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
191
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
192 uint64_t
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
193 metaslab_class_get_dspace(metaslab_class_t *mc)
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
194 {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
195 return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
196 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
197
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
198 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
199 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
200 * Metaslab groups
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
201 * ==========================================================================
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
202 */
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
203 static int
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
204 metaslab_compare(const void *x1, const void *x2)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
205 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
206 const metaslab_t *m1 = x1;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
207 const metaslab_t *m2 = x2;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
208
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
209 if (m1->ms_weight < m2->ms_weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
210 return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
211 if (m1->ms_weight > m2->ms_weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
212 return (-1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
213
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
214 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
215 * If the weights are identical, use the offset to force uniqueness.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
216 */
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
217 if (m1->ms_map->sm_start < m2->ms_map->sm_start)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
218 return (-1);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
219 if (m1->ms_map->sm_start > m2->ms_map->sm_start)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
220 return (1);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
221
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
222 ASSERT3P(m1, ==, m2);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
223
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
224 return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
225 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
226
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
227 metaslab_group_t *
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
228 metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
229 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
230 metaslab_group_t *mg;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
231
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
232 mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
233 mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
234 avl_create(&mg->mg_metaslab_tree, metaslab_compare,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
235 sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
236 mg->mg_vd = vd;
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
237 mg->mg_class = mc;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
238 mg->mg_activation_count = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
239
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
240 return (mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
241 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
242
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
243 void
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
244 metaslab_group_destroy(metaslab_group_t *mg)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
245 {
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
246 ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
247 ASSERT(mg->mg_next == NULL);
11026
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
248 /*
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
249 * We may have gone below zero with the activation count
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
250 * either because we never activated in the first place or
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
251 * because we're done, and possibly removing the vdev.
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
252 */
e8e10df16a8f 6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents: 10974
diff changeset
253 ASSERT(mg->mg_activation_count <= 0);
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
254
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
255 avl_destroy(&mg->mg_metaslab_tree);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
256 mutex_destroy(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
257 kmem_free(mg, sizeof (metaslab_group_t));
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
258 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
259
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
260 void
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
261 metaslab_group_activate(metaslab_group_t *mg)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
262 {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
263 metaslab_class_t *mc = mg->mg_class;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
264 metaslab_group_t *mgprev, *mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
265
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
266 ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER));
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
267
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
268 ASSERT(mc->mc_rotor != mg);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
269 ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
270 ASSERT(mg->mg_next == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
271 ASSERT(mg->mg_activation_count <= 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
272
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
273 if (++mg->mg_activation_count <= 0)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
274 return;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
275
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
276 mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
277
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
278 if ((mgprev = mc->mc_rotor) == NULL) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
279 mg->mg_prev = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
280 mg->mg_next = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
281 } else {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
282 mgnext = mgprev->mg_next;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
283 mg->mg_prev = mgprev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
284 mg->mg_next = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
285 mgprev->mg_next = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
286 mgnext->mg_prev = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
287 }
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
288 mc->mc_rotor = mg;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
289 }
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
290
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
291 void
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
292 metaslab_group_passivate(metaslab_group_t *mg)
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
293 {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
294 metaslab_class_t *mc = mg->mg_class;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
295 metaslab_group_t *mgprev, *mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
296
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
297 ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER));
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
298
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
299 if (--mg->mg_activation_count != 0) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
300 ASSERT(mc->mc_rotor != mg);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
301 ASSERT(mg->mg_prev == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
302 ASSERT(mg->mg_next == NULL);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
303 ASSERT(mg->mg_activation_count < 0);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
304 return;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
305 }
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
306
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
307 mgprev = mg->mg_prev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
308 mgnext = mg->mg_next;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
309
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
310 if (mg == mgnext) {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
311 mc->mc_rotor = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
312 } else {
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
313 mc->mc_rotor = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
314 mgprev->mg_next = mgnext;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
315 mgnext->mg_prev = mgprev;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
316 }
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
317
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
318 mg->mg_prev = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
319 mg->mg_next = NULL;
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
320 }
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
321
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
322 static void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
323 metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
324 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
325 mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
326 ASSERT(msp->ms_group == NULL);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
327 msp->ms_group = mg;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
328 msp->ms_weight = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
329 avl_add(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
330 mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
331 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
332
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
333 static void
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
334 metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
335 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
336 mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
337 ASSERT(msp->ms_group == mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
338 avl_remove(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
339 msp->ms_group = NULL;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
340 mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
341 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
342
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
343 static void
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
344 metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
345 {
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
346 /*
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
347 * Although in principle the weight can be any value, in
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
348 * practice we do not use values in the range [1, 510].
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
349 */
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
350 ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
351 ASSERT(MUTEX_HELD(&msp->ms_lock));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
352
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
353 mutex_enter(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
354 ASSERT(msp->ms_group == mg);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
355 avl_remove(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
356 msp->ms_weight = weight;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
357 avl_add(&mg->mg_metaslab_tree, msp);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
358 mutex_exit(&mg->mg_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
359 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
360
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
361 /*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
362 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
363 * Common allocator routines
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
364 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
365 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
366 static int
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
367 metaslab_segsize_compare(const void *x1, const void *x2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
368 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
369 const space_seg_t *s1 = x1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
370 const space_seg_t *s2 = x2;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
371 uint64_t ss_size1 = s1->ss_end - s1->ss_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
372 uint64_t ss_size2 = s2->ss_end - s2->ss_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
373
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
374 if (ss_size1 < ss_size2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
375 return (-1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
376 if (ss_size1 > ss_size2)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
377 return (1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
378
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
379 if (s1->ss_start < s2->ss_start)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
380 return (-1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
381 if (s1->ss_start > s2->ss_start)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
382 return (1);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
383
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
384 return (0);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
385 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
386
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
387 /*
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
388 * This is a helper function that can be used by the allocator to find
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
389 * a suitable block to allocate. This will search the specified AVL
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
390 * tree looking for a block that matches the specified criteria.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
391 */
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
392 static uint64_t
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
393 metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
394 uint64_t align)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
395 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
396 space_seg_t *ss, ssearch;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
397 avl_index_t where;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
398
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
399 ssearch.ss_start = *cursor;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
400 ssearch.ss_end = *cursor + size;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
401
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
402 ss = avl_find(t, &ssearch, &where);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
403 if (ss == NULL)
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
404 ss = avl_nearest(t, where, AVL_AFTER);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
405
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
406 while (ss != NULL) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
407 uint64_t offset = P2ROUNDUP(ss->ss_start, align);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
408
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
409 if (offset + size <= ss->ss_end) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
410 *cursor = offset + size;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
411 return (offset);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
412 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
413 ss = AVL_NEXT(t, ss);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
414 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
415
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
416 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
417 * If we know we've searched the whole map (*cursor == 0), give up.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
418 * Otherwise, reset the cursor to the beginning and try again.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
419 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
420 if (*cursor == 0)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
421 return (-1ULL);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
422
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
423 *cursor = 0;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
424 return (metaslab_block_picker(t, cursor, size, align));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
425 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
426
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
427 static void
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
428 metaslab_pp_load(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
429 {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
430 space_seg_t *ss;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
431
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
432 ASSERT(sm->sm_ppd == NULL);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
433 sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
434
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
435 sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
436 avl_create(sm->sm_pp_root, metaslab_segsize_compare,
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
437 sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
438
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
439 for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
440 avl_add(sm->sm_pp_root, ss);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
441 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
442
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
443 static void
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
444 metaslab_pp_unload(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
445 {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
446 void *cookie = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
447
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
448 kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
449 sm->sm_ppd = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
450
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
451 while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
452 /* tear down the tree */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
453 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
454
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
455 avl_destroy(sm->sm_pp_root);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
456 kmem_free(sm->sm_pp_root, sizeof (avl_tree_t));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
457 sm->sm_pp_root = NULL;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
458 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
459
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
460 /* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
461 static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
462 metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
463 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
464 /* No need to update cursor */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
465 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
466
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
467 /* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
468 static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
469 metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
470 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
471 /* No need to update cursor */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
472 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
473
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
474 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
475 * Return the maximum contiguous segment within the metaslab.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
476 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
477 uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
478 metaslab_pp_maxsize(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
479 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
480 avl_tree_t *t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
481 space_seg_t *ss;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
482
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
483 if (t == NULL || (ss = avl_last(t)) == NULL)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
484 return (0ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
485
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
486 return (ss->ss_end - ss->ss_start);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
487 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
488
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
489 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
490 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
491 * The first-fit block allocator
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
492 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
493 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
494 static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
495 metaslab_ff_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
496 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
497 avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
498 uint64_t align = size & -size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
499 uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
500
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
501 return (metaslab_block_picker(t, cursor, size, align));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
502 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
503
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
504 /* ARGSUSED */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
505 boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
506 metaslab_ff_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
507 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
508 return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
509 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
510
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
511 static space_map_ops_t metaslab_ff_ops = {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
512 metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
513 metaslab_pp_unload,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
514 metaslab_ff_alloc,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
515 metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
516 metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
517 metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
518 metaslab_ff_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
519 };
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
520
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
521 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
522 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
523 * Dynamic block allocator -
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
524 * Uses the first fit allocation scheme until space gets low and then
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
525 * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
526 * and metaslab_df_free_pct to determine when to switch the allocation scheme.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
527 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
528 */
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
529 static uint64_t
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
530 metaslab_df_alloc(space_map_t *sm, uint64_t size)
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
531 {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
532 avl_tree_t *t = &sm->sm_root;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
533 uint64_t align = size & -size;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
534 uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
535 uint64_t max_size = metaslab_pp_maxsize(sm);
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
536 int free_pct = sm->sm_space * 100 / sm->sm_size;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
537
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
538 ASSERT(MUTEX_HELD(sm->sm_lock));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
539 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
540
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
541 if (max_size < size)
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
542 return (-1ULL);
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
543
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
544 /*
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
545 * If we're running low on space switch to using the size
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
546 * sorted AVL tree (best-fit).
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
547 */
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
548 if (max_size < metaslab_df_alloc_threshold ||
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
549 free_pct < metaslab_df_free_pct) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
550 t = sm->sm_pp_root;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
551 *cursor = 0;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
552 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
553
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
554 return (metaslab_block_picker(t, cursor, size, 1ULL));
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
555 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
556
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
557 static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
558 metaslab_df_fragmented(space_map_t *sm)
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
559 {
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
560 uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
561 int free_pct = sm->sm_space * 100 / sm->sm_size;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
562
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
563 if (max_size >= metaslab_df_alloc_threshold &&
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
564 free_pct >= metaslab_df_free_pct)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
565 return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
566
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
567 return (B_TRUE);
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
568 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
569
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
570 static space_map_ops_t metaslab_df_ops = {
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
571 metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
572 metaslab_pp_unload,
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
573 metaslab_df_alloc,
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
574 metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
575 metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
576 metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
577 metaslab_df_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
578 };
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
579
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
580 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
581 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
582 * Other experimental allocators
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
583 * ==========================================================================
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
584 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
585 static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
586 metaslab_cdf_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
587 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
588 avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
589 uint64_t *cursor = (uint64_t *)sm->sm_ppd;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
590 uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
591 uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
592 uint64_t rsize = size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
593 uint64_t offset = 0;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
594
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
595 ASSERT(MUTEX_HELD(sm->sm_lock));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
596 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
597
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
598 if (max_size < size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
599 return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
600
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
601 ASSERT3U(*extent_end, >=, *cursor);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
602
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
603 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
604 * If we're running low on space switch to using the size
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
605 * sorted AVL tree (best-fit).
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
606 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
607 if ((*cursor + size) > *extent_end) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
608
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
609 t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
610 *cursor = *extent_end = 0;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
611
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
612 if (max_size > 2 * SPA_MAXBLOCKSIZE)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
613 rsize = MIN(metaslab_min_alloc_size, max_size);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
614 offset = metaslab_block_picker(t, extent_end, rsize, 1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
615 if (offset != -1)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
616 *cursor = offset + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
617 } else {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
618 offset = metaslab_block_picker(t, cursor, rsize, 1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
619 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
620 ASSERT3U(*cursor, <=, *extent_end);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
621 return (offset);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
622 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
623
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
624 static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
625 metaslab_cdf_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
626 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
627 uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
628
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
629 if (max_size > (metaslab_min_alloc_size * 10))
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
630 return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
631 return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
632 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
633
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
634 static space_map_ops_t metaslab_cdf_ops = {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
635 metaslab_pp_load,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
636 metaslab_pp_unload,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
637 metaslab_cdf_alloc,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
638 metaslab_pp_claim,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
639 metaslab_pp_free,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
640 metaslab_pp_maxsize,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
641 metaslab_cdf_fragmented
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
642 };
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
643
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
/*
 * Global tunable, initialised to 4.
 * NOTE(review): presumably a shift count used by metaslab_ndf_alloc(),
 * going by the name -- confirm against the code that reads it.
 */
uint64_t metaslab_ndf_clump_shift = 4;
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
645
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
646 static uint64_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
647 metaslab_ndf_alloc(space_map_t *sm, uint64_t size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
648 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
649 avl_tree_t *t = &sm->sm_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
650 avl_index_t where;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
651 space_seg_t *ss, ssearch;
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
652 uint64_t hbit = highbit(size);
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
653 uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
654 uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
655
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
656 ASSERT(MUTEX_HELD(sm->sm_lock));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
657 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
658
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
659 if (max_size < size)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
660 return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
661
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
662 ssearch.ss_start = *cursor;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
663 ssearch.ss_end = *cursor + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
664
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
665 ss = avl_find(t, &ssearch, &where);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
666 if (ss == NULL || (ss->ss_start + size > ss->ss_end)) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
667 t = sm->sm_pp_root;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
668
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
669 ssearch.ss_start = 0;
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
670 ssearch.ss_end = MIN(max_size,
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
671 1ULL << (hbit + metaslab_ndf_clump_shift));
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
672 ss = avl_find(t, &ssearch, &where);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
673 if (ss == NULL)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
674 ss = avl_nearest(t, where, AVL_AFTER);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
675 ASSERT(ss != NULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
676 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
677
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
678 if (ss != NULL) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
679 if (ss->ss_start + size <= ss->ss_end) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
680 *cursor = ss->ss_start + size;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
681 return (ss->ss_start);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
682 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
683 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
684 return (-1ULL);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
685 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
686
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
687 static boolean_t
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
688 metaslab_ndf_fragmented(space_map_t *sm)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
689 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
690 uint64_t max_size = metaslab_pp_maxsize(sm);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
691
12047
7c1fcc8419ca 6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents: 11146
diff changeset
692 if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift))
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
693 return (B_FALSE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
694 return (B_TRUE);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
695 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
696
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
697
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
/*
 * Operations vector for the ndf allocator.  Only the allocation and
 * fragmentation entries are ndf-specific; every other entry is one of the
 * shared metaslab_pp_* routines.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of space_map_ops_t (declared outside this file section).
 */
static space_map_ops_t metaslab_ndf_ops = {
	metaslab_pp_load,
	metaslab_pp_unload,
	metaslab_ndf_alloc,
	metaslab_pp_claim,
	metaslab_pp_free,
	metaslab_pp_maxsize,
	metaslab_ndf_fragmented
};
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
707
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
/*
 * Globally visible pointer to an allocator operations vector; initialized
 * to the df allocator's table (metaslab_df_ops).
 */
space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
709
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
710 /*
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
711 * ==========================================================================
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
712 * Metaslabs
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
713 * ==========================================================================
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
714 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
715 metaslab_t *
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
716 metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
717 uint64_t start, uint64_t size, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
718 {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
719 vdev_t *vd = mg->mg_vd;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
720 metaslab_t *msp;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
721
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
722 msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP);
2856
6f4d5ee1906a 6463348 ZFS code could be more portable
nd150628
parents: 2459
diff changeset
723 mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
724
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
725 msp->ms_smo_syncing = *smo;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
726
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
727 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
728 * We create the main space map here, but we don't create the
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
729 * allocmaps and freemaps until metaslab_sync_done(). This serves
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
730 * two purposes: it allows metaslab_sync_done() to detect the
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
731 * addition of new space; and for debugging, it ensures that we'd
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
732 * data fault on any attempt to use this metaslab before it's ready.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
733 */
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
734 msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
735 space_map_create(msp->ms_map, start, size,
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
736 vd->vdev_ashift, &msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
737
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
738 metaslab_group_add(mg, msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
739
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
740 if (metaslab_debug && smo->smo_object != 0) {
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
741 mutex_enter(&msp->ms_lock);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
742 VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops,
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
743 SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
744 mutex_exit(&msp->ms_lock);
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
745 }
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
746
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
747 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
748 * If we're opening an existing pool (txg == 0) or creating
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
749 * a new one (txg == TXG_INITIAL), all space is available now.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
750 * If we're adding space to an existing pool, the new space
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
751 * does not become available until after this txg has synced.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
752 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
753 if (txg <= TXG_INITIAL)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
754 metaslab_sync_done(msp, 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
755
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
756 if (txg != 0) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
757 vdev_dirty(vd, 0, NULL, txg);
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
758 vdev_dirty(vd, VDD_METASLAB, msp, txg);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
759 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
760
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
761 return (msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
762 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
763
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
764 void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
765 metaslab_fini(metaslab_t *msp)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
766 {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
767 metaslab_group_t *mg = msp->ms_group;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
768
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
769 vdev_space_update(mg->mg_vd,
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
770 -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
771
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
772 metaslab_group_remove(mg, msp);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
773
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
774 mutex_enter(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
775
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
776 space_map_unload(msp->ms_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
777 space_map_destroy(msp->ms_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
778 kmem_free(msp->ms_map, sizeof (*msp->ms_map));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
779
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
780 for (int t = 0; t < TXG_SIZE; t++) {
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
781 space_map_destroy(msp->ms_allocmap[t]);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
782 space_map_destroy(msp->ms_freemap[t]);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
783 kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t]));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
784 kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t]));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
785 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
786
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
787 for (int t = 0; t < TXG_DEFER_SIZE; t++) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
788 space_map_destroy(msp->ms_defermap[t]);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
789 kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t]));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
790 }
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
791
13805
e3a9ae14a119 3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
Madhav Suresh <madhav.suresh@delphix.com>
parents: 13765
diff changeset
792 ASSERT0(msp->ms_deferspace);
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
793
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
794 mutex_exit(&msp->ms_lock);
2856
6f4d5ee1906a 6463348 ZFS code could be more portable
nd150628
parents: 2459
diff changeset
795 mutex_destroy(&msp->ms_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
796
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
797 kmem_free(msp, sizeof (metaslab_t));
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
798 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
799
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
/*
 * The two most significant bits of a 64-bit metaslab weight are flag bits
 * rather than part of the numeric weight.  METASLAB_ACTIVE_MASK selects
 * both of them; metaslab_weight() uses it to carry the flags over from
 * msp->ms_weight into a freshly computed weight.
 */
#define	METASLAB_WEIGHT_PRIMARY		(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
#define	METASLAB_ACTIVE_MASK		\
	(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
804
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
805 static uint64_t
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
806 metaslab_weight(metaslab_t *msp)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
807 {
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
808 metaslab_group_t *mg = msp->ms_group;
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
809 space_map_t *sm = msp->ms_map;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
810 space_map_obj_t *smo = &msp->ms_smo;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
811 vdev_t *vd = mg->mg_vd;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
812 uint64_t weight, space;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
813
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
814 ASSERT(MUTEX_HELD(&msp->ms_lock));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
815
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
816 /*
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
817 * This vdev is in the process of being removed so there is nothing
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
818 * for us to do here.
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
819 */
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
820 if (vd->vdev_removing) {
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
821 ASSERT0(smo->smo_alloc);
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
822 ASSERT0(vd->vdev_ms_shift);
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
823 return (0);
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
824 }
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
825
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
826 /*
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
827 * The baseline weight is the metaslab's free space.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
828 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
829 space = sm->sm_size - smo->smo_alloc;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
830 weight = space;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
831
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
832 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
833 * Modern disks have uniform bit density and constant angular velocity.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
834 * Therefore, the outer recording zones are faster (higher bandwidth)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
835 * than the inner zones by the ratio of outer to inner track diameter,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
836 * which is typically around 2:1. We account for this by assigning
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
837 * higher weight to lower metaslabs (multiplier ranging from 2x to 1x).
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
838 * In effect, this means that we'll select the metaslab with the most
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
839 * free bandwidth rather than simply the one with the most free space.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
840 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
841 weight = 2 * weight -
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
842 ((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
843 ASSERT(weight >= space && weight <= 2 * space);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
844
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
845 /*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
846 * For locality, assign higher weight to metaslabs which have
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
847 * a lower offset than what we've already activated.
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
848 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
849 if (sm->sm_start <= mg->mg_bonus_area)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
850 weight *= (metaslab_smo_bonus_pct / 100);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
851 ASSERT(weight >= space &&
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
852 weight <= 2 * (metaslab_smo_bonus_pct / 100) * space);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
853
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
854 if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
855 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
856 * If this metaslab is one we're actively using, adjust its
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
857 * weight to make it preferable to any inactive metaslab so
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
858 * we'll polish it off.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
859 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
860 weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
861 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
862 return (weight);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
863 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
864
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
865 static void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
866 metaslab_prefetch(metaslab_group_t *mg)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
867 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
868 spa_t *spa = mg->mg_vd->vdev_spa;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
869 metaslab_t *msp;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
870 avl_tree_t *t = &mg->mg_metaslab_tree;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
871 int m;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
872
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
873 mutex_enter(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
874
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
875 /*
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
876 * Prefetch the next potential metaslabs
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
877 */
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
878 for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
879 space_map_t *sm = msp->ms_map;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
880 space_map_obj_t *smo = &msp->ms_smo;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
881
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
882 /* If we have reached our prefetch limit then we're done */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
883 if (m >= metaslab_prefetch_limit)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
884 break;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
885
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
886 if (!sm->sm_loaded && smo->smo_object != 0) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
887 mutex_exit(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
888 dmu_prefetch(spa_meta_objset(spa), smo->smo_object,
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
889 0ULL, smo->smo_objsize);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
890 mutex_enter(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
891 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
892 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
893 mutex_exit(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
894 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
895
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
896 static int
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
897 metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
898 {
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
899 metaslab_group_t *mg = msp->ms_group;
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
900 space_map_t *sm = msp->ms_map;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
901 space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
902
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
903 ASSERT(MUTEX_HELD(&msp->ms_lock));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
904
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
905 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
906 space_map_load_wait(sm);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
907 if (!sm->sm_loaded) {
13879
4eac7a87eff2 3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents: 13805
diff changeset
908 space_map_obj_t *smo = &msp->ms_smo;
4eac7a87eff2 3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents: 13805
diff changeset
909
4eac7a87eff2 3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents: 13805
diff changeset
910 int error = space_map_load(sm, sm_ops, SM_FREE, smo,
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
911 spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
912 if (error) {
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
913 metaslab_group_sort(msp->ms_group, msp, 0);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
914 return (error);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
915 }
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
916 for (int t = 0; t < TXG_DEFER_SIZE; t++)
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
917 space_map_walk(msp->ms_defermap[t],
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
918 space_map_claim, sm);
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
919
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
920 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
921
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
922 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
923 * Track the bonus area as we activate new metaslabs.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
924 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
925 if (sm->sm_start > mg->mg_bonus_area) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
926 mutex_enter(&mg->mg_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
927 mg->mg_bonus_area = sm->sm_start;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
928 mutex_exit(&mg->mg_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
929 }
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
930
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
931 metaslab_group_sort(msp->ms_group, msp,
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
932 msp->ms_weight | activation_weight);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
933 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
934 ASSERT(sm->sm_loaded);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
935 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
936
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
937 return (0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
938 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
939
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
940 static void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
941 metaslab_passivate(metaslab_t *msp, uint64_t size)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
942 {
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
943 /*
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
944 * If size < SPA_MINBLOCKSIZE, then we will not allocate from
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
945 * this metaslab again. In that case, it had better be empty,
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
946 * or we would be leaving space on the table.
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
947 */
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
948 ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
949 metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
950 ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
951 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
952
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
953 /*
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
954 * Determine if the in-core space map representation can be condensed on-disk.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
955 * We would like to use the following criteria to make our decision:
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
956 *
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
957 * 1. The size of the space map object should not dramatically increase as a
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
958 * result of writing out our in-core free map.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
959 *
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
960 * 2. The minimal on-disk space map representation is zfs_condense_pct/100
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
961 * times the size of the in-core representation (i.e. zfs_condense_pct = 110
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
962 * and in-core = 1MB, minimal = 1.1MB).
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
963 *
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
964 * Checking the first condition is tricky since we don't want to walk
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
965 * the entire AVL tree calculating the estimated on-disk size. Instead we
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
966 * use the size-ordered AVL tree in the space map and calculate the
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
967 * size required for the largest segment in our in-core free map. If the
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
968 * size required to represent that segment on disk is larger than the space
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
969 * map object then we avoid condensing this map.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
970 *
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
971 * To determine the second criterion we use a best-case estimate and assume
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
972 * each segment can be represented on-disk as a single 64-bit entry. We refer
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
973 * to this best-case estimate as the space map's minimal form.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
974 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
975 static boolean_t
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
976 metaslab_should_condense(metaslab_t *msp)
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
977 {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
978 space_map_t *sm = msp->ms_map;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
979 space_map_obj_t *smo = &msp->ms_smo_syncing;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
980 space_seg_t *ss;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
981 uint64_t size, entries, segsz;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
982
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
983 ASSERT(MUTEX_HELD(&msp->ms_lock));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
984 ASSERT(sm->sm_loaded);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
985
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
986 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
987 * Use the sm_pp_root AVL tree, which is ordered by size, to obtain
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
988 * the largest segment in the in-core free map. If the tree is
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
989 * empty then we should condense the map.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
990 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
991 ss = avl_last(sm->sm_pp_root);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
992 if (ss == NULL)
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
993 return (B_TRUE);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
994
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
995 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
996 * Calculate the number of 64-bit entries this segment would
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
997 * require when written to disk. If this single segment would be
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
998 * larger on-disk than the entire current on-disk structure, then
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
999 * clearly condensing will increase the on-disk structure size.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1000 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1001 size = (ss->ss_end - ss->ss_start) >> sm->sm_shift;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1002 entries = size / (MIN(size, SM_RUN_MAX));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1003 segsz = entries * sizeof (uint64_t);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1004
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1005 return (segsz <= smo->smo_objsize &&
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1006 smo->smo_objsize >= (zfs_condense_pct *
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1007 sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1008 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1009
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1010 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1011 * Condense the on-disk space map representation to its minimized form.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1012 * The minimized form consists of a small number of allocations followed by
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1013 * the in-core free map.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1014 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1015 static void
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1016 metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1017 {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1018 spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1019 space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK];
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1020 space_map_t condense_map;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1021 space_map_t *sm = msp->ms_map;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1022 objset_t *mos = spa_meta_objset(spa);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1023 space_map_obj_t *smo = &msp->ms_smo_syncing;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1024
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1025 ASSERT(MUTEX_HELD(&msp->ms_lock));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1026 ASSERT3U(spa_sync_pass(spa), ==, 1);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1027 ASSERT(sm->sm_loaded);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1028
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1029 spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, "
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1030 "smo size %llu, segments %lu", txg,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1031 (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1032 smo->smo_objsize, avl_numnodes(&sm->sm_root));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1033
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1034 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1035 * Create a map that is a 100% allocated map. We remove segments
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1036 * that have been freed in this txg, any deferred frees that exist,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1037 * and any allocation in the future. Removing segments should be
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1038 * a relatively inexpensive operation since we expect these maps to
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1039 * have a small number of nodes.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1040 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1041 space_map_create(&condense_map, sm->sm_start, sm->sm_size,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1042 sm->sm_shift, sm->sm_lock);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1043 space_map_add(&condense_map, condense_map.sm_start,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1044 condense_map.sm_size);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1045
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1046 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1047 * Remove what's been freed in this txg from the condense_map.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1048 * Since we're in sync_pass 1, we know that all the frees from
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1049 * this txg are in the freemap.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1050 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1051 space_map_walk(freemap, space_map_remove, &condense_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1052
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1053 for (int t = 0; t < TXG_DEFER_SIZE; t++)
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1054 space_map_walk(msp->ms_defermap[t],
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1055 space_map_remove, &condense_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1056
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1057 for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1058 space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK],
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1059 space_map_remove, &condense_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1060
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1061 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1062 * We're about to drop the metaslab's lock thus allowing
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1063 * other consumers to change its content. Set the
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1064 * space_map's sm_condensing flag to ensure that
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1065 * allocations on this metaslab do not occur while we're
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1066 * in the middle of committing it to disk. This is only critical
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1067 * for the ms_map as all other space_maps use per txg
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1068 * views of their content.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1069 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1070 sm->sm_condensing = B_TRUE;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1071
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1072 mutex_exit(&msp->ms_lock);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1073 space_map_truncate(smo, mos, tx);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1074 mutex_enter(&msp->ms_lock);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1075
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1076 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1077 * While we would ideally like to create a space_map representation
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1078 * that consists only of allocation records, doing so can be
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1079 * prohibitively expensive because the in-core free map can be
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1080 * large, and therefore computationally expensive to subtract
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1081 * from the condense_map. Instead we sync out two maps, a cheap
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1082 * allocation-only map followed by the in-core free map. While not
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1083 * optimal, this is typically close to optimal, and much cheaper to
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1084 * compute.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1085 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1086 space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1087 space_map_vacate(&condense_map, NULL, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1088 space_map_destroy(&condense_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1089
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1090 space_map_sync(sm, SM_FREE, smo, mos, tx);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1091 sm->sm_condensing = B_FALSE;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1092
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1093 spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, "
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1094 "smo size %llu", txg,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1095 (msp->ms_map->sm_start / msp->ms_map->sm_size), msp,
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1096 smo->smo_objsize);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1097 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1098
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1099 /*
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1100 * Write a metaslab to disk in the context of the specified transaction group.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1101 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1102 void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1103 metaslab_sync(metaslab_t *msp, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1104 {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1105 vdev_t *vd = msp->ms_group->mg_vd;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1106 spa_t *spa = vd->vdev_spa;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1107 objset_t *mos = spa_meta_objset(spa);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1108 space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK];
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1109 space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK];
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1110 space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1111 space_map_t *sm = msp->ms_map;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1112 space_map_obj_t *smo = &msp->ms_smo_syncing;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1113 dmu_buf_t *db;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1114 dmu_tx_t *tx;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1115
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1116 ASSERT(!vd->vdev_ishole);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1117
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1118 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1119 * This metaslab has just been added so there's no work to do now.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1120 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1121 if (*freemap == NULL) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1122 ASSERT3P(allocmap, ==, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1123 return;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1124 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1125
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1126 ASSERT3P(allocmap, !=, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1127 ASSERT3P(*freemap, !=, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1128 ASSERT3P(*freed_map, !=, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1129
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1130 if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0)
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1131 return;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1132
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1133 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1134 * The only state that can actually be changing concurrently with
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1135 * metaslab_sync() is the metaslab's ms_map. No other thread can
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1136 * be modifying this txg's allocmap, freemap, freed_map, or smo.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1137 * Therefore, we only hold ms_lock to satisfy space_map ASSERTs.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1138 * We drop it whenever we call into the DMU, because the DMU
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1139 * can call down to us (e.g. via zio_free()) at any time.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1140 */
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1141
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1142 tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1143
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1144 if (smo->smo_object == 0) {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1145 ASSERT(smo->smo_objsize == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1146 ASSERT(smo->smo_alloc == 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1147 smo->smo_object = dmu_object_alloc(mos,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1148 DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT,
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1149 DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1150 ASSERT(smo->smo_object != 0);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1151 dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) *
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1152 (sm->sm_start >> vd->vdev_ms_shift),
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1153 sizeof (uint64_t), &smo->smo_object, tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1154 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1155
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1156 mutex_enter(&msp->ms_lock);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1157
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1158 if (sm->sm_loaded && spa_sync_pass(spa) == 1 &&
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1159 metaslab_should_condense(msp)) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1160 metaslab_condense(msp, txg, tx);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1161 } else {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1162 space_map_sync(allocmap, SM_ALLOC, smo, mos, tx);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1163 space_map_sync(*freemap, SM_FREE, smo, mos, tx);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1164 }
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1165
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1166 space_map_vacate(allocmap, NULL, NULL);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1167
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1168 /*
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1169 * For sync pass 1, we avoid walking the entire space map and
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1170 * instead will just swap the pointers for freemap and
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1171 * freed_map. We can safely do this since the freed_map is
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1172 * guaranteed to be empty on the initial pass.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1173 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1174 if (spa_sync_pass(spa) == 1) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1175 ASSERT0((*freed_map)->sm_space);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1176 ASSERT0(avl_numnodes(&(*freed_map)->sm_root));
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1177 space_map_swap(freemap, freed_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1178 } else {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1179 space_map_vacate(*freemap, space_map_add, *freed_map);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1180 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1181
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1182 ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1183 ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1184
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1185 mutex_exit(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1186
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1187 VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1188 dmu_buf_will_dirty(db, tx);
4944
96d96f8de974 6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents: 4527
diff changeset
1189 ASSERT3U(db->db_size, >=, sizeof (*smo));
96d96f8de974 6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents: 4527
diff changeset
1190 bcopy(smo, db->db_data, sizeof (*smo));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1191 dmu_buf_rele(db, FTAG);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1192
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1193 dmu_tx_commit(tx);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1194 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1195
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1196 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1197 * Called after a transaction group has completely synced to mark
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1198 * all of the metaslab's free space as usable.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1199 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1200 void
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1201 metaslab_sync_done(metaslab_t *msp, uint64_t txg)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1202 {
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1203 space_map_obj_t *smo = &msp->ms_smo;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1204 space_map_obj_t *smosync = &msp->ms_smo_syncing;
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1205 space_map_t *sm = msp->ms_map;
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1206 space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1207 space_map_t **defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1208 metaslab_group_t *mg = msp->ms_group;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1209 vdev_t *vd = mg->mg_vd;
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1210 int64_t alloc_delta, defer_delta;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1211
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1212 ASSERT(!vd->vdev_ishole);
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1213
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1214 mutex_enter(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1215
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1216 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1217 * If this metaslab is just becoming available, initialize its
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1218 * allocmaps, freemaps, and defermap and add its capacity to the vdev.
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1219 */
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1220 if (*freed_map == NULL) {
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1221 ASSERT(*defer_map == NULL);
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1222 for (int t = 0; t < TXG_SIZE; t++) {
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1223 msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t),
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1224 KM_SLEEP);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1225 space_map_create(msp->ms_allocmap[t], sm->sm_start,
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1226 sm->sm_size, sm->sm_shift, sm->sm_lock);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1227 msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t),
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1228 KM_SLEEP);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1229 space_map_create(msp->ms_freemap[t], sm->sm_start,
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1230 sm->sm_size, sm->sm_shift, sm->sm_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1231 }
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1232
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1233 for (int t = 0; t < TXG_DEFER_SIZE; t++) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1234 msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t),
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1235 KM_SLEEP);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1236 space_map_create(msp->ms_defermap[t], sm->sm_start,
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1237 sm->sm_size, sm->sm_shift, sm->sm_lock);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1238 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1239
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1240 freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK];
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1241 defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE];
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1242
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1243 vdev_space_update(vd, 0, 0, sm->sm_size);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1244 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1245
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1246 alloc_delta = smosync->smo_alloc - smo->smo_alloc;
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1247 defer_delta = (*freed_map)->sm_space - (*defer_map)->sm_space;
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1248
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1249 vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1250
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1251 ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1252 ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1253
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1254 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1255 * If there's a space_map_load() in progress, wait for it to complete
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1256 * so that we have a consistent view of the in-core space map.
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1257 */
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1258 space_map_load_wait(sm);
13966
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1259
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1260 /*
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1261 * Move the frees from the defer_map to this map (if it's loaded).
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1262 * Swap the freed_map and the defer_map -- this is safe to do
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1263 * because we've just emptied out the defer_map.
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1264 */
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1265 space_map_vacate(*defer_map, sm->sm_loaded ? space_map_free : NULL, sm);
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1266 ASSERT0((*defer_map)->sm_space);
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1267 ASSERT0(avl_numnodes(&(*defer_map)->sm_root));
0e1d84ebb004 3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents: 13959
diff changeset
1268 space_map_swap(freed_map, defer_map);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1269
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1270 *smo = *smosync;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1271
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1272 msp->ms_deferspace += defer_delta;
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1273 ASSERT3S(msp->ms_deferspace, >=, 0);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1274 ASSERT3S(msp->ms_deferspace, <=, sm->sm_size);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1275 if (msp->ms_deferspace != 0) {
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1276 /*
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1277 * Keep syncing this metaslab until all deferred frees
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1278 * are back in circulation.
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1279 */
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1280 vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1281 }
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1282
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1283 /*
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1284 * If the map is loaded but no longer active, evict it as soon as all
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1285 * future allocations have synced. (If we unloaded it now and then
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1286 * loaded a moment later, the map wouldn't reflect those allocations.)
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1287 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1288 if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1289 int evictable = 1;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1290
10921
8aac17999e4d PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents: 10594
diff changeset
1291 for (int t = 1; t < TXG_CONCURRENT_STATES; t++)
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1292 if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1293 evictable = 0;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1294
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1295 if (evictable && !metaslab_debug)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1296 space_map_unload(sm);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1297 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1298
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1299 metaslab_group_sort(mg, msp, metaslab_weight(msp));
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1300
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1301 mutex_exit(&msp->ms_lock);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1302 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1303
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1304 void
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1305 metaslab_sync_reassess(metaslab_group_t *mg)
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1306 {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1307 vdev_t *vd = mg->mg_vd;
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1308 int64_t failures = mg->mg_alloc_failures;
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1309
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1310 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1311 * Re-evaluate all metaslabs which have lower offsets than the
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1312 * bonus area.
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1313 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1314 for (int m = 0; m < vd->vdev_ms_count; m++) {
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1315 metaslab_t *msp = vd->vdev_ms[m];
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1316
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1317 if (msp->ms_map->sm_start > mg->mg_bonus_area)
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1318 break;
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1319
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1320 mutex_enter(&msp->ms_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1321 metaslab_group_sort(mg, msp, metaslab_weight(msp));
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1322 mutex_exit(&msp->ms_lock);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1323 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1324
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1325 atomic_add_64(&mg->mg_alloc_failures, -failures);
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1326
11146
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1327 /*
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1328 * Prefetch the next potential metaslabs
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1329 */
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1330 metaslab_prefetch(mg);
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1331 }
7e58f40bcb1c 6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents: 11066
diff changeset
1332
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1333 static uint64_t
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1334 metaslab_distance(metaslab_t *msp, dva_t *dva)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1335 {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1336 uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1337 uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift;
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1338 uint64_t start = msp->ms_map->sm_start >> ms_shift;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1339
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1340 if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva))
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1341 return (1ULL << 63);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1342
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1343 if (offset < start)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1344 return ((start - offset) << ms_shift);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1345 if (offset > start)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1346 return ((offset - start) << ms_shift);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1347 return (0);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1348 }
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1349
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1350 static uint64_t
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1351 metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1352 uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1353 {
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1354 spa_t *spa = mg->mg_vd->vdev_spa;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1355 metaslab_t *msp = NULL;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1356 uint64_t offset = -1ULL;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1357 avl_tree_t *t = &mg->mg_metaslab_tree;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1358 uint64_t activation_weight;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1359 uint64_t target_distance;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1360 int i;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1361
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1362 activation_weight = METASLAB_WEIGHT_PRIMARY;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1363 for (i = 0; i < d; i++) {
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1364 if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1365 activation_weight = METASLAB_WEIGHT_SECONDARY;
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1366 break;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1367 }
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1368 }
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1369
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1370 for (;;) {
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1371 boolean_t was_active;
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1372
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1373 mutex_enter(&mg->mg_lock);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1374 for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1375 if (msp->ms_weight < asize) {
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1376 spa_dbgmsg(spa, "%s: failed to meet weight "
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1377 "requirement: vdev %llu, txg %llu, mg %p, "
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1378 "msp %p, psize %llu, asize %llu, "
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1379 "failures %llu, weight %llu",
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1380 spa_name(spa), mg->mg_vd->vdev_id, txg,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1381 mg, msp, psize, asize,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1382 mg->mg_alloc_failures, msp->ms_weight);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1383 mutex_exit(&mg->mg_lock);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1384 return (-1ULL);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1385 }
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1386 was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1387 if (activation_weight == METASLAB_WEIGHT_PRIMARY)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1388 break;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1389
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1390 target_distance = min_distance +
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1391 (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1392
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1393 for (i = 0; i < d; i++)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1394 if (metaslab_distance(msp, &dva[i]) <
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1395 target_distance)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1396 break;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1397 if (i == d)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1398 break;
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1399 }
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1400 mutex_exit(&mg->mg_lock);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1401 if (msp == NULL)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1402 return (-1ULL);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1403
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1404 /*
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1405 * If we've already reached the allowable number of failed
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1406 * allocation attempts on this metaslab group then we
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1407 * consider skipping it. We skip it only if we're allowed
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1408 * to "fast" gang, the physical size is larger than
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1409 * a gang block, and we're attempting to allocate from
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1410 * the primary metaslab.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1411 */
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1412 if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1413 CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1414 activation_weight == METASLAB_WEIGHT_PRIMARY) {
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1415 spa_dbgmsg(spa, "%s: skipping metaslab group: "
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1416 "vdev %llu, txg %llu, mg %p, psize %llu, "
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1417 "asize %llu, failures %llu", spa_name(spa),
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1418 mg->mg_vd->vdev_id, txg, mg, psize, asize,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1419 mg->mg_alloc_failures);
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1420 return (-1ULL);
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1421 }
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1422
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1423 mutex_enter(&msp->ms_lock);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1424
3848
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1425 /*
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1426 * If this metaslab is currently condensing then pick again as
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1427 * we can't manipulate this metaslab until it's committed
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1428 * to disk.
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1429 */
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1430 if (msp->ms_map->sm_condensing) {
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1431 mutex_exit(&msp->ms_lock);
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1432 continue;
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1433 }
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1434
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1435 /*
3848
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1436 * Ensure that the metaslab we have selected is still
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1437 * capable of handling our request. It's possible that
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1438 * another thread may have changed the weight while we
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1439 * were blocked on the metaslab lock.
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1440 */
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1441 if (msp->ms_weight < asize || (was_active &&
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1442 !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1443 activation_weight == METASLAB_WEIGHT_PRIMARY)) {
3848
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1444 mutex_exit(&msp->ms_lock);
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1445 continue;
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1446 }
abf146257cf9 6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents: 3713
diff changeset
1447
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1448 if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) &&
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1449 activation_weight == METASLAB_WEIGHT_PRIMARY) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1450 metaslab_passivate(msp,
2459
7511d9859fcd 6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents: 2391
diff changeset
1451 msp->ms_weight & ~METASLAB_ACTIVE_MASK);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1452 mutex_exit(&msp->ms_lock);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1453 continue;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1454 }
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1455
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1456 if (metaslab_activate(msp, activation_weight) != 0) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1457 mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1458 continue;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1459 }
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1460
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1461 if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1462 break;
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1463
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1464 atomic_inc_64(&mg->mg_alloc_failures);
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1465
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1466 metaslab_passivate(msp, space_map_maxsize(msp->ms_map));
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1467
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1468 mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1469 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1470
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1471 if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1472 vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1473
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1474 space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1475
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1476 mutex_exit(&msp->ms_lock);
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1477
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1478 return (offset);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1479 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1480
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1481 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1482 * Allocate a block for the specified i/o.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1483 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1484 static int
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1485 metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1486 dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1487 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1488 metaslab_group_t *mg, *rotor;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1489 vdev_t *vd;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1490 int dshift = 3;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1491 int all_zero;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1492 int zio_lock = B_FALSE;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1493 boolean_t allocatable;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1494 uint64_t offset = -1ULL;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1495 uint64_t asize;
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1496 uint64_t distance;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1497
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1498 ASSERT(!DVA_IS_VALID(&dva[d]));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1499
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1500 /*
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
1501 * For testing, make some blocks above a certain size be gang blocks.
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
1502 */
11066
cebb50cbe4f9 PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents: 11026
diff changeset
1503 if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
5530
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
1504 return (ENOSPC);
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
1505
4ed96167d864 6354519 stack overflow in zfs due to zio pipeline
bonwick
parents: 5450
diff changeset
1506 /*
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1507 * Start at the rotor and loop through all mgs until we find something.
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1508 * Note that there's no locking on mc_rotor or mc_aliquot because
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1509 * nothing actually breaks if we miss a few updates -- we just won't
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1510 * allocate quite as evenly. It all balances out over time.
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1511 *
3063
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
1512 * If we are doing ditto or log blocks, try to spread them across
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
1513 * consecutive vdevs. If we're forced to reuse a vdev before we've
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
1514 * allocated all of our ditto blocks, then try and spread them out on
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
1515 * that vdev as much as possible. If it turns out to not be possible,
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1516 * gradually lower our standards until anything becomes acceptable.
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1517 * Also, allocating on consecutive vdevs (as opposed to random vdevs)
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1518 * gives us hope of containing our fault domains to something we're
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1519 * able to reason about. Otherwise, any two top-level vdev failures
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1520 * will guarantee the loss of data. With consecutive allocation,
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1521 * only two adjacent top-level vdev failures will result in data loss.
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1522 *
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1523 * If we are doing gang blocks (hintdva is non-NULL), try to keep
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1524 * ourselves on the same vdev as our gang block header. That
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1525 * way, we can hope for locality in vdev_cache, plus it makes our
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1526 * fault domains something tractable.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1527 */
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1528 if (hintdva) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1529 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d]));
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1530
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1531 /*
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1532 * It's possible the vdev we're using as the hint no
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1533 * longer exists (i.e. removed). Consult the rotor when
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1534 * all else fails.
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1535 */
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1536 if (vd != NULL) {
3063
b252896b372b 6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents: 2856
diff changeset
1537 mg = vd->vdev_mg;
10594
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1538
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1539 if (flags & METASLAB_HINTBP_AVOID &&
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1540 mg->mg_next != NULL)
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1541 mg = mg->mg_next;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1542 } else {
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1543 mg = mc->mc_rotor;
986cb68d2347 6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents: 9480
diff changeset
1544 }
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1545 } else if (d != 0) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1546 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1]));
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1547 mg = vd->vdev_mg->mg_next;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1548 } else {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1549 mg = mc->mc_rotor;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1550 }
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1551
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1552 /*
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1553 * If the hint put us into the wrong metaslab class, or into a
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1554 * metaslab group that has been passivated, just follow the rotor.
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1555 */
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1556 if (mg->mg_class != mc || mg->mg_activation_count <= 0)
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1557 mg = mc->mc_rotor;
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1558
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1559 rotor = mg;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1560 top:
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1561 all_zero = B_TRUE;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1562 do {
10974
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1563 ASSERT(mg->mg_activation_count == 1);
32d689ba6466 6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10922
diff changeset
1564
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1565 vd = mg->mg_vd;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1566
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1567 /*
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1568 * Don't allocate from faulted devices.
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1569 */
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1570 if (zio_lock) {
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1571 spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1572 allocatable = vdev_allocatable(vd);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1573 spa_config_exit(spa, SCL_ZIO, FTAG);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1574 } else {
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1575 allocatable = vdev_allocatable(vd);
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1576 }
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1577 if (!allocatable)
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1578 goto next;
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1579
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1580 /*
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1581 * Avoid writing single-copy data to a failing vdev
13945
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
1582 * unless the user instructs us that it is okay.
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1583 */
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1584 if ((vd->vdev_stat.vs_write_errors > 0 ||
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1585 vd->vdev_state < VDEV_STATE_HEALTHY) &&
13945
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
1586 d == 0 && dshift == 3 &&
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
1587 !(zfs_write_to_degraded && vd->vdev_state ==
7a9c1d41dfbe 3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents: 13879
diff changeset
1588 VDEV_STATE_DEGRADED)) {
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1589 all_zero = B_FALSE;
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1590 goto next;
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1591 }
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1592
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1593 ASSERT(mg->mg_class == mc);
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1594
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1595 distance = vd->vdev_asize >> dshift;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1596 if (distance <= (1ULL << vd->vdev_ms_shift))
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1597 distance = 0;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1598 else
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1599 all_zero = B_FALSE;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1600
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1601 asize = vdev_psize_to_asize(vd, psize);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1602 ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1603
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1604 offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1605 dva, d, flags);
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1606 if (offset != -1ULL) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1607 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1608 * If we've just selected this metaslab group,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1609 * figure out whether the corresponding vdev is
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1610 * over- or under-used relative to the pool,
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1611 * and set an allocation bias to even it out.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1612 */
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1613 if (mc->mc_aliquot == 0) {
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1614 vdev_stat_t *vs = &vd->vdev_stat;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1615 int64_t vu, cu;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1616
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1617 vu = (vs->vs_alloc * 100) / (vs->vs_space + 1);
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1618 cu = (mc->mc_alloc * 100) / (mc->mc_space + 1);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1619
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1620 /*
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1621 * Calculate how much more or less we should
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1622 * try to allocate from this device during
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1623 * this iteration around the rotor.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1624 * For example, if a device is 80% full
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1625 * and the pool is 20% full then we should
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1626 * reduce allocations by 60% on this device.
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1627 *
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1628 * mg_bias = (20 - 80) * 512K / 100 = -307K
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1629 *
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1630 * This reduces allocations by 307K for this
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1631 * iteration.
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1632 */
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1633 mg->mg_bias = ((cu - vu) *
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1634 (int64_t)mg->mg_aliquot) / 100;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1635 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1636
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1637 if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1638 mg->mg_aliquot + mg->mg_bias) {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1639 mc->mc_rotor = mg->mg_next;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1640 mc->mc_aliquot = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1641 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1642
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1643 DVA_SET_VDEV(&dva[d], vd->vdev_id);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1644 DVA_SET_OFFSET(&dva[d], offset);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1645 DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER));
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1646 DVA_SET_ASIZE(&dva[d], asize);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1647
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1648 return (0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1649 }
5329
33cb98223b2d PSARC 2007/567 zpool failmode property
gw25295
parents: 4944
diff changeset
1650 next:
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1651 mc->mc_rotor = mg->mg_next;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1652 mc->mc_aliquot = 0;
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1653 } while ((mg = mg->mg_next) != rotor);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1654
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1655 if (!all_zero) {
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1656 dshift++;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1657 ASSERT(dshift < 64);
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1658 goto top;
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1659 }
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1660
9480
fcff33da767f 6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents: 8241
diff changeset
1661 if (!allocatable && !zio_lock) {
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1662 dshift = 3;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1663 zio_lock = B_TRUE;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1664 goto top;
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1665 }
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1666
1775
e51e26b432c0 6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents: 1732
diff changeset
1667 bzero(&dva[d], sizeof (dva_t));
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1668
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1669 return (ENOSPC);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1670 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1671
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1672 /*
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1673 * Free the block represented by DVA in the context of the specified
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1674 * transaction group.
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1675 */
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1676 static void
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1677 metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1678 {
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1679 uint64_t vdev = DVA_GET_VDEV(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1680 uint64_t offset = DVA_GET_OFFSET(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1681 uint64_t size = DVA_GET_ASIZE(dva);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1682 vdev_t *vd;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1683 metaslab_t *msp;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1684
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1685 ASSERT(DVA_IS_VALID(dva));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1686
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1687 if (txg > spa_freeze_txg(spa))
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1688 return;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1689
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1690 if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1691 (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1692 cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu",
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1693 (u_longlong_t)vdev, (u_longlong_t)offset);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1694 ASSERT(0);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1695 return;
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1696 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1697
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1698 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1699
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1700 if (DVA_GET_GANG(dva))
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1701 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1702
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1703 mutex_enter(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1704
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1705 if (now) {
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1706 space_map_remove(msp->ms_allocmap[txg & TXG_MASK],
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1707 offset, size);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1708 space_map_free(msp->ms_map, offset, size);
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1709 } else {
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1710 if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0)
1732
9e3ae798af31 6280668 pluggable block allocation policy
bonwick
parents: 1544
diff changeset
1711 vdev_dirty(vd, VDD_METASLAB, msp, txg);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1712 space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size);
789
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1713 }
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1714
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1715 mutex_exit(&msp->ms_lock);
b348f31ed315 PSARC 2002/240 ZFS
ahrens
parents:
diff changeset
1716 }
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1717
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1718 /*
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1719 * Intent log support: upon opening the pool after a crash, notify the SPA
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1720 * of blocks that the intent log has allocated for immediate write, but
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1721 * which are still considered free by the SPA because the last transaction
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1722 * group didn't commit yet.
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1723 */
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1724 static int
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1725 metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1726 {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1727 uint64_t vdev = DVA_GET_VDEV(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1728 uint64_t offset = DVA_GET_OFFSET(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1729 uint64_t size = DVA_GET_ASIZE(dva);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1730 vdev_t *vd;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1731 metaslab_t *msp;
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1732 int error = 0;
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1733
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1734 ASSERT(DVA_IS_VALID(dva));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1735
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1736 if ((vd = vdev_lookup_top(spa, vdev)) == NULL ||
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1737 (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1738 return (ENXIO);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1739
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1740 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1741
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1742 if (DVA_GET_GANG(dva))
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1743 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1744
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1745 mutex_enter(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1746
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1747 if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded)
13379
4df42cc92254 1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents: 12047
diff changeset
1748 error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1749
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1750 if (error == 0 && !space_map_contains(msp->ms_map, offset, size))
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1751 error = ENOENT;
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1752
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1753 if (error || txg == 0) { /* txg == 0 indicates dry run */
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1754 mutex_exit(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1755 return (error);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1756 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1757
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1758 space_map_claim(msp->ms_map, offset, size);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1759
8241
5a60f16123ba 6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 7980
diff changeset
1760 if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1761 if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0)
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1762 vdev_dirty(vd, VDD_METASLAB, msp, txg);
13959
e03e14ddfb4c 3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents: 13945
diff changeset
1763 space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1764 }
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1765
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1766 mutex_exit(&msp->ms_lock);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1767
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1768 return (0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1769 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1770
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1771 int
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1772 metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1773 int ndvas, uint64_t txg, blkptr_t *hintbp, int flags)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1774 {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1775 dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1776 dva_t *hintdva = hintbp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1777 int error = 0;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1778
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1779 ASSERT(bp->blk_birth == 0);
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1780 ASSERT(BP_PHYSICAL_BIRTH(bp) == 0);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1781
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1782 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1783
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1784 if (mc->mc_rotor == NULL) { /* no vdevs in this class */
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1785 spa_config_exit(spa, SCL_ALLOC, FTAG);
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1786 return (ENOSPC);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1787 }
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1788
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1789 ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1790 ASSERT(BP_GET_NDVAS(bp) == 0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1791 ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1792
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1793 for (int d = 0; d < ndvas; d++) {
4527
5d5b6ba91b17 PSARC 2007/171 ZFS Separate Intent Log
perrin
parents: 3878
diff changeset
1794 error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva,
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1795 txg, flags);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1796 if (error) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1797 for (d--; d >= 0; d--) {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1798 metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1799 bzero(&dva[d], sizeof (dva_t));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1800 }
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1801 spa_config_exit(spa, SCL_ALLOC, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1802 return (error);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1803 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1804 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1805 ASSERT(error == 0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1806 ASSERT(BP_GET_NDVAS(bp) == ndvas);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1807
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1808 spa_config_exit(spa, SCL_ALLOC, FTAG);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1809
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1810 BP_SET_BIRTH(bp, txg, txg);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1811
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1812 return (0);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1813 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1814
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1815 void
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1816 metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1817 {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1818 const dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1819 int ndvas = BP_GET_NDVAS(bp);
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1820
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1821 ASSERT(!BP_IS_HOLE(bp));
10922
e2081f502306 PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 10921
diff changeset
1822 ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa));
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1823
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1824 spa_config_enter(spa, SCL_FREE, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1825
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1826 for (int d = 0; d < ndvas; d++)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1827 metaslab_free_dva(spa, &dva[d], txg, now);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1828
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1829 spa_config_exit(spa, SCL_FREE, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1830 }
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1831
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1832 int
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1833 metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1834 {
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1835 const dva_t *dva = bp->blk_dva;
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1836 int ndvas = BP_GET_NDVAS(bp);
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1837 int error = 0;
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1838
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1839 ASSERT(!BP_IS_HOLE(bp));
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1840
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1841 if (txg != 0) {
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1842 /*
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1843 * First do a dry run to make sure all DVAs are claimable,
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1844 * so we don't have to unwind from partial failures below.
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1845 */
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1846 if ((error = metaslab_claim(spa, bp, 0)) != 0)
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1847 return (error);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1848 }
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1849
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1850 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1851
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1852 for (int d = 0; d < ndvas; d++)
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1853 if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0)
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1854 break;
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1855
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1856 spa_config_exit(spa, SCL_ALLOC, FTAG);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1857
7754
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1858 ASSERT(error == 0 || txg == 0);
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1859
b80e4842ad54 6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents: 5530
diff changeset
1860 return (error);
1807
35c8b566d7af 6410711 intent log blocks don't get invited to pool parties
bonwick
parents: 1775
diff changeset
1861 }