Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/metaslab.c @ 13966:0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
3579 ztest trips assertion in metaslab_weight()
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Richard Elling <richard.elling@dey-sys.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
author | George Wilson <george.wilson@delphix.com> |
---|---|
date | Wed, 20 Feb 2013 13:30:36 -0800 |
parents | e03e14ddfb4c |
children | e4988c7d0403 |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1544 | 5 * Common Development and Distribution License (the "License"). |
6 * You may not use this file except in compliance with the License. | |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
13572
85c66b89d5f2
1909 disk sync write perf regression when slog is used post oi_148
George Wilson <george.wilson@delphix.com>
parents:
13379
diff
changeset
|
23 * Copyright (c) 2012 by Delphix. All rights reserved. |
13945
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. |
789 | 25 */ |
26 | |
27 #include <sys/zfs_context.h> | |
28 #include <sys/dmu.h> | |
29 #include <sys/dmu_tx.h> | |
30 #include <sys/space_map.h> | |
31 #include <sys/metaslab_impl.h> | |
32 #include <sys/vdev_impl.h> | |
33 #include <sys/zio.h> | |
34 | |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
/*
 * "Fast" ganging lets an allocation fall back to gang blocks quickly so
 * that we do not have to load many space_maps within a single txg.  Some
 * allocations must not take that shortcut and should instead search every
 * metaslab on the device exhaustively.  At present, gang, zil, and dump
 * device related allocations are never allowed to "fast" gang.
 *
 * Evaluates to non-zero only when none of the METASLAB_GANG_CHILD,
 * METASLAB_GANG_HEADER, or METASLAB_GANG_AVOID bits are set in 'flags'.
 */
#define	CAN_FASTGANG(flags) \
	(((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
	METASLAB_GANG_AVOID)) == 0)
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
46 |
2391 | 47 uint64_t metaslab_aliquot = 512ULL << 10; |
5530 | 48 uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ |
2391 | 49 |
789 | 50 /* |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
51 * The in-core space map representation is more compact than its on-disk form. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
52 * The zfs_condense_pct determines how much more compact the in-core |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
53 * space_map representation must be before we compact it on-disk. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
54 * Values should be greater than or equal to 100. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
55 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
56 int zfs_condense_pct = 200; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
57 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
58 /* |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
59 * This value defines the number of allowed allocation failures per vdev. |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
60 * If a device reaches this threshold in a given txg then we consider skipping |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
61 * allocations on that device. |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
62 */ |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
63 int zfs_mg_alloc_failures; |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
64 |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
65 /* |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
66 * Metaslab debugging: when set, keeps all space maps in core to verify frees. |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
67 */ |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
68 static int metaslab_debug = 0; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
69 |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
70 /* |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
71 * Minimum size which forces the dynamic allocator to change |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
72 * it's allocation strategy. Once the space map cannot satisfy |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
73 * an allocation of this size then it switches to using more |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
74 * aggressive strategy (i.e search by size rather than offset). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
75 */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
76 uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
77 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
78 /* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
79 * The minimum free space, in percent, which must be available |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
80 * in a space map to continue allocations in a first-fit fashion. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
81 * Once the space_map's free space drops below this level we dynamically |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
82 * switch to using best-fit allocations. |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
83 */ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
84 int metaslab_df_free_pct = 4; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
85 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
86 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
87 * A metaslab is considered "free" if it contains a contiguous |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
88 * segment which is greater than metaslab_min_alloc_size. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
89 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
90 uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
91 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
92 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
93 * Max number of space_maps to prefetch. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
94 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
95 int metaslab_prefetch_limit = SPA_DVAS_PER_BP; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
96 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
97 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
98 * Percentage bonus multiplier for metaslabs that are in the bonus area. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
99 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
100 int metaslab_smo_bonus_pct = 150; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
101 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
102 /* |
13945
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
103 * Should we be willing to write data to degraded vdevs? |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
104 */ |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
105 boolean_t zfs_write_to_degraded = B_FALSE; |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
106 |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
107 /* |
789 | 108 * ========================================================================== |
109 * Metaslab classes | |
110 * ========================================================================== | |
111 */ | |
112 metaslab_class_t * | |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
113 metaslab_class_create(spa_t *spa, space_map_ops_t *ops) |
789 | 114 { |
115 metaslab_class_t *mc; | |
116 | |
117 mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); | |
118 | |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
119 mc->mc_spa = spa; |
789 | 120 mc->mc_rotor = NULL; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
121 mc->mc_ops = ops; |
789 | 122 |
123 return (mc); | |
124 } | |
125 | |
126 void | |
127 metaslab_class_destroy(metaslab_class_t *mc) | |
128 { | |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
129 ASSERT(mc->mc_rotor == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
130 ASSERT(mc->mc_alloc == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
131 ASSERT(mc->mc_deferred == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
132 ASSERT(mc->mc_space == 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
133 ASSERT(mc->mc_dspace == 0); |
789 | 134 |
135 kmem_free(mc, sizeof (metaslab_class_t)); | |
136 } | |
137 | |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
138 int |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
139 metaslab_class_validate(metaslab_class_t *mc) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
140 { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
141 metaslab_group_t *mg; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
142 vdev_t *vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
143 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
144 /* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
145 * Must hold one of the spa_config locks. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
146 */ |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
147 ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
148 spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
149 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
150 if ((mg = mc->mc_rotor) == NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
151 return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
152 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
153 do { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
154 vd = mg->mg_vd; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
155 ASSERT(vd->vdev_mg != NULL); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
156 ASSERT3P(vd->vdev_top, ==, vd); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
157 ASSERT3P(mg->mg_class, ==, mc); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
158 ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
159 } while ((mg = mg->mg_next) != mc->mc_rotor); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
160 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
161 return (0); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
162 } |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
163 |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
164 void |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
165 metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta, |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
166 int64_t defer_delta, int64_t space_delta, int64_t dspace_delta) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
167 { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
168 atomic_add_64(&mc->mc_alloc, alloc_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
169 atomic_add_64(&mc->mc_deferred, defer_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
170 atomic_add_64(&mc->mc_space, space_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
171 atomic_add_64(&mc->mc_dspace, dspace_delta); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
172 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
173 |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
174 uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
175 metaslab_class_get_alloc(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
176 { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
177 return (mc->mc_alloc); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
178 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
179 |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
180 uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
181 metaslab_class_get_deferred(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
182 { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
183 return (mc->mc_deferred); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
184 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
185 |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
186 uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
187 metaslab_class_get_space(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
188 { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
189 return (mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
190 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
191 |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
192 uint64_t |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
193 metaslab_class_get_dspace(metaslab_class_t *mc) |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
194 { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
195 return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
196 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
197 |
789 | 198 /* |
199 * ========================================================================== | |
200 * Metaslab groups | |
201 * ========================================================================== | |
202 */ | |
203 static int | |
204 metaslab_compare(const void *x1, const void *x2) | |
205 { | |
206 const metaslab_t *m1 = x1; | |
207 const metaslab_t *m2 = x2; | |
208 | |
209 if (m1->ms_weight < m2->ms_weight) | |
210 return (1); | |
211 if (m1->ms_weight > m2->ms_weight) | |
212 return (-1); | |
213 | |
214 /* | |
215 * If the weights are identical, use the offset to force uniqueness. | |
216 */ | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
217 if (m1->ms_map->sm_start < m2->ms_map->sm_start) |
789 | 218 return (-1); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
219 if (m1->ms_map->sm_start > m2->ms_map->sm_start) |
789 | 220 return (1); |
221 | |
222 ASSERT3P(m1, ==, m2); | |
223 | |
224 return (0); | |
225 } | |
226 | |
227 metaslab_group_t * | |
228 metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) | |
229 { | |
230 metaslab_group_t *mg; | |
231 | |
232 mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); | |
233 mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); | |
234 avl_create(&mg->mg_metaslab_tree, metaslab_compare, | |
235 sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); | |
236 mg->mg_vd = vd; | |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
237 mg->mg_class = mc; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
238 mg->mg_activation_count = 0; |
789 | 239 |
240 return (mg); | |
241 } | |
242 | |
243 void | |
244 metaslab_group_destroy(metaslab_group_t *mg) | |
245 { | |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
246 ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
247 ASSERT(mg->mg_next == NULL); |
11026
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
248 /* |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
249 * We may have gone below zero with the activation count |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
250 * either because we never activated in the first place or |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
251 * because we're done, and possibly removing the vdev. |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
252 */ |
e8e10df16a8f
6899159 injection isn't trashing pools
Tim Haley <Tim.Haley@Sun.COM>
parents:
10974
diff
changeset
|
253 ASSERT(mg->mg_activation_count <= 0); |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
254 |
789 | 255 avl_destroy(&mg->mg_metaslab_tree); |
256 mutex_destroy(&mg->mg_lock); | |
257 kmem_free(mg, sizeof (metaslab_group_t)); | |
258 } | |
259 | |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
260 void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
261 metaslab_group_activate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
262 { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
263 metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
264 metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
265 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
266 ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
267 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
268 ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
269 ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
270 ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
271 ASSERT(mg->mg_activation_count <= 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
272 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
273 if (++mg->mg_activation_count <= 0) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
274 return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
275 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
276 mg->mg_aliquot = metaslab_aliquot * MAX(1, mg->mg_vd->vdev_children); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
277 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
278 if ((mgprev = mc->mc_rotor) == NULL) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
279 mg->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
280 mg->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
281 } else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
282 mgnext = mgprev->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
283 mg->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
284 mg->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
285 mgprev->mg_next = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
286 mgnext->mg_prev = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
287 } |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
288 mc->mc_rotor = mg; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
289 } |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
290 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
291 void |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
292 metaslab_group_passivate(metaslab_group_t *mg) |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
293 { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
294 metaslab_class_t *mc = mg->mg_class; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
295 metaslab_group_t *mgprev, *mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
296 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
297 ASSERT(spa_config_held(mc->mc_spa, SCL_ALLOC, RW_WRITER)); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
298 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
299 if (--mg->mg_activation_count != 0) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
300 ASSERT(mc->mc_rotor != mg); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
301 ASSERT(mg->mg_prev == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
302 ASSERT(mg->mg_next == NULL); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
303 ASSERT(mg->mg_activation_count < 0); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
304 return; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
305 } |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
306 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
307 mgprev = mg->mg_prev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
308 mgnext = mg->mg_next; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
309 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
310 if (mg == mgnext) { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
311 mc->mc_rotor = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
312 } else { |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
313 mc->mc_rotor = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
314 mgprev->mg_next = mgnext; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
315 mgnext->mg_prev = mgprev; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
316 } |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
317 |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
318 mg->mg_prev = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
319 mg->mg_next = NULL; |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
320 } |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
321 |
1732 | 322 static void |
323 metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) | |
789 | 324 { |
325 mutex_enter(&mg->mg_lock); | |
326 ASSERT(msp->ms_group == NULL); | |
327 msp->ms_group = mg; | |
1732 | 328 msp->ms_weight = 0; |
789 | 329 avl_add(&mg->mg_metaslab_tree, msp); |
330 mutex_exit(&mg->mg_lock); | |
331 } | |
332 | |
1732 | 333 static void |
789 | 334 metaslab_group_remove(metaslab_group_t *mg, metaslab_t *msp) |
335 { | |
336 mutex_enter(&mg->mg_lock); | |
337 ASSERT(msp->ms_group == mg); | |
338 avl_remove(&mg->mg_metaslab_tree, msp); | |
339 msp->ms_group = NULL; | |
340 mutex_exit(&mg->mg_lock); | |
341 } | |
342 | |
1732 | 343 static void |
789 | 344 metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) |
345 { | |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
346 /* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
347 * Although in principle the weight can be any value, in |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
348 * practice we do not use values in the range [1, 510]. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
349 */ |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
350 ASSERT(weight >= SPA_MINBLOCKSIZE-1 || weight == 0); |
1732 | 351 ASSERT(MUTEX_HELD(&msp->ms_lock)); |
352 | |
789 | 353 mutex_enter(&mg->mg_lock); |
354 ASSERT(msp->ms_group == mg); | |
355 avl_remove(&mg->mg_metaslab_tree, msp); | |
356 msp->ms_weight = weight; | |
357 avl_add(&mg->mg_metaslab_tree, msp); | |
358 mutex_exit(&mg->mg_lock); | |
359 } | |
360 | |
361 /* | |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
362 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
363 * Common allocator routines |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
364 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
365 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
366 static int |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
367 metaslab_segsize_compare(const void *x1, const void *x2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
368 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
369 const space_seg_t *s1 = x1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
370 const space_seg_t *s2 = x2; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
371 uint64_t ss_size1 = s1->ss_end - s1->ss_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
372 uint64_t ss_size2 = s2->ss_end - s2->ss_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
373 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
374 if (ss_size1 < ss_size2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
375 return (-1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
376 if (ss_size1 > ss_size2) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
377 return (1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
378 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
379 if (s1->ss_start < s2->ss_start) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
380 return (-1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
381 if (s1->ss_start > s2->ss_start) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
382 return (1); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
383 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
384 return (0); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
385 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
386 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
387 /* |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
388 * This is a helper function that can be used by the allocator to find |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
389 * a suitable block to allocate. This will search the specified AVL |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
390 * tree looking for a block that matches the specified criteria. |
789 | 391 */ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
392 static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
393 metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
394 uint64_t align) |
789 | 395 { |
396 space_seg_t *ss, ssearch; | |
397 avl_index_t where; | |
398 | |
399 ssearch.ss_start = *cursor; | |
400 ssearch.ss_end = *cursor + size; | |
401 | |
402 ss = avl_find(t, &ssearch, &where); | |
403 if (ss == NULL) | |
404 ss = avl_nearest(t, where, AVL_AFTER); | |
405 | |
406 while (ss != NULL) { | |
407 uint64_t offset = P2ROUNDUP(ss->ss_start, align); | |
408 | |
409 if (offset + size <= ss->ss_end) { | |
410 *cursor = offset + size; | |
411 return (offset); | |
412 } | |
413 ss = AVL_NEXT(t, ss); | |
414 } | |
415 | |
1732 | 416 /* |
417 * If we know we've searched the whole map (*cursor == 0), give up. | |
418 * Otherwise, reset the cursor to the beginning and try again. | |
419 */ | |
420 if (*cursor == 0) | |
421 return (-1ULL); | |
422 | |
423 *cursor = 0; | |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
424 return (metaslab_block_picker(t, cursor, size, align)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
425 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
426 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
427 static void |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
428 metaslab_pp_load(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
429 { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
430 space_seg_t *ss; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
431 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
432 ASSERT(sm->sm_ppd == NULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
433 sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
434 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
435 sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
436 avl_create(sm->sm_pp_root, metaslab_segsize_compare, |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
437 sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
438 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
439 for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
440 avl_add(sm->sm_pp_root, ss); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
441 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
442 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
443 static void |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
444 metaslab_pp_unload(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
445 { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
446 void *cookie = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
447 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
448 kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
449 sm->sm_ppd = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
450 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
451 while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
452 /* tear down the tree */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
453 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
454 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
455 avl_destroy(sm->sm_pp_root); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
456 kmem_free(sm->sm_pp_root, sizeof (avl_tree_t)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
457 sm->sm_pp_root = NULL; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
458 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
459 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
460 /* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
461 static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
462 metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
463 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
464 /* No need to update cursor */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
465 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
466 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
467 /* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
468 static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
469 metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
470 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
471 /* No need to update cursor */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
472 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
473 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
474 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
475 * Return the maximum contiguous segment within the metaslab. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
476 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
477 uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
478 metaslab_pp_maxsize(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
479 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
480 avl_tree_t *t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
481 space_seg_t *ss; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
482 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
483 if (t == NULL || (ss = avl_last(t)) == NULL) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
484 return (0ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
485 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
486 return (ss->ss_end - ss->ss_start); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
487 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
488 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
489 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
490 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
491 * The first-fit block allocator |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
492 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
493 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
494 static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
495 metaslab_ff_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
496 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
497 avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
498 uint64_t align = size & -size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
499 uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
500 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
501 return (metaslab_block_picker(t, cursor, size, align)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
502 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
503 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
504 /* ARGSUSED */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
505 boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
506 metaslab_ff_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
507 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
508 return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
509 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
510 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
511 static space_map_ops_t metaslab_ff_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
512 metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
513 metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
514 metaslab_ff_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
515 metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
516 metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
517 metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
518 metaslab_ff_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
519 }; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
520 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
521 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
522 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
523 * Dynamic block allocator - |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
524 * Uses the first fit allocation scheme until space gets low and then |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
525 * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
526 * and metaslab_df_free_pct to determine when to switch the allocation scheme. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
527 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
528 */ |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
529 static uint64_t |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
530 metaslab_df_alloc(space_map_t *sm, uint64_t size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
531 { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
532 avl_tree_t *t = &sm->sm_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
533 uint64_t align = size & -size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
534 uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
535 uint64_t max_size = metaslab_pp_maxsize(sm); |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
536 int free_pct = sm->sm_space * 100 / sm->sm_size; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
537 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
538 ASSERT(MUTEX_HELD(sm->sm_lock)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
539 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
540 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
541 if (max_size < size) |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
542 return (-1ULL); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
543 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
544 /* |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
545 * If we're running low on space switch to using the size |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
546 * sorted AVL tree (best-fit). |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
547 */ |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
548 if (max_size < metaslab_df_alloc_threshold || |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
549 free_pct < metaslab_df_free_pct) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
550 t = sm->sm_pp_root; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
551 *cursor = 0; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
552 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
553 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
554 return (metaslab_block_picker(t, cursor, size, 1ULL)); |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
555 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
556 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
557 static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
558 metaslab_df_fragmented(space_map_t *sm) |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
559 { |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
560 uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
561 int free_pct = sm->sm_space * 100 / sm->sm_size; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
562 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
563 if (max_size >= metaslab_df_alloc_threshold && |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
564 free_pct >= metaslab_df_free_pct) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
565 return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
566 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
567 return (B_TRUE); |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
568 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
569 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
570 static space_map_ops_t metaslab_df_ops = { |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
571 metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
572 metaslab_pp_unload, |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
573 metaslab_df_alloc, |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
574 metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
575 metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
576 metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
577 metaslab_df_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
578 }; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
579 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
580 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
581 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
582 * Other experimental allocators |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
583 * ========================================================================== |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
584 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
585 static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
586 metaslab_cdf_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
587 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
588 avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
589 uint64_t *cursor = (uint64_t *)sm->sm_ppd; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
590 uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
591 uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
592 uint64_t rsize = size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
593 uint64_t offset = 0; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
594 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
595 ASSERT(MUTEX_HELD(sm->sm_lock)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
596 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
597 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
598 if (max_size < size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
599 return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
600 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
601 ASSERT3U(*extent_end, >=, *cursor); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
602 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
603 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
604 * If we're running low on space switch to using the size |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
605 * sorted AVL tree (best-fit). |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
606 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
607 if ((*cursor + size) > *extent_end) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
608 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
609 t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
610 *cursor = *extent_end = 0; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
611 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
612 if (max_size > 2 * SPA_MAXBLOCKSIZE) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
613 rsize = MIN(metaslab_min_alloc_size, max_size); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
614 offset = metaslab_block_picker(t, extent_end, rsize, 1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
615 if (offset != -1) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
616 *cursor = offset + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
617 } else { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
618 offset = metaslab_block_picker(t, cursor, rsize, 1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
619 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
620 ASSERT3U(*cursor, <=, *extent_end); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
621 return (offset); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
622 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
623 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
624 static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
625 metaslab_cdf_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
626 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
627 uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
628 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
629 if (max_size > (metaslab_min_alloc_size * 10)) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
630 return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
631 return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
632 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
633 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
634 static space_map_ops_t metaslab_cdf_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
635 metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
636 metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
637 metaslab_cdf_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
638 metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
639 metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
640 metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
641 metaslab_cdf_fragmented |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
642 }; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
643 |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
644 uint64_t metaslab_ndf_clump_shift = 4; |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
645 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
646 static uint64_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
647 metaslab_ndf_alloc(space_map_t *sm, uint64_t size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
648 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
649 avl_tree_t *t = &sm->sm_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
650 avl_index_t where; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
651 space_seg_t *ss, ssearch; |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
652 uint64_t hbit = highbit(size); |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
653 uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
654 uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
655 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
656 ASSERT(MUTEX_HELD(sm->sm_lock)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
657 ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
658 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
659 if (max_size < size) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
660 return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
661 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
662 ssearch.ss_start = *cursor; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
663 ssearch.ss_end = *cursor + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
664 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
665 ss = avl_find(t, &ssearch, &where); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
666 if (ss == NULL || (ss->ss_start + size > ss->ss_end)) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
667 t = sm->sm_pp_root; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
668 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
669 ssearch.ss_start = 0; |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
670 ssearch.ss_end = MIN(max_size, |
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
671 1ULL << (hbit + metaslab_ndf_clump_shift)); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
672 ss = avl_find(t, &ssearch, &where); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
673 if (ss == NULL) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
674 ss = avl_nearest(t, where, AVL_AFTER); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
675 ASSERT(ss != NULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
676 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
677 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
678 if (ss != NULL) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
679 if (ss->ss_start + size <= ss->ss_end) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
680 *cursor = ss->ss_start + size; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
681 return (ss->ss_start); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
682 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
683 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
684 return (-1ULL); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
685 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
686 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
687 static boolean_t |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
688 metaslab_ndf_fragmented(space_map_t *sm) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
689 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
690 uint64_t max_size = metaslab_pp_maxsize(sm); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
691 |
12047
7c1fcc8419ca
6917066 zfs block picking can be improved
Mark J Musante <Mark.Musante@Sun.COM>
parents:
11146
diff
changeset
|
692 if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift)) |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
693 return (B_FALSE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
694 return (B_TRUE); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
695 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
696 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
697 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
698 static space_map_ops_t metaslab_ndf_ops = { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
699 metaslab_pp_load, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
700 metaslab_pp_unload, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
701 metaslab_ndf_alloc, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
702 metaslab_pp_claim, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
703 metaslab_pp_free, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
704 metaslab_pp_maxsize, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
705 metaslab_ndf_fragmented |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
706 }; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
707 |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
708 space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
709 |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
710 /* |
1732 | 711 * ========================================================================== |
712 * Metaslabs | |
713 * ========================================================================== | |
714 */ | |
715 metaslab_t * | |
716 metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, | |
717 uint64_t start, uint64_t size, uint64_t txg) | |
718 { | |
719 vdev_t *vd = mg->mg_vd; | |
720 metaslab_t *msp; | |
721 | |
722 msp = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP); | |
2856 | 723 mutex_init(&msp->ms_lock, NULL, MUTEX_DEFAULT, NULL); |
1732 | 724 |
725 msp->ms_smo_syncing = *smo; | |
726 | |
727 /* | |
728 * We create the main space map here, but we don't create the | |
729 * allocmaps and freemaps until metaslab_sync_done(). This serves | |
730 * two purposes: it allows metaslab_sync_done() to detect the | |
731 * addition of new space; and for debugging, it ensures that we'd | |
732 * data fault on any attempt to use this metaslab before it's ready. | |
733 */ | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
734 msp->ms_map = kmem_zalloc(sizeof (space_map_t), KM_SLEEP); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
735 space_map_create(msp->ms_map, start, size, |
1732 | 736 vd->vdev_ashift, &msp->ms_lock); |
737 | |
738 metaslab_group_add(mg, msp); | |
739 | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
740 if (metaslab_debug && smo->smo_object != 0) { |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
741 mutex_enter(&msp->ms_lock); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
742 VERIFY(space_map_load(msp->ms_map, mg->mg_class->mc_ops, |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
743 SM_FREE, smo, spa_meta_objset(vd->vdev_spa)) == 0); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
744 mutex_exit(&msp->ms_lock); |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
745 } |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
746 |
1732 | 747 /* |
748 * If we're opening an existing pool (txg == 0) or creating | |
749 * a new one (txg == TXG_INITIAL), all space is available now. | |
750 * If we're adding space to an existing pool, the new space | |
751 * does not become available until after this txg has synced. | |
752 */ | |
753 if (txg <= TXG_INITIAL) | |
754 metaslab_sync_done(msp, 0); | |
755 | |
756 if (txg != 0) { | |
757 vdev_dirty(vd, 0, NULL, txg); | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
758 vdev_dirty(vd, VDD_METASLAB, msp, txg); |
789 | 759 } |
760 | |
1732 | 761 return (msp); |
762 } | |
763 | |
764 void | |
765 metaslab_fini(metaslab_t *msp) | |
766 { | |
767 metaslab_group_t *mg = msp->ms_group; | |
768 | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
769 vdev_space_update(mg->mg_vd, |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
770 -msp->ms_smo.smo_alloc, 0, -msp->ms_map->sm_size); |
1732 | 771 |
772 metaslab_group_remove(mg, msp); | |
773 | |
774 mutex_enter(&msp->ms_lock); | |
775 | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
776 space_map_unload(msp->ms_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
777 space_map_destroy(msp->ms_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
778 kmem_free(msp->ms_map, sizeof (*msp->ms_map)); |
1732 | 779 |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
780 for (int t = 0; t < TXG_SIZE; t++) { |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
781 space_map_destroy(msp->ms_allocmap[t]); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
782 space_map_destroy(msp->ms_freemap[t]); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
783 kmem_free(msp->ms_allocmap[t], sizeof (*msp->ms_allocmap[t])); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
784 kmem_free(msp->ms_freemap[t], sizeof (*msp->ms_freemap[t])); |
1732 | 785 } |
786 | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
787 for (int t = 0; t < TXG_DEFER_SIZE; t++) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
788 space_map_destroy(msp->ms_defermap[t]); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
789 kmem_free(msp->ms_defermap[t], sizeof (*msp->ms_defermap[t])); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
790 } |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
791 |
13805
e3a9ae14a119
3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
Madhav Suresh <madhav.suresh@delphix.com>
parents:
13765
diff
changeset
|
792 ASSERT0(msp->ms_deferspace); |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
793 |
1732 | 794 mutex_exit(&msp->ms_lock); |
2856 | 795 mutex_destroy(&msp->ms_lock); |
1732 | 796 |
797 kmem_free(msp, sizeof (metaslab_t)); | |
789 | 798 } |
799 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
800 #define METASLAB_WEIGHT_PRIMARY (1ULL << 63) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
801 #define METASLAB_WEIGHT_SECONDARY (1ULL << 62) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
802 #define METASLAB_ACTIVE_MASK \ |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
803 (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY) |
1732 | 804 |
789 | 805 static uint64_t |
1732 | 806 metaslab_weight(metaslab_t *msp) |
807 { | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
808 metaslab_group_t *mg = msp->ms_group; |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
809 space_map_t *sm = msp->ms_map; |
1732 | 810 space_map_obj_t *smo = &msp->ms_smo; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
811 vdev_t *vd = mg->mg_vd; |
1732 | 812 uint64_t weight, space; |
813 | |
814 ASSERT(MUTEX_HELD(&msp->ms_lock)); | |
815 | |
816 /* | |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
817 * This vdev is in the process of being removed so there is nothing |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
818 * for us to do here. |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
819 */ |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
820 if (vd->vdev_removing) { |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
821 ASSERT0(smo->smo_alloc); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
822 ASSERT0(vd->vdev_ms_shift); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
823 return (0); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
824 } |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
825 |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
826 /* |
1732 | 827 * The baseline weight is the metaslab's free space. |
828 */ | |
829 space = sm->sm_size - smo->smo_alloc; | |
830 weight = space; | |
831 | |
832 /* | |
833 * Modern disks have uniform bit density and constant angular velocity. | |
834 * Therefore, the outer recording zones are faster (higher bandwidth) | |
835 * than the inner zones by the ratio of outer to inner track diameter, | |
836 * which is typically around 2:1. We account for this by assigning | |
837 * higher weight to lower metaslabs (multiplier ranging from 2x to 1x). | |
838 * In effect, this means that we'll select the metaslab with the most | |
839 * free bandwidth rather than simply the one with the most free space. | |
840 */ | |
841 weight = 2 * weight - | |
842 ((sm->sm_start >> vd->vdev_ms_shift) * weight) / vd->vdev_ms_count; | |
843 ASSERT(weight >= space && weight <= 2 * space); | |
844 | |
845 /* | |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
846 * For locality, assign higher weight to metaslabs which have |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
847 * a lower offset than what we've already activated. |
1732 | 848 */ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
849 if (sm->sm_start <= mg->mg_bonus_area) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
850 weight *= (metaslab_smo_bonus_pct / 100); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
851 ASSERT(weight >= space && |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
852 weight <= 2 * (metaslab_smo_bonus_pct / 100) * space); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
853 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
854 if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
855 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
856 * If this metaslab is one we're actively using, adjust its |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
857 * weight to make it preferable to any inactive metaslab so |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
858 * we'll polish it off. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
859 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
860 weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
861 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
862 return (weight); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
863 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
864 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
865 static void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
866 metaslab_prefetch(metaslab_group_t *mg) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
867 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
868 spa_t *spa = mg->mg_vd->vdev_spa; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
869 metaslab_t *msp; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
870 avl_tree_t *t = &mg->mg_metaslab_tree; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
871 int m; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
872 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
873 mutex_enter(&mg->mg_lock); |
1732 | 874 |
875 /* | |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
876 * Prefetch the next potential metaslabs |
1732 | 877 */ |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
878 for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) { |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
879 space_map_t *sm = msp->ms_map; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
880 space_map_obj_t *smo = &msp->ms_smo; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
881 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
882 /* If we have reached our prefetch limit then we're done */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
883 if (m >= metaslab_prefetch_limit) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
884 break; |
1732 | 885 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
886 if (!sm->sm_loaded && smo->smo_object != 0) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
887 mutex_exit(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
888 dmu_prefetch(spa_meta_objset(spa), smo->smo_object, |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
889 0ULL, smo->smo_objsize); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
890 mutex_enter(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
891 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
892 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
893 mutex_exit(&mg->mg_lock); |
1732 | 894 } |
895 | |
896 static int | |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
897 metaslab_activate(metaslab_t *msp, uint64_t activation_weight) |
789 | 898 { |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
899 metaslab_group_t *mg = msp->ms_group; |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
900 space_map_t *sm = msp->ms_map; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
901 space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; |
789 | 902 |
903 ASSERT(MUTEX_HELD(&msp->ms_lock)); | |
1732 | 904 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
905 if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
906 space_map_load_wait(sm); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
907 if (!sm->sm_loaded) { |
13879
4eac7a87eff2
3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents:
13805
diff
changeset
|
908 space_map_obj_t *smo = &msp->ms_smo; |
4eac7a87eff2
3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents:
13805
diff
changeset
|
909 |
4eac7a87eff2
3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb()
George Wilson <george.wilson@delphix.com>
parents:
13805
diff
changeset
|
910 int error = space_map_load(sm, sm_ops, SM_FREE, smo, |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
911 spa_meta_objset(msp->ms_group->mg_vd->vdev_spa)); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
912 if (error) { |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
913 metaslab_group_sort(msp->ms_group, msp, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
914 return (error); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
915 } |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
916 for (int t = 0; t < TXG_DEFER_SIZE; t++) |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
917 space_map_walk(msp->ms_defermap[t], |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
918 space_map_claim, sm); |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
919 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
920 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
921 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
922 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
923 * Track the bonus area as we activate new metaslabs. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
924 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
925 if (sm->sm_start > mg->mg_bonus_area) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
926 mutex_enter(&mg->mg_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
927 mg->mg_bonus_area = sm->sm_start; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
928 mutex_exit(&mg->mg_lock); |
1732 | 929 } |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
930 |
1732 | 931 metaslab_group_sort(msp->ms_group, msp, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
932 msp->ms_weight | activation_weight); |
1732 | 933 } |
934 ASSERT(sm->sm_loaded); | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
935 ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK); |
1732 | 936 |
937 return (0); | |
938 } | |
939 | |
940 static void | |
941 metaslab_passivate(metaslab_t *msp, uint64_t size) | |
942 { | |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
943 /* |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
944 * If size < SPA_MINBLOCKSIZE, then we will not allocate from |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
945 * this metaslab again. In that case, it had better be empty, |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
946 * or we would be leaving space on the table. |
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
947 */ |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
948 ASSERT(size >= SPA_MINBLOCKSIZE || msp->ms_map->sm_space == 0); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
949 metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size)); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
950 ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0); |
1732 | 951 } |
952 | |
953 /* | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
954 * Determine if the in-core space map representation can be condensed on-disk. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
955 * We would like to use the following criteria to make our decision: |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
956 * |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
957 * 1. The size of the space map object should not dramatically increase as a |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
958 * result of writing out our in-core free map. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
959 * |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
960 * 2. The minimal on-disk space map representation is zfs_condense_pct/100 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
961 * times the size of the in-core representation (e.g. zfs_condense_pct = 110 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
962 * and in-core = 1MB, minimal = 1.1MB). |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
963 * |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
964 * Checking the first condition is tricky since we don't want to walk |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
965 * the entire AVL tree calculating the estimated on-disk size. Instead we |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
966 * use the size-ordered AVL tree in the space map and calculate the |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
967 * size required for the largest segment in our in-core free map. If the |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
968 * size required to represent that segment on disk is larger than the space |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
969 * map object then we avoid condensing this map. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
970 * |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
971 * To determine the second criterion we use a best-case estimate and assume |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
972 * each segment can be represented on-disk as a single 64-bit entry. We refer |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
973 * to this best-case estimate as the space map's minimal form. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
974 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
975 static boolean_t |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
976 metaslab_should_condense(metaslab_t *msp) |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
977 { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
978 space_map_t *sm = msp->ms_map; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
979 space_map_obj_t *smo = &msp->ms_smo_syncing; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
980 space_seg_t *ss; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
981 uint64_t size, entries, segsz; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
982 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
983 ASSERT(MUTEX_HELD(&msp->ms_lock)); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
984 ASSERT(sm->sm_loaded); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
985 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
986 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
987 * Use the sm_pp_root AVL tree, which is ordered by size, to obtain |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
988 * the largest segment in the in-core free map. If the tree is |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
989 * empty then we should condense the map. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
990 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
991 ss = avl_last(sm->sm_pp_root); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
992 if (ss == NULL) |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
993 return (B_TRUE); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
994 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
995 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
996 * Calculate the number of 64-bit entries this segment would |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
997 * require when written to disk. If this single segment would be |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
998 * larger on-disk than the entire current on-disk structure, then |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
999 * clearly condensing will increase the on-disk structure size. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1000 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1001 size = (ss->ss_end - ss->ss_start) >> sm->sm_shift; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1002 entries = size / (MIN(size, SM_RUN_MAX)); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1003 segsz = entries * sizeof (uint64_t); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1004 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1005 return (segsz <= smo->smo_objsize && |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1006 smo->smo_objsize >= (zfs_condense_pct * |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1007 sizeof (uint64_t) * avl_numnodes(&sm->sm_root)) / 100); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1008 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1009 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1010 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1011 * Condense the on-disk space map representation to its minimized form. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1012 * The minimized form consists of a small number of allocations followed by |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1013 * the in-core free map. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1014 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1015 static void |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1016 metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx) |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1017 { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1018 spa_t *spa = msp->ms_group->mg_vd->vdev_spa; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1019 space_map_t *freemap = msp->ms_freemap[txg & TXG_MASK]; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1020 space_map_t condense_map; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1021 space_map_t *sm = msp->ms_map; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1022 objset_t *mos = spa_meta_objset(spa); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1023 space_map_obj_t *smo = &msp->ms_smo_syncing; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1024 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1025 ASSERT(MUTEX_HELD(&msp->ms_lock)); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1026 ASSERT3U(spa_sync_pass(spa), ==, 1); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1027 ASSERT(sm->sm_loaded); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1028 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1029 spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, " |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1030 "smo size %llu, segments %lu", txg, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1031 (msp->ms_map->sm_start / msp->ms_map->sm_size), msp, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1032 smo->smo_objsize, avl_numnodes(&sm->sm_root)); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1033 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1034 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1035 * Create an map that is a 100% allocated map. We remove segments |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1036 * that have been freed in this txg, any deferred frees that exist, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1037 * and any allocation in the future. Removing segments should be |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1038 * a relatively inexpensive operation since we expect these maps to |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1039 * a small number of nodes. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1040 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1041 space_map_create(&condense_map, sm->sm_start, sm->sm_size, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1042 sm->sm_shift, sm->sm_lock); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1043 space_map_add(&condense_map, condense_map.sm_start, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1044 condense_map.sm_size); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1045 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1046 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1047 * Remove what's been freed in this txg from the condense_map. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1048 * Since we're in sync_pass 1, we know that all the frees from |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1049 * this txg are in the freemap. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1050 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1051 space_map_walk(freemap, space_map_remove, &condense_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1052 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1053 for (int t = 0; t < TXG_DEFER_SIZE; t++) |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1054 space_map_walk(msp->ms_defermap[t], |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1055 space_map_remove, &condense_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1056 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1057 for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1058 space_map_walk(msp->ms_allocmap[(txg + t) & TXG_MASK], |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1059 space_map_remove, &condense_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1060 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1061 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1062 * We're about to drop the metaslab's lock thus allowing |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1063 * other consumers to change it's content. Set the |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1064 * space_map's sm_condensing flag to ensure that |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1065 * allocations on this metaslab do not occur while we're |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1066 * in the middle of committing it to disk. This is only critical |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1067 * for the ms_map as all other space_maps use per txg |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1068 * views of their content. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1069 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1070 sm->sm_condensing = B_TRUE; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1071 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1072 mutex_exit(&msp->ms_lock); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1073 space_map_truncate(smo, mos, tx); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1074 mutex_enter(&msp->ms_lock); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1075 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1076 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1077 * While we would ideally like to create a space_map representation |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1078 * that consists only of allocation records, doing so can be |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1079 * prohibitively expensive because the in-core free map can be |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1080 * large, and therefore computationally expensive to subtract |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1081 * from the condense_map. Instead we sync out two maps, a cheap |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1082 * allocation only map followed by the in-core free map. While not |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1083 * optimal, this is typically close to optimal, and much cheaper to |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1084 * compute. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1085 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1086 space_map_sync(&condense_map, SM_ALLOC, smo, mos, tx); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1087 space_map_vacate(&condense_map, NULL, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1088 space_map_destroy(&condense_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1089 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1090 space_map_sync(sm, SM_FREE, smo, mos, tx); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1091 sm->sm_condensing = B_FALSE; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1092 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1093 spa_dbgmsg(spa, "condensed: txg %llu, msp[%llu] %p, " |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1094 "smo size %llu", txg, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1095 (msp->ms_map->sm_start / msp->ms_map->sm_size), msp, |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1096 smo->smo_objsize); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1097 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1098 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1099 /* |
1732 | 1100 * Write a metaslab to disk in the context of the specified transaction group. |
1101 */ | |
1102 void | |
1103 metaslab_sync(metaslab_t *msp, uint64_t txg) | |
1104 { | |
1105 vdev_t *vd = msp->ms_group->mg_vd; | |
1106 spa_t *spa = vd->vdev_spa; | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1107 objset_t *mos = spa_meta_objset(spa); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1108 space_map_t *allocmap = msp->ms_allocmap[txg & TXG_MASK]; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1109 space_map_t **freemap = &msp->ms_freemap[txg & TXG_MASK]; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1110 space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1111 space_map_t *sm = msp->ms_map; |
1732 | 1112 space_map_obj_t *smo = &msp->ms_smo_syncing; |
1113 dmu_buf_t *db; | |
1114 dmu_tx_t *tx; | |
1115 | |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1116 ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1117 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1118 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1119 * This metaslab has just been added so there's no work to do now. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1120 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1121 if (*freemap == NULL) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1122 ASSERT3P(allocmap, ==, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1123 return; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1124 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1125 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1126 ASSERT3P(allocmap, !=, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1127 ASSERT3P(*freemap, !=, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1128 ASSERT3P(*freed_map, !=, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1129 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1130 if (allocmap->sm_space == 0 && (*freemap)->sm_space == 0) |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1131 return; |
1732 | 1132 |
1133 /* | |
1134 * The only state that can actually be changing concurrently with | |
1135 * metaslab_sync() is the metaslab's ms_map. No other thread can | |
1136 * be modifying this txg's allocmap, freemap, freed_map, or smo. | |
1137 * Therefore, we only hold ms_lock to satisfy space_map ASSERTs. | |
1138 * We drop it whenever we call into the DMU, because the DMU | |
1139 * can call down to us (e.g. via zio_free()) at any time. | |
1140 */ | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1141 |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1142 tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); |
1732 | 1143 |
1144 if (smo->smo_object == 0) { | |
1145 ASSERT(smo->smo_objsize == 0); | |
1146 ASSERT(smo->smo_alloc == 0); | |
1147 smo->smo_object = dmu_object_alloc(mos, | |
1148 DMU_OT_SPACE_MAP, 1 << SPACE_MAP_BLOCKSHIFT, | |
1149 DMU_OT_SPACE_MAP_HEADER, sizeof (*smo), tx); | |
1150 ASSERT(smo->smo_object != 0); | |
1151 dmu_write(mos, vd->vdev_ms_array, sizeof (uint64_t) * | |
1152 (sm->sm_start >> vd->vdev_ms_shift), | |
1153 sizeof (uint64_t), &smo->smo_object, tx); | |
1154 } | |
1155 | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1156 mutex_enter(&msp->ms_lock); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1157 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1158 if (sm->sm_loaded && spa_sync_pass(spa) == 1 && |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1159 metaslab_should_condense(msp)) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1160 metaslab_condense(msp, txg, tx); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1161 } else { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1162 space_map_sync(allocmap, SM_ALLOC, smo, mos, tx); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1163 space_map_sync(*freemap, SM_FREE, smo, mos, tx); |
789 | 1164 } |
1732 | 1165 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1166 space_map_vacate(allocmap, NULL, NULL); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1167 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1168 /* |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1169 * For sync pass 1, we avoid walking the entire space map and |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1170 * instead will just swap the pointers for freemap and |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1171 * freed_map. We can safely do this since the freed_map is |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1172 * guaranteed to be empty on the initial pass. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1173 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1174 if (spa_sync_pass(spa) == 1) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1175 ASSERT0((*freed_map)->sm_space); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1176 ASSERT0(avl_numnodes(&(*freed_map)->sm_root)); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1177 space_map_swap(freemap, freed_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1178 } else { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1179 space_map_vacate(*freemap, space_map_add, *freed_map); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1180 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1181 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1182 ASSERT0(msp->ms_allocmap[txg & TXG_MASK]->sm_space); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1183 ASSERT0(msp->ms_freemap[txg & TXG_MASK]->sm_space); |
1732 | 1184 |
1185 mutex_exit(&msp->ms_lock); | |
1186 | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1187 VERIFY0(dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)); |
1732 | 1188 dmu_buf_will_dirty(db, tx); |
4944
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
1189 ASSERT3U(db->db_size, >=, sizeof (*smo)); |
96d96f8de974
6569719 panic dangling dbufs (dn=ffffffff28814d30, dbuf=ffffffff20756008)
maybee
parents:
4527
diff
changeset
|
1190 bcopy(smo, db->db_data, sizeof (*smo)); |
1732 | 1191 dmu_buf_rele(db, FTAG); |
1192 | |
1193 dmu_tx_commit(tx); | |
1194 } | |
1195 | |
1196 /* | |
1197 * Called after a transaction group has completely synced to mark | |
1198 * all of the metaslab's free space as usable. | |
1199 */ | |
1200 void | |
1201 metaslab_sync_done(metaslab_t *msp, uint64_t txg) | |
1202 { | |
1203 space_map_obj_t *smo = &msp->ms_smo; | |
1204 space_map_obj_t *smosync = &msp->ms_smo_syncing; | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1205 space_map_t *sm = msp->ms_map; |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1206 space_map_t **freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1207 space_map_t **defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; |
1732 | 1208 metaslab_group_t *mg = msp->ms_group; |
1209 vdev_t *vd = mg->mg_vd; | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1210 int64_t alloc_delta, defer_delta; |
1732 | 1211 |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1212 ASSERT(!vd->vdev_ishole); |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1213 |
1732 | 1214 mutex_enter(&msp->ms_lock); |
1215 | |
1216 /* | |
1217 * If this metaslab is just becoming available, initialize its | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1218 * allocmaps, freemaps, and defermap and add its capacity to the vdev. |
1732 | 1219 */ |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1220 if (*freed_map == NULL) { |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1221 ASSERT(*defer_map == NULL); |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1222 for (int t = 0; t < TXG_SIZE; t++) { |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1223 msp->ms_allocmap[t] = kmem_zalloc(sizeof (space_map_t), |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1224 KM_SLEEP); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1225 space_map_create(msp->ms_allocmap[t], sm->sm_start, |
1732 | 1226 sm->sm_size, sm->sm_shift, sm->sm_lock); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1227 msp->ms_freemap[t] = kmem_zalloc(sizeof (space_map_t), |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1228 KM_SLEEP); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1229 space_map_create(msp->ms_freemap[t], sm->sm_start, |
1732 | 1230 sm->sm_size, sm->sm_shift, sm->sm_lock); |
1231 } | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1232 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1233 for (int t = 0; t < TXG_DEFER_SIZE; t++) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1234 msp->ms_defermap[t] = kmem_zalloc(sizeof (space_map_t), |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1235 KM_SLEEP); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1236 space_map_create(msp->ms_defermap[t], sm->sm_start, |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1237 sm->sm_size, sm->sm_shift, sm->sm_lock); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1238 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1239 |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1240 freed_map = &msp->ms_freemap[TXG_CLEAN(txg) & TXG_MASK]; |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1241 defer_map = &msp->ms_defermap[txg % TXG_DEFER_SIZE]; |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1242 |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1243 vdev_space_update(vd, 0, 0, sm->sm_size); |
1732 | 1244 } |
1245 | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1246 alloc_delta = smosync->smo_alloc - smo->smo_alloc; |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1247 defer_delta = (*freed_map)->sm_space - (*defer_map)->sm_space; |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1248 |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1249 vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0); |
1732 | 1250 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1251 ASSERT(msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1252 ASSERT(msp->ms_freemap[txg & TXG_MASK]->sm_space == 0); |
1732 | 1253 |
1254 /* | |
1255 * If there's a space_map_load() in progress, wait for it to complete | |
1256 * so that we have a consistent view of the in-core space map. | |
1257 */ | |
1258 space_map_load_wait(sm); | |
13966
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1259 |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1260 /* |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1261 * Move the frees from the defer_map to this map (if it's loaded). |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1262 * Swap the freed_map and the defer_map -- this is safe to do |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1263 * because we've just emptied out the defer_map. |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1264 */ |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1265 space_map_vacate(*defer_map, sm->sm_loaded ? space_map_free : NULL, sm); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1266 ASSERT0((*defer_map)->sm_space); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1267 ASSERT0(avl_numnodes(&(*defer_map)->sm_root)); |
0e1d84ebb004
3578 transferring the freed map to the defer map should be constant time
George Wilson <george.wilson@delphix.com>
parents:
13959
diff
changeset
|
1268 space_map_swap(freed_map, defer_map); |
1732 | 1269 |
1270 *smo = *smosync; | |
1271 | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1272 msp->ms_deferspace += defer_delta; |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1273 ASSERT3S(msp->ms_deferspace, >=, 0); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1274 ASSERT3S(msp->ms_deferspace, <=, sm->sm_size); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1275 if (msp->ms_deferspace != 0) { |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1276 /* |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1277 * Keep syncing this metaslab until all deferred frees |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1278 * are back in circulation. |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1279 */ |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1280 vdev_dirty(vd, VDD_METASLAB, msp, txg + 1); |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1281 } |
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1282 |
1732 | 1283 /* |
1284 * If the map is loaded but no longer active, evict it as soon as all | |
1285 * future allocations have synced. (If we unloaded it now and then | |
1286 * loaded a moment later, the map wouldn't reflect those allocations.) | |
1287 */ | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1288 if (sm->sm_loaded && (msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { |
1732 | 1289 int evictable = 1; |
1290 | |
10921
8aac17999e4d
PSARC 2009/479 zpool recovery support
Tim Haley <Tim.Haley@Sun.COM>
parents:
10594
diff
changeset
|
1291 for (int t = 1; t < TXG_CONCURRENT_STATES; t++) |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1292 if (msp->ms_allocmap[(txg + t) & TXG_MASK]->sm_space) |
1732 | 1293 evictable = 0; |
1294 | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1295 if (evictable && !metaslab_debug) |
1732 | 1296 space_map_unload(sm); |
1297 } | |
1298 | |
1299 metaslab_group_sort(mg, msp, metaslab_weight(msp)); | |
1300 | |
1301 mutex_exit(&msp->ms_lock); | |
789 | 1302 } |
1303 | |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1304 void |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1305 metaslab_sync_reassess(metaslab_group_t *mg) |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1306 { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1307 vdev_t *vd = mg->mg_vd; |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1308 int64_t failures = mg->mg_alloc_failures; |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1309 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1310 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1311 * Re-evaluate all metaslabs which have lower offsets than the |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1312 * bonus area. |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1313 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1314 for (int m = 0; m < vd->vdev_ms_count; m++) { |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1315 metaslab_t *msp = vd->vdev_ms[m]; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1316 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1317 if (msp->ms_map->sm_start > mg->mg_bonus_area) |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1318 break; |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1319 |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1320 mutex_enter(&msp->ms_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1321 metaslab_group_sort(mg, msp, metaslab_weight(msp)); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1322 mutex_exit(&msp->ms_lock); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1323 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1324 |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1325 atomic_add_64(&mg->mg_alloc_failures, -failures); |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1326 |
11146
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1327 /* |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1328 * Prefetch the next potential metaslabs |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1329 */ |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1330 metaslab_prefetch(mg); |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1331 } |
7e58f40bcb1c
6826241 Sync write IOPS drops dramatically during TXG sync
George Wilson <George.Wilson@Sun.COM>
parents:
11066
diff
changeset
|
1332 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1333 static uint64_t |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1334 metaslab_distance(metaslab_t *msp, dva_t *dva) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1335 { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1336 uint64_t ms_shift = msp->ms_group->mg_vd->vdev_ms_shift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1337 uint64_t offset = DVA_GET_OFFSET(dva) >> ms_shift; |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1338 uint64_t start = msp->ms_map->sm_start >> ms_shift; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1339 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1340 if (msp->ms_group->mg_vd->vdev_id != DVA_GET_VDEV(dva)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1341 return (1ULL << 63); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1342 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1343 if (offset < start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1344 return ((start - offset) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1345 if (offset > start) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1346 return ((offset - start) << ms_shift); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1347 return (0); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1348 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1349 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1350 static uint64_t |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1351 metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize, |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1352 uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags) |
789 | 1353 { |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1354 spa_t *spa = mg->mg_vd->vdev_spa; |
1732 | 1355 metaslab_t *msp = NULL; |
1356 uint64_t offset = -1ULL; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1357 avl_tree_t *t = &mg->mg_metaslab_tree; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1358 uint64_t activation_weight; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1359 uint64_t target_distance; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1360 int i; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1361 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1362 activation_weight = METASLAB_WEIGHT_PRIMARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1363 for (i = 0; i < d; i++) { |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1364 if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1365 activation_weight = METASLAB_WEIGHT_SECONDARY; |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1366 break; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1367 } |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1368 } |
789 | 1369 |
1732 | 1370 for (;;) { |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1371 boolean_t was_active; |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1372 |
1732 | 1373 mutex_enter(&mg->mg_lock); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1374 for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1375 if (msp->ms_weight < asize) { |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1376 spa_dbgmsg(spa, "%s: failed to meet weight " |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1377 "requirement: vdev %llu, txg %llu, mg %p, " |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1378 "msp %p, psize %llu, asize %llu, " |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1379 "failures %llu, weight %llu", |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1380 spa_name(spa), mg->mg_vd->vdev_id, txg, |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1381 mg, msp, psize, asize, |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1382 mg->mg_alloc_failures, msp->ms_weight); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1383 mutex_exit(&mg->mg_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1384 return (-1ULL); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1385 } |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1386 was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1387 if (activation_weight == METASLAB_WEIGHT_PRIMARY) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1388 break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1389 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1390 target_distance = min_distance + |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1391 (msp->ms_smo.smo_alloc ? 0 : min_distance >> 1); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1392 |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1393 for (i = 0; i < d; i++) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1394 if (metaslab_distance(msp, &dva[i]) < |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1395 target_distance) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1396 break; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1397 if (i == d) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1398 break; |
1732 | 1399 } |
1400 mutex_exit(&mg->mg_lock); | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1401 if (msp == NULL) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1402 return (-1ULL); |
789 | 1403 |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1404 /* |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1405 * If we've already reached the allowable number of failed |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1406 * allocation attempts on this metaslab group then we |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1407 * consider skipping it. We skip it only if we're allowed |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1408 * to "fast" gang, the physical size is larger than |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1409 * a gang block, and we're attempting to allocate from |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1410 * the primary metaslab. |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1411 */ |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1412 if (mg->mg_alloc_failures > zfs_mg_alloc_failures && |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1413 CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE && |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1414 activation_weight == METASLAB_WEIGHT_PRIMARY) { |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1415 spa_dbgmsg(spa, "%s: skipping metaslab group: " |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1416 "vdev %llu, txg %llu, mg %p, psize %llu, " |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1417 "asize %llu, failures %llu", spa_name(spa), |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1418 mg->mg_vd->vdev_id, txg, mg, psize, asize, |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1419 mg->mg_alloc_failures); |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1420 return (-1ULL); |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1421 } |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1422 |
789 | 1423 mutex_enter(&msp->ms_lock); |
1732 | 1424 |
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1425 /* |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1426 * If this metaslab is currently condensing then pick again as |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1427 * we can't manipulate this metaslab until it's committed |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1428 * to disk. |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1429 */ |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1430 if (msp->ms_map->sm_condensing) { |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1431 mutex_exit(&msp->ms_lock); |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1432 continue; |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1433 } |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1434 |
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1435 /* |
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1436 * Ensure that the metaslab we have selected is still |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1437 * capable of handling our request. It's possible that |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1438 * another thread may have changed the weight while we |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1439 * were blocked on the metaslab lock. |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1440 */ |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1441 if (msp->ms_weight < asize || (was_active && |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1442 !(msp->ms_weight & METASLAB_ACTIVE_MASK) && |
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1443 activation_weight == METASLAB_WEIGHT_PRIMARY)) { |
3848
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1444 mutex_exit(&msp->ms_lock); |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1445 continue; |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1446 } |
abf146257cf9
6495013 Loops and recursion in metaslab_ff_alloc can kill performance, even on a pool with lots of free data
gw25295
parents:
3713
diff
changeset
|
1447 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1448 if ((msp->ms_weight & METASLAB_WEIGHT_SECONDARY) && |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1449 activation_weight == METASLAB_WEIGHT_PRIMARY) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1450 metaslab_passivate(msp, |
2459
7511d9859fcd
6452923 really out of space panic even though ms_map.sm_space > 0
ahrens
parents:
2391
diff
changeset
|
1451 msp->ms_weight & ~METASLAB_ACTIVE_MASK); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1452 mutex_exit(&msp->ms_lock); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1453 continue; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1454 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1455 |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1456 if (metaslab_activate(msp, activation_weight) != 0) { |
789 | 1457 mutex_exit(&msp->ms_lock); |
1458 continue; | |
1459 } | |
1732 | 1460 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1461 if ((offset = space_map_alloc(msp->ms_map, asize)) != -1ULL) |
1732 | 1462 break; |
1463 | |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1464 atomic_inc_64(&mg->mg_alloc_failures); |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1465 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1466 metaslab_passivate(msp, space_map_maxsize(msp->ms_map)); |
1732 | 1467 |
789 | 1468 mutex_exit(&msp->ms_lock); |
1469 } | |
1470 | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1471 if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0) |
1732 | 1472 vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg); |
1473 | |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1474 space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, asize); |
1732 | 1475 |
1476 mutex_exit(&msp->ms_lock); | |
1477 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1478 return (offset); |
789 | 1479 } |
1480 | |
1481 /* | |
1482 * Allocate a block for the specified i/o. | |
1483 */ | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1484 static int |
4527 | 1485 metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1486 dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags) |
789 | 1487 { |
1488 metaslab_group_t *mg, *rotor; | |
1489 vdev_t *vd; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1490 int dshift = 3; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1491 int all_zero; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1492 int zio_lock = B_FALSE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1493 boolean_t allocatable; |
789 | 1494 uint64_t offset = -1ULL; |
1495 uint64_t asize; | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1496 uint64_t distance; |
789 | 1497 |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1498 ASSERT(!DVA_IS_VALID(&dva[d])); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1499 |
789 | 1500 /* |
5530 | 1501 * For testing, make some blocks above a certain size be gang blocks. |
1502 */ | |
11066
cebb50cbe4f9
PSARC/2009/396 Tickless Kernel Architecture / lbolt decoupling
Rafael Vanoni <rafael.vanoni@sun.com>
parents:
11026
diff
changeset
|
1503 if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) |
5530 | 1504 return (ENOSPC); |
1505 | |
1506 /* | |
789 | 1507 * Start at the rotor and loop through all mgs until we find something. |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1508 * Note that there's no locking on mc_rotor or mc_aliquot because |
789 | 1509 * nothing actually breaks if we miss a few updates -- we just won't |
1510 * allocate quite as evenly. It all balances out over time. | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1511 * |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1512 * If we are doing ditto or log blocks, try to spread them across |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1513 * consecutive vdevs. If we're forced to reuse a vdev before we've |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1514 * allocated all of our ditto blocks, then try and spread them out on |
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1515 * that vdev as much as possible. If it turns out to not be possible, |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1516 * gradually lower our standards until anything becomes acceptable. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1517 * Also, allocating on consecutive vdevs (as opposed to random vdevs) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1518 * gives us hope of containing our fault domains to something we're |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1519 * able to reason about. Otherwise, any two top-level vdev failures |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1520 * will guarantee the loss of data. With consecutive allocation, |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1521 * only two adjacent top-level vdev failures will result in data loss. |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1522 * |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1523 * If we are doing gang blocks (hintdva is non-NULL), try to keep |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1524 * ourselves on the same vdev as our gang block header. That |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1525 * way, we can hope for locality in vdev_cache, plus it makes our |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1526 * fault domains something tractable. |
789 | 1527 */ |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1528 if (hintdva) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1529 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d])); |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1530 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1531 /* |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1532 * It's possible the vdev we're using as the hint no |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1533 * longer exists (i.e. removed). Consult the rotor when |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1534 * all else fails. |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1535 */ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1536 if (vd != NULL) { |
3063
b252896b372b
6341569 zio_alloc_blk() vdev distribution performs badly
perrin
parents:
2856
diff
changeset
|
1537 mg = vd->vdev_mg; |
10594
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1538 |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1539 if (flags & METASLAB_HINTBP_AVOID && |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1540 mg->mg_next != NULL) |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1541 mg = mg->mg_next; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1542 } else { |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1543 mg = mc->mc_rotor; |
986cb68d2347
6574286 removing a slog doesn't work
George Wilson <George.Wilson@Sun.COM>
parents:
9480
diff
changeset
|
1544 } |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1545 } else if (d != 0) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1546 vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1547 mg = vd->vdev_mg->mg_next; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1548 } else { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1549 mg = mc->mc_rotor; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1550 } |
4527 | 1551 |
1552 /* | |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1553 * If the hint put us into the wrong metaslab class, or into a |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1554 * metaslab group that has been passivated, just follow the rotor. |
4527 | 1555 */ |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1556 if (mg->mg_class != mc || mg->mg_activation_count <= 0) |
4527 | 1557 mg = mc->mc_rotor; |
1558 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1559 rotor = mg; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1560 top: |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1561 all_zero = B_TRUE; |
789 | 1562 do { |
10974
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1563 ASSERT(mg->mg_activation_count == 1); |
32d689ba6466
6897958 ASSERT in metaslab_class_space_update() with 8+ exabyte pool
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10922
diff
changeset
|
1564 |
789 | 1565 vd = mg->mg_vd; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1566 |
5329 | 1567 /* |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1568 * Don't allocate from faulted devices. |
5329 | 1569 */ |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1570 if (zio_lock) { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1571 spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1572 allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1573 spa_config_exit(spa, SCL_ZIO, FTAG); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1574 } else { |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1575 allocatable = vdev_allocatable(vd); |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1576 } |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1577 if (!allocatable) |
5329 | 1578 goto next; |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1579 |
5329 | 1580 /* |
1581 * Avoid writing single-copy data to a failing vdev | |
13945
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
1582 * unless the user instructs us that it is okay. |
5329 | 1583 */ |
1584 if ((vd->vdev_stat.vs_write_errors > 0 || | |
1585 vd->vdev_state < VDEV_STATE_HEALTHY) && | |
13945
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
1586 d == 0 && dshift == 3 && |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
1587 !(zfs_write_to_degraded && vd->vdev_state == |
7a9c1d41dfbe
3507 Tunable to allow block allocation even on degraded vdevs
Sašo Kiselkov <skiselkov@gmail.com>
parents:
13879
diff
changeset
|
1588 VDEV_STATE_DEGRADED)) { |
5329 | 1589 all_zero = B_FALSE; |
1590 goto next; | |
1591 } | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1592 |
4527 | 1593 ASSERT(mg->mg_class == mc); |
1594 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1595 distance = vd->vdev_asize >> dshift; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1596 if (distance <= (1ULL << vd->vdev_ms_shift)) |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1597 distance = 0; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1598 else |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1599 all_zero = B_FALSE; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1600 |
789 | 1601 asize = vdev_psize_to_asize(vd, psize); |
1602 ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0); | |
1603 | |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1604 offset = metaslab_group_alloc(mg, psize, asize, txg, distance, |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1605 dva, d, flags); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1606 if (offset != -1ULL) { |
789 | 1607 /* |
1608 * If we've just selected this metaslab group, | |
1609 * figure out whether the corresponding vdev is | |
1610 * over- or under-used relative to the pool, | |
1611 * and set an allocation bias to even it out. | |
1612 */ | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1613 if (mc->mc_aliquot == 0) { |
789 | 1614 vdev_stat_t *vs = &vd->vdev_stat; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1615 int64_t vu, cu; |
789 | 1616 |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1617 vu = (vs->vs_alloc * 100) / (vs->vs_space + 1); |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1618 cu = (mc->mc_alloc * 100) / (mc->mc_space + 1); |
789 | 1619 |
1620 /* | |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1621 * Calculate how much more or less we should |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1622 * try to allocate from this device during |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1623 * this iteration around the rotor. |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1624 * For example, if a device is 80% full |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1625 * and the pool is 20% full then we should |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1626 * reduce allocations by 60% on this device. |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1627 * |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1628 * mg_bias = (20 - 80) * 512K / 100 = -307K |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1629 * |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1630 * This reduces allocations by 307K for this |
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1631 * iteration. |
789 | 1632 */ |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1633 mg->mg_bias = ((cu - vu) * |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1634 (int64_t)mg->mg_aliquot) / 100; |
789 | 1635 } |
1636 | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1637 if (atomic_add_64_nv(&mc->mc_aliquot, asize) >= |
789 | 1638 mg->mg_aliquot + mg->mg_bias) { |
1639 mc->mc_rotor = mg->mg_next; | |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1640 mc->mc_aliquot = 0; |
789 | 1641 } |
1642 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1643 DVA_SET_VDEV(&dva[d], vd->vdev_id); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1644 DVA_SET_OFFSET(&dva[d], offset); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1645 DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER)); |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1646 DVA_SET_ASIZE(&dva[d], asize); |
789 | 1647 |
1648 return (0); | |
1649 } | |
5329 | 1650 next: |
789 | 1651 mc->mc_rotor = mg->mg_next; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1652 mc->mc_aliquot = 0; |
789 | 1653 } while ((mg = mg->mg_next) != rotor); |
1654 | |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1655 if (!all_zero) { |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1656 dshift++; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1657 ASSERT(dshift < 64); |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1658 goto top; |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1659 } |
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1660 |
9480
fcff33da767f
6596237 Stop looking and start ganging
George Wilson <George.Wilson@Sun.COM>
parents:
8241
diff
changeset
|
1661 if (!allocatable && !zio_lock) { |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1662 dshift = 3; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1663 zio_lock = B_TRUE; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1664 goto top; |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1665 } |
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1666 |
1775
e51e26b432c0
6410698 ZFS metadata needs to be more highly replicated (ditto blocks)
billm
parents:
1732
diff
changeset
|
1667 bzero(&dva[d], sizeof (dva_t)); |
789 | 1668 |
1669 return (ENOSPC); | |
1670 } | |
1671 | |
1672 /* | |
1673 * Free the block represented by DVA in the context of the specified | |
1674 * transaction group. | |
1675 */ | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1676 static void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1677 metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now) |
789 | 1678 { |
1679 uint64_t vdev = DVA_GET_VDEV(dva); | |
1680 uint64_t offset = DVA_GET_OFFSET(dva); | |
1681 uint64_t size = DVA_GET_ASIZE(dva); | |
1682 vdev_t *vd; | |
1683 metaslab_t *msp; | |
1684 | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1685 ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1686 |
789 | 1687 if (txg > spa_freeze_txg(spa)) |
1688 return; | |
1689 | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1690 if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1691 (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1692 cmn_err(CE_WARN, "metaslab_free_dva(): bad DVA %llu:%llu", |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1693 (u_longlong_t)vdev, (u_longlong_t)offset); |
789 | 1694 ASSERT(0); |
1695 return; | |
1696 } | |
1697 | |
1698 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; | |
1699 | |
1700 if (DVA_GET_GANG(dva)) | |
1701 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); | |
1702 | |
1703 mutex_enter(&msp->ms_lock); | |
1704 | |
1732 | 1705 if (now) { |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1706 space_map_remove(msp->ms_allocmap[txg & TXG_MASK], |
1732 | 1707 offset, size); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1708 space_map_free(msp->ms_map, offset, size); |
1732 | 1709 } else { |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1710 if (msp->ms_freemap[txg & TXG_MASK]->sm_space == 0) |
1732 | 1711 vdev_dirty(vd, VDD_METASLAB, msp, txg); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1712 space_map_add(msp->ms_freemap[txg & TXG_MASK], offset, size); |
789 | 1713 } |
1714 | |
1715 mutex_exit(&msp->ms_lock); | |
1716 } | |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1717 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1718 /* |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1719 * Intent log support: upon opening the pool after a crash, notify the SPA |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1720 * of blocks that the intent log has allocated for immediate write, but |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1721 * which are still considered free by the SPA because the last transaction |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1722 * group didn't commit yet. |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1723 */ |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1724 static int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1725 metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1726 { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1727 uint64_t vdev = DVA_GET_VDEV(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1728 uint64_t offset = DVA_GET_OFFSET(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1729 uint64_t size = DVA_GET_ASIZE(dva); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1730 vdev_t *vd; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1731 metaslab_t *msp; |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1732 int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1733 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1734 ASSERT(DVA_IS_VALID(dva)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1735 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1736 if ((vd = vdev_lookup_top(spa, vdev)) == NULL || |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1737 (offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1738 return (ENXIO); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1739 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1740 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift]; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1741 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1742 if (DVA_GET_GANG(dva)) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1743 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1744 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1745 mutex_enter(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1746 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1747 if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map->sm_loaded) |
13379
4df42cc92254
1051 zfs should handle imbalanced luns
George Wilson <George.Wilson@delphix.com>
parents:
12047
diff
changeset
|
1748 error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1749 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1750 if (error == 0 && !space_map_contains(msp->ms_map, offset, size)) |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1751 error = ENOENT; |
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1752 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1753 if (error || txg == 0) { /* txg == 0 indicates dry run */ |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1754 mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1755 return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1756 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1757 |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1758 space_map_claim(msp->ms_map, offset, size); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1759 |
8241
5a60f16123ba
6328632 zpool offline is a bit too conservative
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
7980
diff
changeset
|
1760 if (spa_writeable(spa)) { /* don't dirty if we're zdb(1M) */ |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1761 if (msp->ms_allocmap[txg & TXG_MASK]->sm_space == 0) |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1762 vdev_dirty(vd, VDD_METASLAB, msp, txg); |
13959
e03e14ddfb4c
3552 condensing one space map burns 3 seconds of CPU in spa_sync() thread
George Wilson <george.wilson@delphix.com>
parents:
13945
diff
changeset
|
1763 space_map_add(msp->ms_allocmap[txg & TXG_MASK], offset, size); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1764 } |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1765 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1766 mutex_exit(&msp->ms_lock); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1767 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1768 return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1769 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1770 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1771 int |
4527 | 1772 metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1773 int ndvas, uint64_t txg, blkptr_t *hintbp, int flags) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1774 { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1775 dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1776 dva_t *hintdva = hintbp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1777 int error = 0; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1778 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1779 ASSERT(bp->blk_birth == 0); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1780 ASSERT(BP_PHYSICAL_BIRTH(bp) == 0); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1781 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1782 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1783 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1784 if (mc->mc_rotor == NULL) { /* no vdevs in this class */ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1785 spa_config_exit(spa, SCL_ALLOC, FTAG); |
4527 | 1786 return (ENOSPC); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1787 } |
4527 | 1788 |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1789 ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1790 ASSERT(BP_GET_NDVAS(bp) == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1791 ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1792 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1793 for (int d = 0; d < ndvas; d++) { |
4527 | 1794 error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva, |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1795 txg, flags); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1796 if (error) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1797 for (d--; d >= 0; d--) { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1798 metaslab_free_dva(spa, &dva[d], txg, B_TRUE); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1799 bzero(&dva[d], sizeof (dva_t)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1800 } |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1801 spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1802 return (error); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1803 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1804 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1805 ASSERT(error == 0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1806 ASSERT(BP_GET_NDVAS(bp) == ndvas); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1807 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1808 spa_config_exit(spa, SCL_ALLOC, FTAG); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1809 |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1810 BP_SET_BIRTH(bp, txg, txg); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1811 |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1812 return (0); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1813 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1814 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1815 void |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1816 metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1817 { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1818 const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1819 int ndvas = BP_GET_NDVAS(bp); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1820 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1821 ASSERT(!BP_IS_HOLE(bp)); |
10922
e2081f502306
PSARC 2009/571 ZFS Deduplication Properties
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
10921
diff
changeset
|
1822 ASSERT(!now || bp->blk_birth >= spa_syncing_txg(spa)); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1823 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1824 spa_config_enter(spa, SCL_FREE, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1825 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1826 for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1827 metaslab_free_dva(spa, &dva[d], txg, now); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1828 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1829 spa_config_exit(spa, SCL_FREE, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1830 } |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1831 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1832 int |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1833 metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg) |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1834 { |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1835 const dva_t *dva = bp->blk_dva; |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1836 int ndvas = BP_GET_NDVAS(bp); |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1837 int error = 0; |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1838 |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1839 ASSERT(!BP_IS_HOLE(bp)); |
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1840 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1841 if (txg != 0) { |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1842 /* |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1843 * First do a dry run to make sure all DVAs are claimable, |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1844 * so we don't have to unwind from partial failures below. |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1845 */ |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1846 if ((error = metaslab_claim(spa, bp, 0)) != 0) |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1847 return (error); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1848 } |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1849 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1850 spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1851 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1852 for (int d = 0; d < ndvas; d++) |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1853 if ((error = metaslab_claim_dva(spa, &dva[d], txg)) != 0) |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1854 break; |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1855 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1856 spa_config_exit(spa, SCL_ALLOC, FTAG); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1857 |
7754
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1858 ASSERT(error == 0 || txg == 0); |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1859 |
b80e4842ad54
6754011 SPA 3.0: lock breakup, i/o pipeline refactoring, device failure handling
Jeff Bonwick <Jeff.Bonwick@Sun.COM>
parents:
5530
diff
changeset
|
1860 return (error); |
1807
35c8b566d7af
6410711 intent log blocks don't get invited to pool parties
bonwick
parents:
1775
diff
changeset
|
1861 } |