Mercurial > illumos > illumos-gate
annotate usr/src/uts/common/fs/zfs/zfs_vfsops.c @ 4577:ed36b0e652bc
PSARC/2007/328 zfs upgrade
6552536 'zpool status -v' doesn't work with new (type-bearing) directory entries
6559635 can not import pool whose front labels are gone
6572636 need "zfs upgrade" to change ZPL version number
6572637 store object type in directory entries.
6572648 ZPL's delete queue should not be processed if the filesystem is mounted read-only
6572650 ZFS_VERSION should be SPA_VERSION for readability
author | ahrens |
---|---|
date | Fri, 29 Jun 2007 16:23:19 -0700 |
parents | 12bb2876a62e |
children | 3aa6e3b5dfca |
rev | line source |
---|---|
789 | 1 /* |
2 * CDDL HEADER START | |
3 * | |
4 * The contents of this file are subject to the terms of the | |
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
5 * Common Development and Distribution License (the "License"). |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
6 * You may not use this file except in compliance with the License. |
789 | 7 * |
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 * or http://www.opensolaris.org/os/licensing. | |
10 * See the License for the specific language governing permissions | |
11 * and limitations under the License. | |
12 * | |
13 * When distributing Covered Code, include this CDDL HEADER in each | |
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 * If applicable, add the following below this CDDL HEADER, with the | |
16 * fields enclosed by brackets "[]" replaced with your own identifying | |
17 * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 * | |
19 * CDDL HEADER END | |
20 */ | |
21 /* | |
3461 | 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. |
789 | 23 * Use is subject to license terms. |
24 */ | |
25 | |
3246
7a46b8f56ee0
6351954 zfs missing noxattr mount flag (fix keywords)
ck153898
parents:
3234
diff
changeset
|
26 #pragma ident "%Z%%M% %I% %E% SMI" |
789 | 27 |
28 #include <sys/types.h> | |
29 #include <sys/param.h> | |
30 #include <sys/systm.h> | |
31 #include <sys/sysmacros.h> | |
32 #include <sys/kmem.h> | |
33 #include <sys/pathname.h> | |
34 #include <sys/vnode.h> | |
35 #include <sys/vfs.h> | |
3898
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
36 #include <sys/vfs_opreg.h> |
789 | 37 #include <sys/mntent.h> |
38 #include <sys/mount.h> | |
39 #include <sys/cmn_err.h> | |
40 #include "fs/fs_subr.h" | |
41 #include <sys/zfs_znode.h> | |
3461 | 42 #include <sys/zfs_dir.h> |
789 | 43 #include <sys/zil.h> |
44 #include <sys/fs/zfs.h> | |
45 #include <sys/dmu.h> | |
46 #include <sys/dsl_prop.h> | |
3912 | 47 #include <sys/dsl_dataset.h> |
4543 | 48 #include <sys/dsl_deleg.h> |
789 | 49 #include <sys/spa.h> |
50 #include <sys/zap.h> | |
51 #include <sys/varargs.h> | |
52 #include <sys/policy.h> | |
53 #include <sys/atomic.h> | |
54 #include <sys/mkdev.h> | |
55 #include <sys/modctl.h> | |
4543 | 56 #include <sys/refstr.h> |
789 | 57 #include <sys/zfs_ioctl.h> |
58 #include <sys/zfs_ctldir.h> | |
1544 | 59 #include <sys/bootconf.h> |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
60 #include <sys/sunddi.h> |
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
61 #include <sys/dnlc.h> |
789 | 62 |
63 int zfsfstype; | |
64 vfsops_t *zfs_vfsops = NULL; | |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
65 static major_t zfs_major; |
789 | 66 static minor_t zfs_minor; |
67 static kmutex_t zfs_dev_mtx; | |
68 | |
69 static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); | |
70 static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); | |
1544 | 71 static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); |
789 | 72 static int zfs_root(vfs_t *vfsp, vnode_t **vpp); |
73 static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); | |
74 static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); | |
75 static void zfs_freevfs(vfs_t *vfsp); | |
76 static void zfs_objset_close(zfsvfs_t *zfsvfs); | |
77 | |
78 static const fs_operation_def_t zfs_vfsops_template[] = { | |
3898
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
79 VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
80 VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
81 VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
82 VFSNAME_ROOT, { .vfs_root = zfs_root }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
83 VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
84 VFSNAME_SYNC, { .vfs_sync = zfs_sync }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
85 VFSNAME_VGET, { .vfs_vget = zfs_vget }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
86 VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
87 NULL, NULL |
789 | 88 }; |
89 | |
90 static const fs_operation_def_t zfs_vfsops_eio_template[] = { | |
3898
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
91 VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, |
c788126f2a20
PSARC/2007/124 Strong Type-Checking for VFS Operation Registration Mechanism
rsb
parents:
3461
diff
changeset
|
92 NULL, NULL |
789 | 93 }; |
94 | |
95 /* | |
96 * We need to keep a count of active fs's. | |
97 * This is necessary to prevent our module | |
98 * from being unloaded after a umount -f | |
99 */ | |
100 static uint32_t zfs_active_fs_count = 0; | |
101 | |
102 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; | |
103 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; | |
3234 | 104 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; |
105 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; | |
789 | 106 |
3234 | 107 /* |
108 * MNTOPT_DEFAULT was removed from MNTOPT_XATTR, since the | |
109 * default value is now determined by the xattr property. | |
110 */ | |
789 | 111 static mntopt_t mntopts[] = { |
3234 | 112 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, |
113 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, | |
789 | 114 { MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, |
115 { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } | |
116 }; | |
117 | |
118 static mntopts_t zfs_mntopts = { | |
119 sizeof (mntopts) / sizeof (mntopt_t), | |
120 mntopts | |
121 }; | |
122 | |
123 /*ARGSUSED*/ | |
124 int | |
125 zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) | |
126 { | |
127 /* | |
128 * Data integrity is job one. We don't want a compromised kernel | |
129 * writing to the storage pool, so we never sync during panic. | |
130 */ | |
131 if (panicstr) | |
132 return (0); | |
133 | |
134 /* | |
135 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS | |
136 * to sync metadata, which they would otherwise cache indefinitely. | |
137 * Semantically, the only requirement is that the sync be initiated. | |
138 * The DMU syncs out txgs frequently, so there's nothing to do. | |
139 */ | |
140 if (flag & SYNC_ATTR) | |
141 return (0); | |
142 | |
143 if (vfsp != NULL) { | |
144 /* | |
145 * Sync a specific filesystem. | |
146 */ | |
147 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
148 | |
149 ZFS_ENTER(zfsvfs); | |
150 if (zfsvfs->z_log != NULL) | |
2638
4f583dfeae92
6413510 zfs: writing to ZFS filesystem slows down fsync() on other files in the same FS
perrin
parents:
2474
diff
changeset
|
151 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); |
789 | 152 else |
153 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); | |
154 ZFS_EXIT(zfsvfs); | |
155 } else { | |
156 /* | |
157 * Sync all ZFS filesystems. This is what happens when you | |
158 * run sync(1M). Unlike other filesystems, ZFS honors the | |
159 * request by waiting for all pools to commit all dirty data. | |
160 */ | |
161 spa_sync_allpools(); | |
162 } | |
163 | |
164 return (0); | |
165 } | |
166 | |
1544 | 167 static int |
168 zfs_create_unique_device(dev_t *dev) | |
169 { | |
170 major_t new_major; | |
171 | |
172 do { | |
173 ASSERT3U(zfs_minor, <=, MAXMIN32); | |
174 minor_t start = zfs_minor; | |
175 do { | |
176 mutex_enter(&zfs_dev_mtx); | |
177 if (zfs_minor >= MAXMIN32) { | |
178 /* | |
179 * If we're still using the real major | |
180 * keep out of /dev/zfs and /dev/zvol minor | |
181 * number space. If we're using a getudev()'ed | |
182 * major number, we can use all of its minors. | |
183 */ | |
184 if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) | |
185 zfs_minor = ZFS_MIN_MINOR; | |
186 else | |
187 zfs_minor = 0; | |
188 } else { | |
189 zfs_minor++; | |
190 } | |
191 *dev = makedevice(zfs_major, zfs_minor); | |
192 mutex_exit(&zfs_dev_mtx); | |
193 } while (vfs_devismounted(*dev) && zfs_minor != start); | |
194 if (zfs_minor == start) { | |
195 /* | |
196 * We are using all ~262,000 minor numbers for the | |
197 * current major number. Create a new major number. | |
198 */ | |
199 if ((new_major = getudev()) == (major_t)-1) { | |
200 cmn_err(CE_WARN, | |
201 "zfs_mount: Can't get unique major " | |
202 "device number."); | |
203 return (-1); | |
204 } | |
205 mutex_enter(&zfs_dev_mtx); | |
206 zfs_major = new_major; | |
207 zfs_minor = 0; | |
208 | |
209 mutex_exit(&zfs_dev_mtx); | |
210 } else { | |
211 break; | |
212 } | |
213 /* CONSTANTCONDITION */ | |
214 } while (1); | |
215 | |
216 return (0); | |
217 } | |
218 | |
789 | 219 static void |
220 atime_changed_cb(void *arg, uint64_t newval) | |
221 { | |
222 zfsvfs_t *zfsvfs = arg; | |
223 | |
224 if (newval == TRUE) { | |
225 zfsvfs->z_atime = TRUE; | |
226 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); | |
227 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); | |
228 } else { | |
229 zfsvfs->z_atime = FALSE; | |
230 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); | |
231 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); | |
232 } | |
233 } | |
234 | |
235 static void | |
3234 | 236 xattr_changed_cb(void *arg, uint64_t newval) |
237 { | |
238 zfsvfs_t *zfsvfs = arg; | |
239 | |
240 if (newval == TRUE) { | |
241 /* XXX locking on vfs_flag? */ | |
242 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; | |
243 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); | |
244 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); | |
245 } else { | |
246 /* XXX locking on vfs_flag? */ | |
247 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; | |
248 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); | |
249 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); | |
250 } | |
251 } | |
252 | |
253 static void | |
789 | 254 blksz_changed_cb(void *arg, uint64_t newval) |
255 { | |
256 zfsvfs_t *zfsvfs = arg; | |
257 | |
258 if (newval < SPA_MINBLOCKSIZE || | |
259 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) | |
260 newval = SPA_MAXBLOCKSIZE; | |
261 | |
262 zfsvfs->z_max_blksz = newval; | |
263 zfsvfs->z_vfs->vfs_bsize = newval; | |
264 } | |
265 | |
266 static void | |
267 readonly_changed_cb(void *arg, uint64_t newval) | |
268 { | |
269 zfsvfs_t *zfsvfs = arg; | |
270 | |
271 if (newval) { | |
272 /* XXX locking on vfs_flag? */ | |
273 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; | |
274 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); | |
275 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); | |
276 } else { | |
277 /* XXX locking on vfs_flag? */ | |
278 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; | |
279 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); | |
280 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); | |
281 } | |
282 } | |
283 | |
284 static void | |
285 devices_changed_cb(void *arg, uint64_t newval) | |
286 { | |
287 zfsvfs_t *zfsvfs = arg; | |
288 | |
289 if (newval == FALSE) { | |
290 zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; | |
291 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); | |
292 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); | |
293 } else { | |
294 zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; | |
295 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); | |
296 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); | |
297 } | |
298 } | |
299 | |
300 static void | |
301 setuid_changed_cb(void *arg, uint64_t newval) | |
302 { | |
303 zfsvfs_t *zfsvfs = arg; | |
304 | |
305 if (newval == FALSE) { | |
306 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; | |
307 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); | |
308 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); | |
309 } else { | |
310 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; | |
311 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); | |
312 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); | |
313 } | |
314 } | |
315 | |
316 static void | |
317 exec_changed_cb(void *arg, uint64_t newval) | |
318 { | |
319 zfsvfs_t *zfsvfs = arg; | |
320 | |
321 if (newval == FALSE) { | |
322 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; | |
323 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); | |
324 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); | |
325 } else { | |
326 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; | |
327 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); | |
328 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); | |
329 } | |
330 } | |
331 | |
332 static void | |
333 snapdir_changed_cb(void *arg, uint64_t newval) | |
334 { | |
335 zfsvfs_t *zfsvfs = arg; | |
336 | |
337 zfsvfs->z_show_ctldir = newval; | |
338 } | |
339 | |
340 static void | |
341 acl_mode_changed_cb(void *arg, uint64_t newval) | |
342 { | |
343 zfsvfs_t *zfsvfs = arg; | |
344 | |
345 zfsvfs->z_acl_mode = newval; | |
346 } | |
347 | |
348 static void | |
349 acl_inherit_changed_cb(void *arg, uint64_t newval) | |
350 { | |
351 zfsvfs_t *zfsvfs = arg; | |
352 | |
353 zfsvfs->z_acl_inherit = newval; | |
354 } | |
355 | |
1544 | 356 static int |
357 zfs_refresh_properties(vfs_t *vfsp) | |
358 { | |
359 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
360 | |
2354
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
361 /* |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
362 * Remount operations default to "rw" unless "ro" is explicitly |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
363 * specified. |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
364 */ |
1544 | 365 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { |
366 readonly_changed_cb(zfsvfs, B_TRUE); | |
2354
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
367 } else { |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
368 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
369 readonly_changed_cb(zfsvfs, B_FALSE); |
8cc863b1e47a
6420204 root filesystem's delete queue is not running
tabriz
parents:
1646
diff
changeset
|
370 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) |
3912 | 371 return (EROFS); |
1544 | 372 } |
373 | |
374 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { | |
375 devices_changed_cb(zfsvfs, B_FALSE); | |
376 setuid_changed_cb(zfsvfs, B_FALSE); | |
377 } else { | |
378 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) | |
379 devices_changed_cb(zfsvfs, B_FALSE); | |
380 else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) | |
381 devices_changed_cb(zfsvfs, B_TRUE); | |
382 | |
383 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) | |
384 setuid_changed_cb(zfsvfs, B_FALSE); | |
385 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) | |
386 setuid_changed_cb(zfsvfs, B_TRUE); | |
387 } | |
388 | |
389 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) | |
390 exec_changed_cb(zfsvfs, B_FALSE); | |
391 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) | |
392 exec_changed_cb(zfsvfs, B_TRUE); | |
393 | |
2474
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
394 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
395 atime_changed_cb(zfsvfs, B_TRUE); |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
396 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
397 atime_changed_cb(zfsvfs, B_FALSE); |
c001ad7e0c25
6368751 libzfs interface for mount/umounting all the file systems for a given pool
eschrock
parents:
2354
diff
changeset
|
398 |
3234 | 399 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) |
400 xattr_changed_cb(zfsvfs, B_TRUE); | |
401 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) | |
402 xattr_changed_cb(zfsvfs, B_FALSE); | |
403 | |
1544 | 404 return (0); |
405 } | |
406 | |
407 static int | |
408 zfs_register_callbacks(vfs_t *vfsp) | |
409 { | |
410 struct dsl_dataset *ds = NULL; | |
411 objset_t *os = NULL; | |
412 zfsvfs_t *zfsvfs = NULL; | |
3265
967e0fca6143
6463140 zfs recv with a snapshot name that has 2 @@ in a row succeeds
ahrens
parents:
3246
diff
changeset
|
413 int readonly, do_readonly = FALSE; |
967e0fca6143
6463140 zfs recv with a snapshot name that has 2 @@ in a row succeeds
ahrens
parents:
3246
diff
changeset
|
414 int setuid, do_setuid = FALSE; |
967e0fca6143
6463140 zfs recv with a snapshot name that has 2 @@ in a row succeeds
ahrens
parents:
3246
diff
changeset
|
415 int exec, do_exec = FALSE; |
967e0fca6143
6463140 zfs recv with a snapshot name that has 2 @@ in a row succeeds
ahrens
parents:
3246
diff
changeset
|
416 int devices, do_devices = FALSE; |
967e0fca6143
6463140 zfs recv with a snapshot name that has 2 @@ in a row succeeds
ahrens
parents:
3246
diff
changeset
|
417 int xattr, do_xattr = FALSE; |
1544 | 418 int error = 0; |
419 | |
420 ASSERT(vfsp); | |
421 zfsvfs = vfsp->vfs_data; | |
422 ASSERT(zfsvfs); | |
423 os = zfsvfs->z_os; | |
424 | |
425 /* | |
426 * The act of registering our callbacks will destroy any mount | |
427 * options we may have. In order to enable temporary overrides | |
3234 | 428 * of mount options, we stash away the current values and |
1544 | 429 * restore them after we register the callbacks. |
430 */ | |
431 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { | |
432 readonly = B_TRUE; | |
433 do_readonly = B_TRUE; | |
434 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { | |
435 readonly = B_FALSE; | |
436 do_readonly = B_TRUE; | |
437 } | |
438 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { | |
439 devices = B_FALSE; | |
440 setuid = B_FALSE; | |
441 do_devices = B_TRUE; | |
442 do_setuid = B_TRUE; | |
443 } else { | |
444 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { | |
445 devices = B_FALSE; | |
446 do_devices = B_TRUE; | |
3912 | 447 } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { |
1544 | 448 devices = B_TRUE; |
449 do_devices = B_TRUE; | |
450 } | |
451 | |
452 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { | |
453 setuid = B_FALSE; | |
454 do_setuid = B_TRUE; | |
455 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { | |
456 setuid = B_TRUE; | |
457 do_setuid = B_TRUE; | |
458 } | |
459 } | |
460 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { | |
461 exec = B_FALSE; | |
462 do_exec = B_TRUE; | |
463 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { | |
464 exec = B_TRUE; | |
465 do_exec = B_TRUE; | |
466 } | |
3234 | 467 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { |
468 xattr = B_FALSE; | |
469 do_xattr = B_TRUE; | |
470 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { | |
471 xattr = B_TRUE; | |
472 do_xattr = B_TRUE; | |
473 } | |
1544 | 474 |
475 /* | |
476 * Register property callbacks. | |
477 * | |
478 * It would probably be fine to just check for i/o error from | |
479 * the first prop_register(), but I guess I like to go | |
480 * overboard... | |
481 */ | |
482 ds = dmu_objset_ds(os); | |
483 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); | |
484 error = error ? error : dsl_prop_register(ds, | |
3234 | 485 "xattr", xattr_changed_cb, zfsvfs); |
486 error = error ? error : dsl_prop_register(ds, | |
1544 | 487 "recordsize", blksz_changed_cb, zfsvfs); |
488 error = error ? error : dsl_prop_register(ds, | |
489 "readonly", readonly_changed_cb, zfsvfs); | |
490 error = error ? error : dsl_prop_register(ds, | |
491 "devices", devices_changed_cb, zfsvfs); | |
492 error = error ? error : dsl_prop_register(ds, | |
493 "setuid", setuid_changed_cb, zfsvfs); | |
494 error = error ? error : dsl_prop_register(ds, | |
495 "exec", exec_changed_cb, zfsvfs); | |
496 error = error ? error : dsl_prop_register(ds, | |
497 "snapdir", snapdir_changed_cb, zfsvfs); | |
498 error = error ? error : dsl_prop_register(ds, | |
499 "aclmode", acl_mode_changed_cb, zfsvfs); | |
500 error = error ? error : dsl_prop_register(ds, | |
501 "aclinherit", acl_inherit_changed_cb, zfsvfs); | |
502 if (error) | |
503 goto unregister; | |
504 | |
505 /* | |
506 * Invoke our callbacks to restore temporary mount options. | |
507 */ | |
508 if (do_readonly) | |
509 readonly_changed_cb(zfsvfs, readonly); | |
510 if (do_setuid) | |
511 setuid_changed_cb(zfsvfs, setuid); | |
512 if (do_exec) | |
513 exec_changed_cb(zfsvfs, exec); | |
514 if (do_devices) | |
515 devices_changed_cb(zfsvfs, devices); | |
3234 | 516 if (do_xattr) |
517 xattr_changed_cb(zfsvfs, xattr); | |
1544 | 518 |
519 return (0); | |
520 | |
521 unregister: | |
522 /* | |
523 * We may attempt to unregister some callbacks that are not | |
524 * registered, but this is OK; it will simply return ENOMSG, | |
525 * which we will ignore. | |
526 */ | |
527 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); | |
3234 | 528 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); |
1544 | 529 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); |
530 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); | |
531 (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); | |
532 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); | |
533 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); | |
534 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); | |
535 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); | |
536 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, | |
537 zfsvfs); | |
538 return (error); | |
539 | |
540 } | |
541 | |
542 static int | |
543 zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) | |
544 { | |
545 dev_t mount_dev; | |
546 uint64_t recordsize, readonly; | |
547 int error = 0; | |
548 int mode; | |
549 zfsvfs_t *zfsvfs; | |
550 znode_t *zp = NULL; | |
551 | |
552 ASSERT(vfsp); | |
553 ASSERT(osname); | |
554 | |
555 /* | |
556 * Initialize the zfs-specific filesystem structure. | |
557 * Should probably make this a kmem cache, shuffle fields, | |
558 * and just bzero up to z_hold_mtx[]. | |
559 */ | |
560 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); | |
561 zfsvfs->z_vfs = vfsp; | |
562 zfsvfs->z_parent = zfsvfs; | |
563 zfsvfs->z_assign = TXG_NOWAIT; | |
564 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; | |
565 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; | |
566 | |
567 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); | |
568 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), | |
569 offsetof(znode_t, z_link_node)); | |
570 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); | |
571 | |
572 /* Initialize the generic filesystem structure. */ | |
573 vfsp->vfs_bcount = 0; | |
574 vfsp->vfs_data = NULL; | |
575 | |
576 if (zfs_create_unique_device(&mount_dev) == -1) { | |
577 error = ENODEV; | |
578 goto out; | |
579 } | |
580 ASSERT(vfs_devismounted(mount_dev) == 0); | |
581 | |
582 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, | |
583 NULL)) | |
584 goto out; | |
585 | |
586 vfsp->vfs_dev = mount_dev; | |
587 vfsp->vfs_fstype = zfsfstype; | |
588 vfsp->vfs_bsize = recordsize; | |
589 vfsp->vfs_flag |= VFS_NOTRUNC; | |
590 vfsp->vfs_data = zfsvfs; | |
591 | |
592 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) | |
593 goto out; | |
594 | |
595 if (readonly) | |
596 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; | |
597 else | |
598 mode = DS_MODE_PRIMARY; | |
599 | |
600 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); | |
601 if (error == EROFS) { | |
602 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; | |
603 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, | |
604 &zfsvfs->z_os); | |
605 } | |
606 | |
607 if (error) | |
608 goto out; | |
609 | |
610 if (error = zfs_init_fs(zfsvfs, &zp, cr)) | |
611 goto out; | |
612 | |
613 /* The call to zfs_init_fs leaves the vnode held, release it here. */ | |
614 VN_RELE(ZTOV(zp)); | |
615 | |
616 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { | |
3234 | 617 uint64_t xattr; |
618 | |
1544 | 619 ASSERT(mode & DS_MODE_READONLY); |
620 atime_changed_cb(zfsvfs, B_FALSE); | |
621 readonly_changed_cb(zfsvfs, B_TRUE); | |
3234 | 622 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) |
623 goto out; | |
624 xattr_changed_cb(zfsvfs, xattr); | |
1544 | 625 zfsvfs->z_issnap = B_TRUE; |
626 } else { | |
627 error = zfs_register_callbacks(vfsp); | |
628 if (error) | |
629 goto out; | |
630 | |
4577 | 631 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) |
632 zfs_unlinked_drain(zfsvfs); | |
1544 | 633 |
634 /* | |
635 * Parse and replay the intent log. | |
4577 | 636 * |
637 * Because of ziltest, this must be done after | |
638 * zfs_unlinked_drain(). (Further note: ziltest doesn't | |
639 * use readonly mounts, where zfs_unlinked_drain() isn't | |
640 * called.) This is because ziltest causes spa_sync() | |
641 * to think it's committed, but actually it is not, so | |
642 * the intent log contains many txg's worth of changes. | |
643 * | |
644 * In particular, if object N is in the unlinked set in | |
645 * the last txg to actually sync, then it could be | |
646 * actually freed in a later txg and then reallocated in | |
647 * a yet later txg. This would write a "create object | |
648 * N" record to the intent log. Normally, this would be | |
649 * fine because the spa_sync() would have written out | |
650 * the fact that object N is free, before we could write | |
651 * the "create object N" intent log record. | |
652 * | |
653 * But when we are in ziltest mode, we advance the "open | |
654 * txg" without actually spa_sync()-ing the changes to | |
655 * disk. So we would see that object N is still | |
656 * allocated and in the unlinked set, and there is an | |
657 * intent log record saying to allocate it. | |
1544 | 658 */ |
659 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, | |
3461 | 660 zfs_replay_vector); |
1544 | 661 |
662 if (!zil_disable) | |
663 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); | |
664 } | |
665 | |
666 if (!zfsvfs->z_issnap) | |
667 zfsctl_create(zfsvfs); | |
668 out: | |
669 if (error) { | |
670 if (zfsvfs->z_os) | |
671 dmu_objset_close(zfsvfs->z_os); | |
672 kmem_free(zfsvfs, sizeof (zfsvfs_t)); | |
673 } else { | |
674 atomic_add_32(&zfs_active_fs_count, 1); | |
675 } | |
676 | |
677 return (error); | |
678 } | |
679 | |
680 void | |
681 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) | |
682 { | |
683 objset_t *os = zfsvfs->z_os; | |
684 struct dsl_dataset *ds; | |
685 | |
686 /* | |
687 * Unregister properties. | |
688 */ | |
689 if (!dmu_objset_is_snapshot(os)) { | |
690 ds = dmu_objset_ds(os); | |
691 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, | |
692 zfsvfs) == 0); | |
693 | |
3234 | 694 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, |
695 zfsvfs) == 0); | |
696 | |
1544 | 697 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, |
698 zfsvfs) == 0); | |
699 | |
700 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, | |
701 zfsvfs) == 0); | |
702 | |
703 VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, | |
704 zfsvfs) == 0); | |
705 | |
706 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, | |
707 zfsvfs) == 0); | |
708 | |
709 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, | |
710 zfsvfs) == 0); | |
711 | |
712 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, | |
713 zfsvfs) == 0); | |
714 | |
715 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, | |
716 zfsvfs) == 0); | |
717 | |
718 VERIFY(dsl_prop_unregister(ds, "aclinherit", | |
719 acl_inherit_changed_cb, zfsvfs) == 0); | |
720 } | |
721 } | |
722 | |
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * str:    NUL-terminated string that must consist of one or more
 *         characters in '0'..'9'.
 * objnum: receives the converted value; written only on success.
 *
 * Returns 0 on success, EINVAL if the string is empty or contains a
 * non-digit character, or ERANGE if the value does not fit in 64 bits.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	uint64_t num = 0;

	/* An empty string is not a number. */
	if (*str == '\0')
		return (EINVAL);

	for (; *str != '\0'; str++) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);
		digit = (uint64_t)(*str - '0');

		/* Reject instead of silently wrapping past UINT64_MAX. */
		if (num > (UINT64_MAX - digit) / 10)
			return (ERANGE);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
741 | |
/*
 * The boot path passed from the boot loader is in the form of
 * "rootpool-name/root-filesystem-object-number".  Convert this
 * string to a dataset name: "rootpool-name/root-filesystem-name".
 *
 * A path without a '/' is copied to 'outpath' unchanged.  'bpath' is
 * modified temporarily while the pool name is looked up and is restored
 * before returning.  The size of 'outpath' is not known here, so the
 * caller must supply a buffer large enough for the result.
 *
 * Returns 0 on success, EINVAL for an empty path or one starting with
 * '/', or the error from str_to_uint64() or dsl_dsobj_to_dsname().
 */
static int
parse_bootpath(char *bpath, char *outpath)
{
	char *sep;
	uint64_t dsobj;
	int err;

	if (bpath[0] == '\0' || bpath[0] == '/')
		return (EINVAL);

	sep = strchr(bpath, '/');
	if (sep == NULL) {
		/* No '/': the path is just the pool name. */
		(void) strcpy(outpath, bpath);
		return (0);
	}

	err = str_to_uint64(sep + 1, &dsobj);
	if (err != 0)
		return (err);

	/* Cut the string at the '/' so bpath names only the pool. */
	*sep = '\0';
	err = dsl_dsobj_to_dsname(bpath, dsobj, outpath);
	*sep = '/';

	return (err);
}
774 | |
1544 | 775 static int |
776 zfs_mountroot(vfs_t *vfsp, enum whymountroot why) | |
777 { | |
778 int error = 0; | |
779 int ret = 0; | |
780 static int zfsrootdone = 0; | |
781 zfsvfs_t *zfsvfs = NULL; | |
782 znode_t *zp = NULL; | |
783 vnode_t *vp = NULL; | |
3912 | 784 char *zfs_bootpath; |
1544 | 785 |
786 ASSERT(vfsp); | |
787 | |
788 /* | |
3912 | 789 * The filesystem that we mount as root is defined in the |
790 * "zfs-bootfs" property. | |
1544 | 791 */ |
792 if (why == ROOT_INIT) { | |
793 if (zfsrootdone++) | |
794 return (EBUSY); | |
795 | |
3912 | 796 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), |
797 DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != | |
798 DDI_SUCCESS) | |
799 return (EIO); | |
800 | |
801 error = parse_bootpath(zfs_bootpath, rootfs.bo_name); | |
802 ddi_prop_free(zfs_bootpath); | |
803 | |
804 if (error) | |
805 return (error); | |
1544 | 806 |
807 if (error = vfs_lock(vfsp)) | |
808 return (error); | |
809 | |
3912 | 810 if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) |
1544 | 811 goto out; |
812 | |
813 zfsvfs = (zfsvfs_t *)vfsp->vfs_data; | |
814 ASSERT(zfsvfs); | |
815 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) | |
816 goto out; | |
817 | |
818 vp = ZTOV(zp); | |
819 mutex_enter(&vp->v_lock); | |
820 vp->v_flag |= VROOT; | |
821 mutex_exit(&vp->v_lock); | |
822 rootvp = vp; | |
823 | |
824 /* | |
825 * The zfs_zget call above returns with a hold on vp, we release | |
826 * it here. | |
827 */ | |
828 VN_RELE(vp); | |
829 | |
830 /* | |
831 * Mount root as readonly initially, it will be remouted | |
832 * read/write by /lib/svc/method/fs-usr. | |
833 */ | |
834 readonly_changed_cb(vfsp->vfs_data, B_TRUE); | |
835 vfs_add((struct vnode *)0, vfsp, | |
836 (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); | |
837 out: | |
838 vfs_unlock(vfsp); | |
839 ret = (error) ? error : 0; | |
840 return (ret); | |
841 } else if (why == ROOT_REMOUNT) { | |
842 readonly_changed_cb(vfsp->vfs_data, B_FALSE); | |
843 vfsp->vfs_flag |= VFS_REMOUNT; | |
844 return (zfs_refresh_properties(vfsp)); | |
845 } else if (why == ROOT_UNMOUNT) { | |
846 zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); | |
847 (void) zfs_sync(vfsp, 0, 0); | |
848 return (0); | |
849 } | |
850 | |
851 /* | |
852 * if "why" is equal to anything else other than ROOT_INIT, | |
853 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. | |
854 */ | |
855 return (ENOTSUP); | |
856 } | |
857 | |
789 | 858 /*ARGSUSED*/ |
859 static int | |
860 zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) | |
861 { | |
862 char *osname; | |
863 pathname_t spn; | |
864 int error = 0; | |
865 uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? | |
3912 | 866 UIO_SYSSPACE : UIO_USERSPACE; |
789 | 867 int canwrite; |
868 | |
869 if (mvp->v_type != VDIR) | |
870 return (ENOTDIR); | |
871 | |
872 mutex_enter(&mvp->v_lock); | |
873 if ((uap->flags & MS_REMOUNT) == 0 && | |
874 (uap->flags & MS_OVERLAY) == 0 && | |
875 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { | |
876 mutex_exit(&mvp->v_lock); | |
877 return (EBUSY); | |
878 } | |
879 mutex_exit(&mvp->v_lock); | |
880 | |
881 /* | |
882 * ZFS does not support passing unparsed data in via MS_DATA. | |
883 * Users should use the MS_OPTIONSTR interface; this means | |
884 * that all option parsing is already done and the options struct | |
885 * can be interrogated. | |
886 */ | |
887 if ((uap->flags & MS_DATA) && uap->datalen > 0) | |
888 return (EINVAL); | |
889 | |
890 /* | |
891 * When doing a remount, we simply refresh our temporary properties | |
892 * according to those options set in the current VFS options. | |
893 */ | |
4577 | 894 if (uap->flags & MS_REMOUNT) |
1544 | 895 return (zfs_refresh_properties(vfsp)); |
789 | 896 |
897 /* | |
898 * Get the objset name (the "special" mount argument). | |
899 */ | |
900 if (error = pn_get(uap->spec, fromspace, &spn)) | |
901 return (error); | |
902 | |
903 osname = spn.pn_path; | |
904 | |
4543 | 905 /* |
906 * Check for mount privilege? | |
907 * | |
908 * If we don't have privilege then see if | |
909 * we have local permission to allow it | |
910 */ | |
911 error = secpolicy_fs_mount(cr, mvp, vfsp); | |
912 if (error) { | |
913 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); | |
914 if (error == 0) { | |
915 vattr_t vattr; | |
916 | |
917 /* | |
918 * Make sure user is the owner of the mount point | |
919 * or has sufficient privileges. | |
920 */ | |
921 | |
922 vattr.va_mask = AT_UID; | |
923 | |
924 if (VOP_GETATTR(mvp, &vattr, 0, cr)) { | |
925 goto out; | |
926 } | |
927 | |
928 if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { | |
929 goto out; | |
930 } | |
931 | |
932 if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { | |
933 goto out; | |
934 } | |
935 | |
936 secpolicy_fs_mount_clearopts(cr, vfsp); | |
937 } else { | |
938 goto out; | |
939 } | |
940 } | |
789 | 941 |
942 /* | |
943 * Refuse to mount a filesystem if we are in a local zone and the | |
944 * dataset is not visible. | |
945 */ | |
946 if (!INGLOBALZONE(curproc) && | |
947 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { | |
948 error = EPERM; | |
949 goto out; | |
950 } | |
951 | |
1544 | 952 error = zfs_domount(vfsp, osname, cr); |
789 | 953 |
954 out: | |
955 pn_free(&spn); | |
956 return (error); | |
957 } | |
958 | |
959 static int | |
960 zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) | |
961 { | |
962 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
963 dev32_t d32; | |
2885 | 964 uint64_t refdbytes, availbytes, usedobjs, availobjs; |
789 | 965 |
966 ZFS_ENTER(zfsvfs); | |
967 | |
2885 | 968 dmu_objset_space(zfsvfs->z_os, |
969 &refdbytes, &availbytes, &usedobjs, &availobjs); | |
789 | 970 |
971 /* | |
972 * The underlying storage pool actually uses multiple block sizes. | |
973 * We report the fragsize as the smallest block size we support, | |
974 * and we report our blocksize as the filesystem's maximum blocksize. | |
975 */ | |
976 statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; | |
977 statp->f_bsize = zfsvfs->z_max_blksz; | |
978 | |
979 /* | |
980 * The following report "total" blocks of various kinds in the | |
981 * file system, but reported in terms of f_frsize - the | |
982 * "fragment" size. | |
983 */ | |
984 | |
2885 | 985 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; |
986 statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; | |
789 | 987 statp->f_bavail = statp->f_bfree; /* no root reservation */ |
988 | |
989 /* | |
990 * statvfs() should really be called statufs(), because it assumes | |
991 * static metadata. ZFS doesn't preallocate files, so the best | |
992 * we can do is report the max that could possibly fit in f_files, | |
993 * and that minus the number actually used in f_ffree. | |
994 * For f_ffree, report the smaller of the number of object available | |
995 * and the number of blocks (each object will take at least a block). | |
996 */ | |
2885 | 997 statp->f_ffree = MIN(availobjs, statp->f_bfree); |
789 | 998 statp->f_favail = statp->f_ffree; /* no "root reservation" */ |
2885 | 999 statp->f_files = statp->f_ffree + usedobjs; |
789 | 1000 |
1001 (void) cmpldev(&d32, vfsp->vfs_dev); | |
1002 statp->f_fsid = d32; | |
1003 | |
1004 /* | |
1005 * We're a zfs filesystem. | |
1006 */ | |
1007 (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); | |
1008 | |
1123
02a0390fbc7d
6363529 UNIX03/UNIX98 *vsx* CAPI.os/files/fstatvfs/T.fstatvfs 11 FAILS
marks
parents:
849
diff
changeset
|
1009 statp->f_flag = vf_to_stf(vfsp->vfs_flag); |
789 | 1010 |
1011 statp->f_namemax = ZFS_MAXNAMELEN; | |
1012 | |
1013 /* | |
1014 * We have all of 32 characters to stuff a string here. | |
1015 * Is there anything useful we could/should provide? | |
1016 */ | |
1017 bzero(statp->f_fstr, sizeof (statp->f_fstr)); | |
1018 | |
1019 ZFS_EXIT(zfsvfs); | |
1020 return (0); | |
1021 } | |
1022 | |
1023 static int | |
1024 zfs_root(vfs_t *vfsp, vnode_t **vpp) | |
1025 { | |
1026 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
1027 znode_t *rootzp; | |
1028 int error; | |
1029 | |
1030 ZFS_ENTER(zfsvfs); | |
1031 | |
1032 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); | |
1033 if (error == 0) | |
1034 *vpp = ZTOV(rootzp); | |
1035 | |
1036 ZFS_EXIT(zfsvfs); | |
1037 return (error); | |
1038 } | |
1039 | |
1040 /*ARGSUSED*/ | |
1041 static int | |
1042 zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) | |
1043 { | |
1044 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
1045 int ret; | |
1046 | |
4543 | 1047 ret = secpolicy_fs_unmount(cr, vfsp); |
1048 if (ret) { | |
1049 ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), | |
1050 ZFS_DELEG_PERM_MOUNT, cr); | |
1051 if (ret) | |
1052 return (ret); | |
1053 } | |
1484
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
1054 |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
1055 (void) dnlc_purge_vfsp(vfsp, 0); |
d330e98f8ed7
6350001 ZFS lookup performance still much slower than UFS : help tar : help spec SFS
ek110237
parents:
1298
diff
changeset
|
1056 |
789 | 1057 /* |
1058 * Unmount any snapshots mounted under .zfs before unmounting the | |
1059 * dataset itself. | |
1060 */ | |
1061 if (zfsvfs->z_ctldir != NULL && | |
4543 | 1062 (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { |
789 | 1063 return (ret); |
4543 | 1064 } |
789 | 1065 |
1066 if (fflag & MS_FORCE) { | |
1067 vfsp->vfs_flag |= VFS_UNMOUNTED; | |
1068 zfsvfs->z_unmounted1 = B_TRUE; | |
1069 | |
1070 /* | |
4480
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1071 * Ensure that z_unmounted1 reaches global visibility |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1072 * before z_op_cnt. |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1073 */ |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1074 membar_producer(); |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1075 |
0976678e58c5
6544140 assertion failed: err == 0 (0x11 == 0x0), file: ../../common/fs/zfs/zfs_znode.c, line: 555
gw25295
parents:
3912
diff
changeset
|
1076 /* |
789 | 1077 * Wait for all zfs threads to leave zfs. |
1078 * Grabbing a rwlock as reader in all vops and | |
1079 * as writer here doesn't work because it too easy to get | |
1080 * multiple reader enters as zfs can re-enter itself. | |
1081 * This can lead to deadlock if there is an intervening | |
1082 * rw_enter as writer. | |
1083 * So a file system threads ref count (z_op_cnt) is used. | |
1084 * A polling loop on z_op_cnt may seem inefficient, but | |
1085 * - this saves all threads on exit from having to grab a | |
1086 * mutex in order to cv_signal | |
1087 * - only occurs on forced unmount in the rare case when | |
1088 * there are outstanding threads within the file system. | |
1089 */ | |
1090 while (zfsvfs->z_op_cnt) { | |
1091 delay(1); | |
1092 } | |
1093 | |
1094 zfs_objset_close(zfsvfs); | |
1095 | |
1096 return (0); | |
1097 } | |
1098 /* | |
1099 * Check the number of active vnodes in the file system. | |
1100 * Our count is maintained in the vfs structure, but the number | |
1101 * is off by 1 to indicate a hold on the vfs structure itself. | |
1102 * | |
1103 * The '.zfs' directory maintains a reference of its own, and any active | |
1104 * references underneath are reflected in the vnode count. | |
1105 */ | |
1106 if (zfsvfs->z_ctldir == NULL) { | |
3461 | 1107 if (vfsp->vfs_count > 1) |
789 | 1108 return (EBUSY); |
1109 } else { | |
1110 if (vfsp->vfs_count > 2 || | |
1111 (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { | |
1112 return (EBUSY); | |
1113 } | |
1114 } | |
1115 | |
1116 vfsp->vfs_flag |= VFS_UNMOUNTED; | |
1117 zfs_objset_close(zfsvfs); | |
1118 | |
1119 return (0); | |
1120 } | |
1121 | |
1122 static int | |
1123 zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) | |
1124 { | |
1125 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
1126 znode_t *zp; | |
1127 uint64_t object = 0; | |
1128 uint64_t fid_gen = 0; | |
1129 uint64_t gen_mask; | |
1130 uint64_t zp_gen; | |
1131 int i, err; | |
1132 | |
1133 *vpp = NULL; | |
1134 | |
1135 ZFS_ENTER(zfsvfs); | |
1136 | |
1137 if (fidp->fid_len == LONG_FID_LEN) { | |
1138 zfid_long_t *zlfid = (zfid_long_t *)fidp; | |
1139 uint64_t objsetid = 0; | |
1140 uint64_t setgen = 0; | |
1141 | |
1142 for (i = 0; i < sizeof (zlfid->zf_setid); i++) | |
1143 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); | |
1144 | |
1145 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) | |
1146 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); | |
1147 | |
1148 ZFS_EXIT(zfsvfs); | |
1149 | |
1150 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); | |
1151 if (err) | |
1152 return (EINVAL); | |
1153 ZFS_ENTER(zfsvfs); | |
1154 } | |
1155 | |
1156 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { | |
1157 zfid_short_t *zfid = (zfid_short_t *)fidp; | |
1158 | |
1159 for (i = 0; i < sizeof (zfid->zf_object); i++) | |
1160 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); | |
1161 | |
1162 for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
1163 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); | |
1164 } else { | |
1165 ZFS_EXIT(zfsvfs); | |
1166 return (EINVAL); | |
1167 } | |
1168 | |
1169 /* A zero fid_gen means we are in the .zfs control directories */ | |
1170 if (fid_gen == 0 && | |
1171 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { | |
1172 *vpp = zfsvfs->z_ctldir; | |
1173 ASSERT(*vpp != NULL); | |
1174 if (object == ZFSCTL_INO_SNAPDIR) { | |
1175 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, | |
1176 0, NULL, NULL) == 0); | |
1177 } else { | |
1178 VN_HOLD(*vpp); | |
1179 } | |
1180 ZFS_EXIT(zfsvfs); | |
1181 return (0); | |
1182 } | |
1183 | |
1184 gen_mask = -1ULL >> (64 - 8 * i); | |
1185 | |
1186 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); | |
1187 if (err = zfs_zget(zfsvfs, object, &zp)) { | |
1188 ZFS_EXIT(zfsvfs); | |
1189 return (err); | |
1190 } | |
1191 zp_gen = zp->z_phys->zp_gen & gen_mask; | |
1192 if (zp_gen == 0) | |
1193 zp_gen = 1; | |
3461 | 1194 if (zp->z_unlinked || zp_gen != fid_gen) { |
789 | 1195 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); |
1196 VN_RELE(ZTOV(zp)); | |
1197 ZFS_EXIT(zfsvfs); | |
1198 return (EINVAL); | |
1199 } | |
1200 | |
1201 *vpp = ZTOV(zp); | |
1202 ZFS_EXIT(zfsvfs); | |
1203 return (0); | |
1204 } | |
1205 | |
1206 static void | |
1207 zfs_objset_close(zfsvfs_t *zfsvfs) | |
1208 { | |
1209 znode_t *zp, *nextzp; | |
1210 objset_t *os = zfsvfs->z_os; | |
1211 | |
1212 /* | |
1213 * For forced unmount, at this point all vops except zfs_inactive | |
1214 * are erroring EIO. We need to now suspend zfs_inactive threads | |
1215 * while we are freeing dbufs before switching zfs_inactive | |
1216 * to use behaviour without a objset. | |
1217 */ | |
1218 rw_enter(&zfsvfs->z_um_lock, RW_WRITER); | |
1219 | |
1220 /* | |
1221 * Release all holds on dbufs | |
1222 * Note, although we have stopped all other vop threads and | |
1223 * zfs_inactive(), the dmu can callback via znode_pageout_func() | |
1224 * which can zfs_znode_free() the znode. | |
1225 * So we lock z_all_znodes; search the list for a held | |
1226 * dbuf; drop the lock (we know zp can't disappear if we hold | |
1227 * a dbuf lock; then regrab the lock and restart. | |
1228 */ | |
1229 mutex_enter(&zfsvfs->z_znodes_lock); | |
1230 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { | |
1231 nextzp = list_next(&zfsvfs->z_all_znodes, zp); | |
1232 if (zp->z_dbuf_held) { | |
1233 /* dbufs should only be held when force unmounting */ | |
1234 zp->z_dbuf_held = 0; | |
1235 mutex_exit(&zfsvfs->z_znodes_lock); | |
1544 | 1236 dmu_buf_rele(zp->z_dbuf, NULL); |
789 | 1237 /* Start again */ |
1238 mutex_enter(&zfsvfs->z_znodes_lock); | |
1239 nextzp = list_head(&zfsvfs->z_all_znodes); | |
1240 } | |
1241 } | |
1242 mutex_exit(&zfsvfs->z_znodes_lock); | |
1243 | |
1244 /* | |
1245 * Unregister properties. | |
1246 */ | |
1544 | 1247 if (!dmu_objset_is_snapshot(os)) |
1248 zfs_unregister_callbacks(zfsvfs); | |
789 | 1249 |
1250 /* | |
1251 * Switch zfs_inactive to behaviour without an objset. | |
1252 * It just tosses cached pages and frees the znode & vnode. | |
1253 * Then re-enable zfs_inactive threads in that new behaviour. | |
1254 */ | |
1255 zfsvfs->z_unmounted2 = B_TRUE; | |
1256 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ | |
1257 | |
1258 /* | |
1259 * Close the zil. Can't close the zil while zfs_inactive | |
1260 * threads are blocked as zil_close can call zfs_inactive. | |
1261 */ | |
1262 if (zfsvfs->z_log) { | |
1263 zil_close(zfsvfs->z_log); | |
1264 zfsvfs->z_log = NULL; | |
1265 } | |
1266 | |
1267 /* | |
1544 | 1268 * Evict all dbufs so that cached znodes will be freed |
1269 */ | |
1646
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1270 if (dmu_objset_evict_dbufs(os, 1)) { |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1271 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1272 (void) dmu_objset_evict_dbufs(os, 0); |
b4e43ae19fff
6393443 Remove remaining txg_wait_synced() from zfs unmount path.
perrin
parents:
1544
diff
changeset
|
1273 } |
1544 | 1274 |
1275 /* | |
789 | 1276 * Finally close the objset |
1277 */ | |
1278 dmu_objset_close(os); | |
1279 | |
1298 | 1280 /* |
1281 * We can now safely destroy the '.zfs' directory node. | |
1282 */ | |
1283 if (zfsvfs->z_ctldir != NULL) | |
1284 zfsctl_destroy(zfsvfs); | |
1285 | |
789 | 1286 } |
1287 | |
1288 static void | |
1289 zfs_freevfs(vfs_t *vfsp) | |
1290 { | |
1291 zfsvfs_t *zfsvfs = vfsp->vfs_data; | |
1292 | |
1293 kmem_free(zfsvfs, sizeof (zfsvfs_t)); | |
1294 | |
1295 atomic_add_32(&zfs_active_fs_count, -1); | |
1296 } | |
1297 | |
1298 /* | |
1299 * VFS_INIT() initialization. Note that there is no VFS_FINI(), | |
1300 * so we can't safely do any non-idempotent initialization here. | |
1301 * Leave that to zfs_init() and zfs_fini(), which are called | |
1302 * from the module's _init() and _fini() entry points. | |
1303 */ | |
1304 /*ARGSUSED*/ | |
1305 static int | |
1306 zfs_vfsinit(int fstype, char *name) | |
1307 { | |
1308 int error; | |
1309 | |
1310 zfsfstype = fstype; | |
1311 | |
1312 /* | |
1313 * Setup vfsops and vnodeops tables. | |
1314 */ | |
1315 error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); | |
1316 if (error != 0) { | |
1317 cmn_err(CE_WARN, "zfs: bad vfs ops template"); | |
1318 } | |
1319 | |
1320 error = zfs_create_op_tables(); | |
1321 if (error) { | |
1322 zfs_remove_op_tables(); | |
1323 cmn_err(CE_WARN, "zfs: bad vnode ops template"); | |
1324 (void) vfs_freevfsops_by_type(zfsfstype); | |
1325 return (error); | |
1326 } | |
1327 | |
1328 mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); | |
1329 | |
1330 /* | |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1331 * Unique major number for all zfs mounts. |
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1332 * If we run out of 32-bit minors, we'll getudev() another major. |
789 | 1333 */ |
849
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1334 zfs_major = ddi_name_to_major(ZFS_DRIVER); |
8d799fd81a9b
6345023 /dev/zfs fails to open once ZFS module is unloaded
bonwick
parents:
789
diff
changeset
|
1335 zfs_minor = ZFS_MIN_MINOR; |
789 | 1336 |
1337 return (0); | |
1338 } | |
1339 | |
/*
 * One-time setup: bring up the .zfs directory structures first, then
 * the znode cache, vnode ops, etc.
 */
void
zfs_init(void)
{
	zfsctl_init();		/* .zfs directory structures */
	zfs_znode_init();	/* znode cache, vnode ops, etc... */
}
1353 | |
/*
 * Counterpart of zfs_init(): calls zfsctl_fini() and then
 * zfs_znode_fini().
 */
void
zfs_fini(void)
{
	zfsctl_fini();
	zfs_znode_fini();
}
1360 | |
1361 int | |
1362 zfs_busy(void) | |
1363 { | |
1364 return (zfs_active_fs_count != 0); | |
1365 } | |
1366 | |
4577 | 1367 int |
1368 zfs_get_stats(objset_t *os, nvlist_t *nv) | |
1369 { | |
1370 int error; | |
1371 uint64_t val; | |
1372 | |
1373 error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); | |
1374 if (error == 0) | |
1375 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); | |
1376 | |
1377 return (error); | |
1378 } | |
1379 | |
1380 int | |
1381 zfs_set_version(const char *name, uint64_t newvers) | |
1382 { | |
1383 int error; | |
1384 objset_t *os; | |
1385 dmu_tx_t *tx; | |
1386 uint64_t curvers; | |
1387 | |
1388 /* | |
1389 * XXX for now, require that the filesystem be unmounted. Would | |
1390 * be nice to find the zfsvfs_t and just update that if | |
1391 * possible. | |
1392 */ | |
1393 | |
1394 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) | |
1395 return (EINVAL); | |
1396 | |
1397 error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); | |
1398 if (error) | |
1399 return (error); | |
1400 | |
1401 error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, | |
1402 8, 1, &curvers); | |
1403 if (error) | |
1404 goto out; | |
1405 if (newvers < curvers) { | |
1406 error = EINVAL; | |
1407 goto out; | |
1408 } | |
1409 | |
1410 tx = dmu_tx_create(os); | |
1411 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); | |
1412 error = dmu_tx_assign(tx, TXG_WAIT); | |
1413 if (error) { | |
1414 dmu_tx_abort(tx); | |
1415 goto out; | |
1416 } | |
1417 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, | |
1418 &newvers, tx); | |
1419 | |
1420 spa_history_internal_log(LOG_DS_UPGRADE, | |
1421 dmu_objset_spa(os), tx, CRED(), | |
1422 "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, | |
1423 dmu_objset_id(os)); | |
1424 dmu_tx_commit(tx); | |
1425 | |
1426 out: | |
1427 dmu_objset_close(os); | |
1428 return (error); | |
1429 } | |
1430 | |
789 | 1431 static vfsdef_t vfw = { |
1432 VFSDEF_VERSION, | |
1433 MNTTYPE_ZFS, | |
1434 zfs_vfsinit, | |
1488 | 1435 VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, |
789 | 1436 &zfs_mntopts |
1437 }; | |
1438 | |
1439 struct modlfs zfs_modlfs = { | |
4577 | 1440 &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw |
789 | 1441 }; |