Mercurial > illumos > illumos-gate
diff usr/src/uts/common/fs/zfs/zfs_ioctl.c @ 9396:f41cf682d0d3
PSARC/2009/204 ZFS user/group quotas & space accounting
6501037 want user/group quotas on ZFS
6830813 zfs list -t all fails assertion
6827260 assertion failed in arc_read(): hdr == pbuf->b_hdr
6815592 panic: No such hold X on refcount Y from zfs_znode_move
6759986 zfs list shows temporary %clone when doing online zfs recv
author | Matthew Ahrens <Matthew.Ahrens@Sun.COM> |
---|---|
date | Sat, 18 Apr 2009 13:41:47 -0700 |
parents | 09928982c591 |
children | e7ffacaec3a8 |
line wrap: on
line diff
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c Sat Apr 18 01:13:46 2009 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c Sat Apr 18 13:41:47 2009 -0700 @@ -93,6 +93,15 @@ boolean_t zvec_pool_check; } zfs_ioc_vec_t; +/* This array is indexed by zfs_userquota_prop_t */ +static const char *userquota_perms[] = { + ZFS_DELEG_PERM_USERUSED, + ZFS_DELEG_PERM_USERQUOTA, + ZFS_DELEG_PERM_GROUPUSED, + ZFS_DELEG_PERM_GROUPQUOTA, +}; + +static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); static void clear_props(char *dataset, nvlist_t *props, nvlist_t *newprops); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); @@ -703,6 +712,55 @@ } } +static int +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +{ + int err = zfs_secpolicy_read(zc, cr); + if (err) + return (err); + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + if (zc->zc_value[0] == 0) { + /* + * They are asking about a posix uid/gid. If it's + * themself, allow it. + */ + if (zc->zc_objset_type == ZFS_PROP_USERUSED || + zc->zc_objset_type == ZFS_PROP_USERQUOTA) { + if (zc->zc_guid == crgetuid(cr)) + return (0); + } else { + if (groupmember(zc->zc_guid, cr)) + return (0); + } + } + + return (zfs_secpolicy_write_perms(zc->zc_name, + userquota_perms[zc->zc_objset_type], cr)); +} + +static int +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +{ + int err = zfs_secpolicy_read(zc, cr); + if (err) + return (err); + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + return (zfs_secpolicy_write_perms(zc->zc_name, + userquota_perms[zc->zc_objset_type], cr)); +} + +static int +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +{ + return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, cr)); +} + /* * Returns the nvlist as specified by the user in the zfs_cmd_t. */ @@ -762,6 +820,69 @@ } static int +getzfsvfs(const char *dsname, zfsvfs_t **zvp) +{ + objset_t *os; + int error; + + error = dmu_objset_open(dsname, DMU_OST_ZFS, + DS_MODE_USER | DS_MODE_READONLY, &os); + if (error) + return (error); + + mutex_enter(&os->os->os_user_ptr_lock); + *zvp = dmu_objset_get_user(os); + if (*zvp) { + VFS_HOLD((*zvp)->z_vfs); + } else { + error = ESRCH; + } + mutex_exit(&os->os->os_user_ptr_lock); + dmu_objset_close(os); + return (error); +} + +/* + * Find a zfsvfs_t for a mounted filesystem, or create our own, in which + * case its z_vfs will be NULL, and it will be opened as the owner. + */ +static int +zfsvfs_hold(const char *name, boolean_t readonly, void *tag, zfsvfs_t **zvp) +{ + int error = 0; + int mode = DS_MODE_OWNER | (readonly ? DS_MODE_READONLY : 0); + + if (getzfsvfs(name, zvp) != 0) + error = zfsvfs_create(name, mode, zvp); + if (error == 0) { + rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag); + if ((*zvp)->z_unmounted) { + /* + * XXX we could probably try again, since the unmounting + * thread should be just about to disassociate the + * objset from the zfsvfs. + */ + rrw_exit(&(*zvp)->z_teardown_lock, tag); + return (EBUSY); + } + } + return (error); +} + +static void +zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) +{ + rrw_exit(&zfsvfs->z_teardown_lock, tag); + + if (zfsvfs->z_vfs) { + VFS_RELE(zfsvfs->z_vfs); + } else { + dmu_objset_close(zfsvfs->z_os); + zfsvfs_free(zfsvfs); + } +} + +static int zfs_ioc_pool_create(zfs_cmd_t *zc) { int error; @@ -1313,6 +1434,23 @@ return (err); } +static boolean_t +dataset_name_hidden(const char *name) +{ + /* + * Skip over datasets that are not visible in this zone, + * internal datasets (which have a $ in their name), and + * temporary datasets (which have a % in their name). + */ + if (strchr(name, '$') != NULL) + return (B_TRUE); + if (strchr(name, '%') != NULL) + return (B_TRUE); + if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) + return (B_TRUE); + return (B_FALSE); +} + /* * inputs: * zc_name name of filesystem @@ -1321,6 +1459,7 @@ * * outputs: * zc_name name of next filesystem + * zc_cookie zap cursor * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist @@ -1362,15 +1501,10 @@ NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; - } while (error == 0 && !INGLOBALZONE(curproc) && - !zone_dataset_visible(zc->zc_name, NULL)); + } while (error == 0 && dataset_name_hidden(zc->zc_name)); dmu_objset_close(os); - /* - * If it's a hidden dataset (ie. with a '$' in its name), don't - * try to get stats for it. Userland will skip over it. - */ - if (error == 0 && strchr(zc->zc_name, '$') == NULL) + if (error == 0) error = zfs_ioc_objset_stats(zc); /* fill in the stats */ return (error); @@ -1399,9 +1533,10 @@ if (error) return (error == ENOENT ? ESRCH : error); - if (zc->zc_cookie == 0) + if (zc->zc_cookie == 0) { (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, NULL, DS_FIND_SNAPSHOTS); + } /* * A dataset name of maximum length cannot have any snapshots, * so exit immediately. @@ -1434,6 +1569,7 @@ uint64_t intval; char *strval; nvlist_t *genericnvl; + boolean_t issnap = (strchr(name, '@') != NULL); /* * First validate permission to set all of the properties @@ -1448,16 +1584,35 @@ * If this is a user-defined property, it must be a * string, and there is no further validation to do. */ - if (!zfs_prop_user(propname) || - nvpair_type(elem) != DATA_TYPE_STRING) - return (EINVAL); - - if (error = zfs_secpolicy_write_perms(name, - ZFS_DELEG_PERM_USERPROP, CRED())) - return (error); - continue; + if (zfs_prop_user(propname) && + nvpair_type(elem) == DATA_TYPE_STRING) { + if (error = zfs_secpolicy_write_perms(name, + ZFS_DELEG_PERM_USERPROP, CRED())) + return (error); + continue; + } + + if (!issnap && zfs_prop_userquota(propname) && + nvpair_type(elem) == DATA_TYPE_UINT64_ARRAY) { + const char *perm; + const char *up = zfs_userquota_prop_prefixes + [ZFS_PROP_USERQUOTA]; + if (strncmp(propname, up, strlen(up)) == 0) + perm = ZFS_DELEG_PERM_USERQUOTA; + else + perm = ZFS_DELEG_PERM_GROUPQUOTA; + if (error = zfs_secpolicy_write_perms(name, + perm, CRED())) + return (error); + continue; + } + + return (EINVAL); } + if (issnap) + return (EINVAL); + if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0) return (error); @@ -1493,8 +1648,7 @@ break; case ZFS_PROP_COPIES: - if (zfs_earlier_version(name, - SPA_VERSION_DITTO_BLOCKS)) + if (zfs_earlier_version(name, SPA_VERSION_DITTO_BLOCKS)) return (ENOTSUP); break; @@ -1520,13 +1674,44 @@ zfs_prop_t prop = zfs_name_to_prop(propname); if (prop == ZPROP_INVAL) { - VERIFY(nvpair_value_string(elem, &strval) == 0); - error = dsl_prop_set(name, propname, 1, - strlen(strval) + 1, strval); - if (error == 0) - continue; - else - goto out; + if (zfs_prop_userquota(propname)) { + uint64_t *valary; + unsigned int vallen; + const char *domain; + zfs_userquota_prop_t type; + uint64_t rid; + uint64_t quota; + zfsvfs_t *zfsvfs; + + VERIFY(nvpair_value_uint64_array(elem, + &valary, &vallen) == 0); + VERIFY(vallen == 3); + type = valary[0]; + rid = valary[1]; + quota = valary[2]; + domain = propname + + strlen(zfs_userquota_prop_prefixes[type]); + + error = zfsvfs_hold(name, B_FALSE, FTAG, + &zfsvfs); + if (error == 0) { + error = zfs_set_userquota(zfsvfs, + type, domain, rid, quota); + zfsvfs_rele(zfsvfs, FTAG); + } + if (error == 0) + continue; + else + goto out; + } else if (zfs_prop_user(propname)) { + VERIFY(nvpair_value_string(elem, &strval) == 0); + error = dsl_prop_set(name, propname, 1, + strlen(strval) + 1, strval); + if (error == 0) + continue; + else + goto out; + } } switch (prop) { @@ -1570,10 +1755,26 @@ break; case ZFS_PROP_VERSION: - if ((error = nvpair_value_uint64(elem, &intval)) != 0 || - (error = zfs_set_version(name, intval)) != 0) + { + zfsvfs_t *zfsvfs; + + if ((error = nvpair_value_uint64(elem, &intval)) != 0) + goto out; + if ((error = zfsvfs_hold(name, B_FALSE, FTAG, + &zfsvfs)) != 0) + goto out; + error = zfs_set_version(zfsvfs, intval); + zfsvfs_rele(zfsvfs, FTAG); + + if (error == 0 && intval >= ZPL_VERSION_USERSPACE) { + zfs_cmd_t zc = { 0 }; + (void) strcpy(zc.zc_name, name); + (void) zfs_ioc_userspace_upgrade(&zc); + } + if (error) goto out; break; + } default: if (nvpair_type(elem) == DATA_TYPE_STRING) { @@ -1980,11 +2181,10 @@ * processing. */ static int -zfs_fill_zplprops_impl(objset_t *os, uint64_t default_zplver, +zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { - uint64_t zplver = default_zplver; uint64_t sense = ZFS_PROP_UNDEFINED; uint64_t norm = ZFS_PROP_UNDEFINED; uint64_t u8 = ZFS_PROP_UNDEFINED; @@ -2072,6 +2272,8 @@ ASSERT(cp != NULL); cp[0] = '\0'; + if (zfs_earlier_version(dataset, SPA_VERSION_USERSPACE)) + zplver = ZPL_VERSION_USERSPACE - 1; if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) { zplver = ZPL_VERSION_FUID - 1; fuids_ok = B_FALSE; @@ -2255,6 +2457,7 @@ * zc_name name of filesystem * zc_value short name of snapshot * zc_cookie recursive flag + * zc_nvlist_src[_size] property list * * outputs: none */ @@ -2390,31 +2593,19 @@ if (error) return (error); - if (dmu_objset_type(os) == DMU_OST_ZFS) { - mutex_enter(&os->os->os_user_ptr_lock); - zfsvfs = dmu_objset_get_user(os); - if (zfsvfs != NULL) - VFS_HOLD(zfsvfs->z_vfs); - mutex_exit(&os->os->os_user_ptr_lock); - } - - if (zfsvfs != NULL) { - char *osname; + if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { int mode; - osname = kmem_alloc(MAXNAMELEN, KM_SLEEP); - error = zfs_suspend_fs(zfsvfs, osname, &mode); + error = zfs_suspend_fs(zfsvfs, NULL, &mode); if (error == 0) { int resume_err; - ASSERT(strcmp(osname, zc->zc_name) == 0); error = dmu_objset_rollback(os); - resume_err = zfs_resume_fs(zfsvfs, osname, mode); + resume_err = zfs_resume_fs(zfsvfs, zc->zc_name, mode); error = error ? error : resume_err; } else { dmu_objset_close(os); } - kmem_free(osname, MAXNAMELEN); VFS_RELE(zfsvfs->z_vfs); } else { error = dmu_objset_rollback(os); @@ -2529,32 +2720,26 @@ return (EBADF); } - if (dmu_objset_open(tofs, DMU_OST_ANY, - DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { - /* - * Try to get the zfsvfs for the receiving objset. - * There won't be one if we're operating on a zvol, - * if the objset doesn't exist yet, or is not mounted. - */ - mutex_enter(&os->os->os_user_ptr_lock); - if (zfsvfs = dmu_objset_get_user(os)) { - if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) { - mutex_exit(&os->os->os_user_ptr_lock); - dmu_objset_close(os); - zfsvfs = NULL; - error = EBUSY; - goto out; - } - VFS_HOLD(zfsvfs->z_vfs); + if (getzfsvfs(tofs, &zfsvfs) == 0) { + if (!mutex_tryenter(&zfsvfs->z_online_recv_lock)) { + VFS_RELE(zfsvfs->z_vfs); + zfsvfs = NULL; + error = EBUSY; + goto out; } - mutex_exit(&os->os->os_user_ptr_lock); - /* * If new properties are supplied, they are to completely * replace the existing ones, so stash away the existing ones. */ if (props) - (void) dsl_prop_get_all(os, &origprops, TRUE); + (void) dsl_prop_get_all(zfsvfs->z_os, &origprops, TRUE); + } else if (props && dmu_objset_open(tofs, DMU_OST_ANY, + DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { + /* + * Get the props even if there was no zfsvfs (zvol or + * unmounted zpl). + */ + (void) dsl_prop_get_all(os, &origprops, TRUE); dmu_objset_close(os); } @@ -2827,6 +3012,120 @@ } /* + * Retrieve a single {user|group}{used|quota}@... property. + * + * inputs: + * zc_name name of filesystem + * zc_objset_type zfs_userquota_prop_t + * zc_value domain name (eg. "S-1-234-567-89") + * zc_guid RID/UID/GID + * + * outputs: + * zc_cookie property value + */ +static int +zfs_ioc_userspace_one(zfs_cmd_t *zc) +{ + zfsvfs_t *zfsvfs; + int error; + + if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) + return (EINVAL); + + error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs); + if (error) + return (error); + + error = zfs_userspace_one(zfsvfs, + zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); + zfsvfs_rele(zfsvfs, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * zc_cookie zap cursor + * zc_objset_type zfs_userquota_prop_t + * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) + * + * outputs: + * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) + * zc_cookie zap cursor + */ +static int +zfs_ioc_userspace_many(zfs_cmd_t *zc) +{ + zfsvfs_t *zfsvfs; + int error; + + error = zfsvfs_hold(zc->zc_name, B_TRUE, FTAG, &zfsvfs); + if (error) + return (error); + + int bufsize = zc->zc_nvlist_dst_size; + void *buf = kmem_alloc(bufsize, KM_SLEEP); + + error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie, + buf, &zc->zc_nvlist_dst_size); + + if (error == 0) { + error = xcopyout(buf, + (void *)(uintptr_t)zc->zc_nvlist_dst, + zc->zc_nvlist_dst_size); + } + kmem_free(buf, bufsize); + zfsvfs_rele(zfsvfs, FTAG); + + return (error); +} + +/* + * inputs: + * zc_name name of filesystem + * + * outputs: + * none + */ +static int +zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) +{ + objset_t *os; + int error; + zfsvfs_t *zfsvfs; + + if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { + if (!dmu_objset_userused_enabled(zfsvfs->z_os->os)) { + /* + * If userused is not enabled, it may be because the + * objset needs to be closed & reopened (to grow the + * objset_phys_t). Suspend/resume the fs will do that. + */ + int mode; + error = zfs_suspend_fs(zfsvfs, NULL, &mode); + if (error == 0) { + error = zfs_resume_fs(zfsvfs, + zc->zc_name, mode); + } + } + if (error == 0) + error = dmu_objset_userspace_upgrade(zfsvfs->z_os); + VFS_RELE(zfsvfs->z_vfs); + } else { + error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, + DS_MODE_USER, &os); + if (error) + return (error); + + error = dmu_objset_userspace_upgrade(os); + dmu_objset_close(os); + } + + return (error); +} + +/* * We don't want to have a hard dependency * against some special symbols in sharefs * nfs, and smbsrv. Determine them if needed when @@ -3206,27 +3505,24 @@ { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, B_TRUE }, { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - B_FALSE } + B_FALSE }, + { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, + DATASET_NAME, B_FALSE, B_FALSE }, + { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, + DATASET_NAME, B_FALSE, B_FALSE }, + { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + DATASET_NAME, B_FALSE, B_TRUE }, }; int pool_status_check(const char *name, zfs_ioc_namecheck_t type) { spa_t *spa; - char pool[ZFS_MAXNAMELEN]; int error; ASSERT(type == POOL_NAME || type == DATASET_NAME); - (void) strlcpy(pool, name, ZFS_MAXNAMELEN); - if (type == DATASET_NAME) { - char *p; - - if ((p = strpbrk(pool, "/@")) != NULL) - *p = '\0'; - } - - error = spa_open(pool, &spa, FTAG); + error = spa_open(name, &spa, FTAG); if (error == 0) { if (spa_suspended(spa)) error = EAGAIN; @@ -3292,7 +3588,7 @@ rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t)); if (error == 0) { error = rc; - if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE) + if (zfs_ioc_vec[vec].zvec_his_log) zfs_log_history(zc); }