Mercurial > illumos > illumos-gate
changeset 12633:9f2cda0ed938
PSARC 2010/144 lofi(7D) in non global zones
6354954 lofi support in non-global zones
6942891 prof_lookup_globaldev() leaks rootdir refs
6945005 lofiadm -a /dev/lofi/1: recursive mutex enter
6946486 lofi_ioctl() shouldn't allow disk ioctl()s on /dev/lofictl
line wrap: on
line diff
--- a/usr/src/cmd/zoneadmd/vplat.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/cmd/zoneadmd/vplat.c Wed Jun 16 10:02:44 2010 -0700 @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -4109,6 +4108,25 @@ return (Z_OK); } +static void +report_prop_err(zlog_t *zlogp, const char *name, const char *value, int res) +{ + switch (res) { + case Z_TOO_BIG: + zerror(zlogp, B_FALSE, "%s property value is too large.", name); + break; + + case Z_INVALID_PROPERTY: + zerror(zlogp, B_FALSE, "%s property value \"%s\" is not valid", + name, value); + break; + + default: + zerror(zlogp, B_TRUE, "fetching property %s: %d", name, res); + break; + } +} + /* * Sets the hostid of the new zone based on its configured value. The zone's * zone_t structure must already exist in kernel memory. 'zlogp' refers to the @@ -4119,57 +4137,81 @@ * This function returns zero on success and a nonzero error code on failure. 
*/ static int -setup_zone_hostid(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid) +setup_zone_hostid(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) { int res; - zone_dochandle_t handle; char hostidp[HW_HOSTID_LEN]; unsigned int hostid; + res = zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)); + + if (res == Z_BAD_PROPERTY) { + return (Z_OK); + } else if (res != Z_OK) { + report_prop_err(zlogp, "hostid", hostidp, res); + return (res); + } + + hostid = (unsigned int)strtoul(hostidp, NULL, 16); + if ((res = zone_setattr(zoneid, ZONE_ATTR_HOSTID, &hostid, + sizeof (hostid))) != 0) { + zerror(zlogp, B_TRUE, + "zone hostid is not valid: %s: %d", hostidp, res); + return (Z_SYSTEM); + } + + return (res); +} + +static int +setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) +{ + char fsallowedp[ZONE_FS_ALLOWED_MAX]; + int res; + + res = zonecfg_get_fs_allowed(handle, fsallowedp, sizeof (fsallowedp)); + + if (res == Z_BAD_PROPERTY) { + return (Z_OK); + } else if (res != Z_OK) { + report_prop_err(zlogp, "fs-allowed", fsallowedp, res); + return (res); + } + + if (zone_setattr(zoneid, ZONE_ATTR_FS_ALLOWED, &fsallowedp, + sizeof (fsallowedp)) != 0) { + zerror(zlogp, B_TRUE, + "fs-allowed couldn't be set: %s: %d", fsallowedp, res); + return (Z_SYSTEM); + } + + return (res); +} + +static int +setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid) +{ + zone_dochandle_t handle; + int res = Z_OK; + if ((handle = zonecfg_init_handle()) == NULL) { zerror(zlogp, B_TRUE, "getting zone configuration handle"); return (Z_BAD_HANDLE); } if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) { zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (res); - } - - if ((res = zonecfg_get_hostid(handle, hostidp, sizeof (hostidp))) == - Z_OK) { - if (zonecfg_valid_hostid(hostidp) != Z_OK) { - zerror(zlogp, B_FALSE, - "zone hostid is not valid: %s", hostidp); - zonecfg_fini_handle(handle); - 
return (Z_HOSTID_FUBAR); - } - hostid = (unsigned int)strtoul(hostidp, NULL, 16); - if (zone_setattr(zoneid, ZONE_ATTR_HOSTID, &hostid, - sizeof (hostid)) != 0) { - zerror(zlogp, B_TRUE, - "zone hostid is not valid: %s", hostidp); - zonecfg_fini_handle(handle); - return (Z_SYSTEM); - } - } else if (res != Z_BAD_PROPERTY) { - /* - * Z_BAD_PROPERTY is an acceptable error value (from - * zonecfg_get_hostid()) because it indicates that the zone - * doesn't have a hostid. - */ - if (res == Z_TOO_BIG) - zerror(zlogp, B_FALSE, "hostid string in zone " - "configuration is too large."); - else - zerror(zlogp, B_TRUE, "fetching zone hostid from " - "configuration"); - zonecfg_fini_handle(handle); - return (res); - } - + goto out; + } + + if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK) + goto out; + + if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK) + goto out; + +out: zonecfg_fini_handle(handle); - return (Z_OK); + return (res); } zoneid_t @@ -4366,7 +4408,7 @@ struct brand_attr attr; char modname[MAXPATHLEN]; - if (setup_zone_hostid(zlogp, zone_name, zoneid) != Z_OK) + if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK) goto error; if ((bh = brand_open(brand_name)) == NULL) {
--- a/usr/src/cmd/zonecfg/zonecfg.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/cmd/zonecfg/zonecfg.c Wed Jun 16 10:02:44 2010 -0700 @@ -183,6 +183,7 @@ "capped-cpu", "hostid", "admin", + "fs-allowed", NULL }; @@ -227,6 +228,7 @@ "hostid", "user", "auths", + "fs-allowed", NULL }; @@ -342,6 +344,7 @@ "set " ALIAS_MAXSEMIDS "=", "set " ALIAS_SHARES "=", "set hostid=", + "set fs-allowed=", NULL }; @@ -373,6 +376,7 @@ "info cpu-shares", "info hostid", "info admin", + "info fs-allowed", NULL }; @@ -1219,6 +1223,8 @@ (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_HOSTID)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), + pt_to_str(PT_FS_ALLOWED)); + (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_MAXLWPS)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_MAXSHMMEM)); @@ -1706,6 +1712,7 @@ char sched[MAXNAMELEN]; char brand[MAXNAMELEN]; char hostidp[HW_HOSTID_LEN]; + char fsallowedp[ZONE_FS_ALLOWED_MAX]; char *limitpriv; FILE *of; boolean_t autoboot; @@ -1814,6 +1821,12 @@ pt_to_str(PT_HOSTID), hostidp); } + if (zonecfg_get_fs_allowed(handle, fsallowedp, + sizeof (fsallowedp)) == Z_OK) { + (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), + pt_to_str(PT_FS_ALLOWED), fsallowedp); + } + if ((err = zonecfg_setipdent(handle)) != Z_OK) { zone_perror(zone, err, B_FALSE); goto done; @@ -2383,7 +2396,7 @@ return (global_zone && (type == RT_ZONENAME || type == RT_ZONEPATH || type == RT_AUTOBOOT || type == RT_LIMITPRIV || type == RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED || - type == RT_IPTYPE || type == RT_HOSTID)); + type == RT_IPTYPE || type == RT_HOSTID || type == RT_FS_ALLOWED)); } static boolean_t @@ -2392,7 +2405,7 @@ return (global_zone && (type == PT_ZONENAME || type == PT_ZONEPATH || type == PT_AUTOBOOT || type == PT_LIMITPRIV || type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED || - type == PT_IPTYPE || type == PT_HOSTID)); + type == PT_IPTYPE || type == PT_HOSTID || type == 
PT_FS_ALLOWED)); } void @@ -3679,6 +3692,12 @@ else need_to_commit = B_TRUE; return; + case PT_FS_ALLOWED: + if ((err = zonecfg_set_fs_allowed(handle, NULL)) != Z_OK) + z_cmd_rt_perror(CMD_CLEAR, RT_FS_ALLOWED, err, B_TRUE); + else + need_to_commit = B_TRUE; + return; default: zone_perror(pt_to_str(type), Z_NO_PROPERTY_TYPE, B_TRUE); long_usage(CMD_CLEAR, B_TRUE); @@ -4146,6 +4165,8 @@ res_type = RT_SHARES; } else if (prop_type == PT_HOSTID) { res_type = RT_HOSTID; + } else if (prop_type == PT_FS_ALLOWED) { + res_type = RT_FS_ALLOWED; } else { zerr(gettext("Cannot set a resource-specific property " "from the global scope.")); @@ -4361,6 +4382,12 @@ } need_to_commit = B_TRUE; return; + case RT_FS_ALLOWED: + if ((err = zonecfg_set_fs_allowed(handle, prop_id)) != Z_OK) + zone_perror(zone, err, B_TRUE); + else + need_to_commit = B_TRUE; + return; case RT_FS: switch (prop_type) { case PT_DIR: @@ -4896,15 +4923,33 @@ info_hostid(zone_dochandle_t handle, FILE *fp) { char hostidp[HW_HOSTID_LEN]; - - /* - * This will display "hostid: " if there isn't a hostid or an - * error occurs while retrieving the hostid from the configuration - * file. 
- */ - if (zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)) != Z_OK) - hostidp[0] = '\0'; - (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_HOSTID), hostidp); + int err; + + if ((err = zonecfg_get_hostid(handle, hostidp, + sizeof (hostidp))) == Z_OK) { + (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_HOSTID), hostidp); + } else if (err == Z_BAD_PROPERTY) { + (void) fprintf(fp, "%s: \n", pt_to_str(PT_HOSTID)); + } else { + zone_perror(zone, err, B_TRUE); + } +} + +static void +info_fs_allowed(zone_dochandle_t handle, FILE *fp) +{ + char fsallowedp[ZONE_FS_ALLOWED_MAX]; + int err; + + if ((err = zonecfg_get_fs_allowed(handle, fsallowedp, + sizeof (fsallowedp))) == Z_OK) { + (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_FS_ALLOWED), + fsallowedp); + } else if (err == Z_BAD_PROPERTY) { + (void) fprintf(fp, "%s: \n", pt_to_str(PT_FS_ALLOWED)); + } else { + zone_perror(zone, err, B_TRUE); + } } static void @@ -5508,6 +5553,7 @@ info_sched(handle, fp); info_iptype(handle, fp); info_hostid(handle, fp); + info_fs_allowed(handle, fp); } info_aliased_rctl(handle, fp, ALIAS_MAXLWPS); info_aliased_rctl(handle, fp, ALIAS_MAXSHMMEM); @@ -5612,6 +5658,9 @@ case RT_ADMIN: info_auth(handle, fp, cmd); break; + case RT_FS_ALLOWED: + info_fs_allowed(handle, fp); + break; default: zone_perror(rt_to_str(cmd->cmd_res_type), Z_NO_RESOURCE_TYPE, B_TRUE); @@ -5751,6 +5800,7 @@ char sched[MAXNAMELEN]; char brand[MAXNAMELEN]; char hostidp[HW_HOSTID_LEN]; + char fsallowedp[ZONE_FS_ALLOWED_MAX]; int err, ret_val = Z_OK, arg; int pset_res; boolean_t save = B_FALSE; @@ -5825,9 +5875,17 @@ } (void) zonecfg_endipdent(handle); - if (zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)) == Z_OK && - (err = zonecfg_valid_hostid(hostidp)) != Z_OK) { - zone_perror(zone, err, B_TRUE); + if (zonecfg_get_hostid(handle, hostidp, + sizeof (hostidp)) == Z_INVALID_PROPERTY) { + zerr(gettext("%s: invalid hostid: %s"), + zone, hostidp); + return; + } + + if (zonecfg_get_fs_allowed(handle, fsallowedp, + sizeof 
(fsallowedp)) == Z_INVALID_PROPERTY) { + zerr(gettext("%s: invalid fs-allowed: %s"), + zone, fsallowedp); return; }
--- a/usr/src/cmd/zonecfg/zonecfg.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/cmd/zonecfg/zonecfg.h Wed Jun 16 10:02:44 2010 -0700 @@ -89,9 +89,10 @@ #define RT_PCAP 25 #define RT_HOSTID 26 /* really a property, but for info ... */ #define RT_ADMIN 27 +#define RT_FS_ALLOWED 28 #define RT_MIN RT_UNKNOWN -#define RT_MAX RT_ADMIN +#define RT_MAX RT_FS_ALLOWED /* property types: increment PT_MAX when expanding this list */ #define PT_UNKNOWN 0 @@ -133,9 +134,10 @@ #define PT_HOSTID 36 #define PT_USER 37 #define PT_AUTHS 38 +#define PT_FS_ALLOWED 39 #define PT_MIN PT_UNKNOWN -#define PT_MAX PT_AUTHS +#define PT_MAX PT_FS_ALLOWED #define MAX_EQ_PROP_PAIRS 3
--- a/usr/src/cmd/zonecfg/zonecfg_grammar.y Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y Wed Jun 16 10:02:44 2010 -0700 @@ -123,7 +123,7 @@ %token HELP CREATE EXPORT ADD DELETE REMOVE SELECT SET INFO CANCEL END VERIFY %token COMMIT REVERT EXIT SEMICOLON TOKEN ZONENAME ZONEPATH AUTOBOOT POOL NET %token FS IPD ATTR DEVICE RCTL SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL -%token IPTYPE HOSTID +%token IPTYPE HOSTID FS_ALLOWED %token NAME MATCH PRIV LIMIT ACTION VALUE EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET %token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND PSET PCAP %token MCAP NCPUS IMPORTANCE SHARES MAXLWPS MAXSHMMEM MAXSHMIDS MAXMSGIDS @@ -136,7 +136,7 @@ ADMIN %type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT - ACTION BRAND SCHED IPTYPE DEFROUTER HOSTID USER AUTHS + ACTION BRAND SCHED IPTYPE DEFROUTER HOSTID USER AUTHS FS_ALLOWED %type <cmd> command %type <cmd> add_command ADD %type <cmd> cancel_command CANCEL @@ -616,6 +616,15 @@ $$->cmd_res_type = RT_HOSTID; $$->cmd_prop_nv_pairs = 0; } + | INFO FS_ALLOWED + { + if (($$ = alloc_cmd()) == NULL) + YYERROR; + cmd = $$; + $$->cmd_handler = &info_func; + $$->cmd_res_type = RT_FS_ALLOWED; + $$->cmd_prop_nv_pairs = 0; + } | INFO resource_type property_name EQUAL property_value { if (($$ = alloc_cmd()) == NULL) @@ -962,6 +971,7 @@ | HOSTID { $$ = PT_HOSTID; } | USER { $$ = PT_USER; } | AUTHS { $$ = PT_AUTHS; } + | FS_ALLOWED { $$ = PT_FS_ALLOWED; } /* * The grammar builds data structures from the bottom up. Thus various
--- a/usr/src/cmd/zonecfg/zonecfg_lex.l Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/cmd/zonecfg/zonecfg_lex.l Wed Jun 16 10:02:44 2010 -0700 @@ -281,6 +281,9 @@ <TSTATE>auths { return AUTHS; } <CSTATE>auths { return AUTHS; } +<TSTATE>fs-allowed { return FS_ALLOWED; } +<CSTATE>fs-allowed { return FS_ALLOWED; } + <TSTATE>= { return EQUAL; } <LSTATE>= { return EQUAL; } <CSTATE>= { return EQUAL; }
--- a/usr/src/head/libzonecfg.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/head/libzonecfg.h Wed Jun 16 10:02:44 2010 -0700 @@ -100,7 +100,7 @@ #define Z_NO_POOL 47 /* no such pool configured */ #define Z_POOL_CREATE 48 /* pool create failed */ #define Z_POOL_BIND 49 /* pool bind failed */ -#define Z_HOSTID_FUBAR 50 /* invalid hostid provided */ +#define Z_INVALID_PROPERTY 50 /* invalid property value */ /* * Warning: these are shared with the admin/install consolidation. @@ -362,7 +362,12 @@ */ extern int zonecfg_get_hostid(zone_dochandle_t, char *, size_t); extern int zonecfg_set_hostid(zone_dochandle_t, const char *); -extern int zonecfg_valid_hostid(const char *); + +/* + * Allowed FS mounts configuration. + */ +extern int zonecfg_get_fs_allowed(zone_dochandle_t, char *, size_t); +extern int zonecfg_set_fs_allowed(zone_dochandle_t, const char *); /* * Device configuration and rule matching.
--- a/usr/src/lib/brand/ipkg/zone/platform.xml Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/brand/ipkg/zone/platform.xml Wed Jun 16 10:02:44 2010 -0700 @@ -20,8 +20,7 @@ CDDL HEADER END - Copyright 2010 Sun Microsystems, Inc. All rights reserved. - Use is subject to license terms. + Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. DO NOT EDIT THIS FILE. --> @@ -56,6 +55,8 @@ <device match="ipnet" /> <device match="kstat" /> <device match="lo0" /> + <device match="lofictl" /> + <device match="lofi" /> <device match="log" /> <device match="logindmux" /> <device match="nsmb" /> @@ -68,6 +69,7 @@ <device match="pts/*" /> <device match="random" /> <device match="rdsk" /> + <device match="rlofi" /> <device match="rmt" /> <device match="sad/user" /> <device match="svvslo0" />
--- a/usr/src/lib/brand/sn1/zone/platform.xml Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/brand/sn1/zone/platform.xml Wed Jun 16 10:02:44 2010 -0700 @@ -20,8 +20,7 @@ CDDL HEADER END - Copyright 2009 Sun Microsystems, Inc. All rights reserved. - Use is subject to license terms. + Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. DO NOT EDIT THIS FILE. --> @@ -61,6 +60,8 @@ <device match="ipnet" /> <device match="kstat" /> <device match="lo0" /> + <device match="lofictl" /> + <device match="lofi" /> <device match="log" /> <device match="logindmux" /> <device match="nsmb" /> @@ -73,6 +74,7 @@ <device match="pts/*" /> <device match="random" /> <device match="rdsk" /> + <device match="rlofi" /> <device match="rmt" /> <device match="sad/user" /> <device match="svvslo0" />
--- a/usr/src/lib/brand/solaris10/s10_brand/common/s10_brand.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/brand/solaris10/s10_brand/common/s10_brand.c Wed Jun 16 10:02:44 2010 -0700 @@ -56,6 +56,7 @@ #include <sys/mntio.h> #include <sys/mnttab.h> #include <sys/attr.h> +#include <sys/lofi.h> #include <atomic.h> #include <sys/acl.h> @@ -120,6 +121,41 @@ #define S10_UTS_VERSION "Generic_Virtual" /* + * If the ioctl fd's major doesn't match "major", then pass through the + * ioctl, since it is not the expected device. major should be a + * pointer to a static dev_t initialized to -1, and devname should be + * the path of the device. + * + * Returns 1 if the ioctl was handled (in which case *err contains the + * error code), or 0 if it still needs handling. + */ +static int +passthru_otherdev_ioctl(dev_t *majordev, const char *devname, int *err, + sysret_t *rval, int fdes, int cmd, intptr_t arg) +{ + struct stat sbuf; + + if (*majordev == (dev_t)-1) { + if ((*err = __systemcall(rval, SYS_fstatat + 1024, + AT_FDCWD, devname, &sbuf, 0) != 0) != 0) + goto doioctl; + + *majordev = major(sbuf.st_rdev); + } + + if ((*err = __systemcall(rval, SYS_fstatat + 1024, fdes, + NULL, &sbuf, 0)) != 0) + goto doioctl; + + if (major(sbuf.st_rdev) == *majordev) + return (0); + +doioctl: + *err = (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)); + return (1); +} + +/* * Figures out the PID of init for the zone. Also returns a boolean * indicating whether this process currently has that pid: if so, * then at this moment, we are init. 
@@ -414,20 +450,10 @@ s10_crypto_get_function_list_t s10_param; crypto_get_function_list_t native_param; static dev_t crypto_dev = (dev_t)-1; - struct stat sbuf; - if (crypto_dev == (dev_t)-1) { - if ((err = __systemcall(rval, SYS_fstatat + 1024, - AT_FDCWD, "/dev/crypto", &sbuf, 0)) != 0) - goto nonemuioctl; - crypto_dev = major(sbuf.st_rdev); - } - if ((err = __systemcall(rval, SYS_fstatat + 1024, - fdes, NULL, &sbuf, 0)) != 0) + if (passthru_otherdev_ioctl(&crypto_dev, "/dev/crypto", &err, + rval, fdes, cmd, arg) == 1) return (err); - /* Each open fd of /dev/crypto gets a new minor device. */ - if (major(sbuf.st_rdev) != crypto_dev) - goto nonemuioctl; if (brand_uucopy((const void *)arg, &s10_param, sizeof (s10_param)) != 0) @@ -518,9 +544,6 @@ struct_assign(s10_param, native_param, fl_list.prov_hash_limit); return (brand_uucopy(&s10_param, (void *)arg, sizeof (s10_param))); - -nonemuioctl: - return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)); } /* @@ -584,29 +607,72 @@ static int zfs_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg) { - dev_t zfs_dev; - struct stat sbuf; + static dev_t zfs_dev = (dev_t)-1; + int err; - /* - * See if the ioctl is targeting the ZFS device, /dev/zfs. - * If it isn't, then s10_ioctl() mistook the ioctl for a ZFS ioctl. - * In that case, we don't want to abort, so we pass it along to the - * kernel. 
- */ - if (__systemcall(rval, SYS_fstatat + 1024, AT_FDCWD, ZFS_DEV, &sbuf, 0) - != 0) - return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)); - zfs_dev = major(sbuf.st_rdev); - - if (__systemcall(rval, SYS_fstatat + 1024, fdes, NULL, &sbuf, 0) != 0 || - major(sbuf.st_rdev) != zfs_dev) - return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)); + if (passthru_otherdev_ioctl(&zfs_dev, ZFS_DEV, &err, + rval, fdes, cmd, arg) == 1) + return (err); brand_abort(0, "ZFS ioctl!"); /*NOTREACHED*/ return (0); } +struct s10_lofi_ioctl { + uint32_t li_minor; + boolean_t li_force; + char li_filename[MAXPATHLEN + 1]; +}; + +static int +lofi_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg) +{ + static dev_t lofi_dev = (dev_t)-1; + struct s10_lofi_ioctl s10_param; + struct lofi_ioctl native_param; + int err; + + if (passthru_otherdev_ioctl(&lofi_dev, "/dev/lofictl", &err, + rval, fdes, cmd, arg) == 1) + return (err); + + if (brand_uucopy((const void *)arg, &s10_param, + sizeof (s10_param)) != 0) + return (EFAULT); + + /* + * Somewhat weirdly, EIO is what the S10 lofi driver would + * return for unrecognised cmds. + */ + if (cmd >= LOFI_CHECK_COMPRESSED) + return (EIO); + + bzero(&native_param, sizeof (native_param)); + + struct_assign(native_param, s10_param, li_minor); + struct_assign(native_param, s10_param, li_force); + + /* + * Careful here, this has changed from [MAXPATHLEN + 1] to + * [MAXPATHLEN]. 
+ */ + bcopy(s10_param.li_filename, native_param.li_filename, + sizeof (native_param.li_filename)); + native_param.li_filename[MAXPATHLEN - 1] = '\0'; + + err = __systemcall(rval, SYS_ioctl + 1024, fdes, cmd, &native_param); + + struct_assign(s10_param, native_param, li_minor); + /* li_force is input-only */ + + bcopy(native_param.li_filename, s10_param.li_filename, + sizeof (native_param.li_filename)); + + (void) brand_uucopy(&s10_param, (void *)arg, sizeof (s10_param)); + return (err); +} + int s10_ioctl(sysret_t *rval, int fdes, int cmd, intptr_t arg) { @@ -625,9 +691,17 @@ return (mntfs_ioctl(rval, fdes, cmd, arg)); } - if ((cmd & 0xff00) == ZFS_IOC) + switch (cmd & ~0xff) { + case ZFS_IOC: return (zfs_ioctl(rval, fdes, cmd, arg)); + case LOFI_IOC_BASE: + return (lofi_ioctl(rval, fdes, cmd, arg)); + + default: + break; + } + return (__systemcall(rval, SYS_ioctl + 1024, fdes, cmd, arg)); }
--- a/usr/src/lib/brand/solaris10/zone/platform.xml Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/brand/solaris10/zone/platform.xml Wed Jun 16 10:02:44 2010 -0700 @@ -20,8 +20,7 @@ CDDL HEADER END - Copyright 2009 Sun Microsystems, Inc. All rights reserved. - Use is subject to license terms. + Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. DO NOT EDIT THIS FILE. --> @@ -73,6 +72,8 @@ <device match="ipnet" /> <device match="kstat" /> <device match="lo0" /> + <device match="lofictl" /> + <device match="lofi" /> <device match="log" /> <device match="logindmux" /> <device match="nsmb" /> @@ -85,6 +86,7 @@ <device match="pts/*" /> <device match="random" /> <device match="rdsk" /> + <device match="rlofi" /> <device match="rmt" /> <device match="sad/user" /> <device match="svvslo0" />
--- a/usr/src/lib/libzonecfg/common/libzonecfg.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/libzonecfg/common/libzonecfg.c Wed Jun 16 10:02:44 2010 -0700 @@ -132,6 +132,7 @@ #define DTD_ATTR_HOSTID (const xmlChar *) "hostid" #define DTD_ATTR_USER (const xmlChar *) "user" #define DTD_ATTR_AUTHS (const xmlChar *) "auths" +#define DTD_ATTR_FS_ALLOWED (const xmlChar *) "fs-allowed" #define DTD_ENTITY_BOOLEAN "boolean" #define DTD_ENTITY_DEVPATH "devpath" @@ -2384,6 +2385,113 @@ } /* + * Must be a comma-separated list of alpha-numeric file system names. + */ +static int +zonecfg_valid_fs_allowed(const char *fsallowedp) +{ + char tmp[ZONE_FS_ALLOWED_MAX]; + char *cp = tmp; + char *p; + + if (strlen(fsallowedp) > ZONE_FS_ALLOWED_MAX) + return (Z_TOO_BIG); + + (void) strlcpy(tmp, fsallowedp, sizeof (tmp)); + + while (*cp != '\0') { + p = cp; + while (*p != '\0' && *p != ',') { + if (!isalnum(*p)) + return (Z_INVALID_PROPERTY); + p++; + } + + if (*p == ',') { + if (p == cp) + return (Z_INVALID_PROPERTY); + + p++; + + if (*p == '\0') + return (Z_INVALID_PROPERTY); + } + + cp = p; + } + + return (Z_OK); +} + +int +zonecfg_get_fs_allowed(zone_dochandle_t handle, char *bufp, size_t buflen) +{ + int err; + + if ((err = getrootattr(handle, DTD_ATTR_FS_ALLOWED, + bufp, buflen)) != Z_OK) + return (err); + if (bufp[0] == '\0') + return (Z_BAD_PROPERTY); + return (zonecfg_valid_fs_allowed(bufp)); +} + +int +zonecfg_set_fs_allowed(zone_dochandle_t handle, const char *bufp) +{ + int err; + + if (bufp == NULL || (err = zonecfg_valid_fs_allowed(bufp)) == Z_OK) + return (setrootattr(handle, DTD_ATTR_FS_ALLOWED, bufp)); + return (err); +} + +/* + * Determines if the specified string is a valid hostid string. This function + * returns Z_OK if the string is a valid hostid string. 
It returns Z_INVAL if + * 'hostidp' is NULL, Z_TOO_BIG if 'hostidp' refers to a string buffer + * containing a hex string with more than 8 digits, and Z_INVALID_PROPERTY if + * the string has an invalid format. + */ +static int +zonecfg_valid_hostid(const char *hostidp) +{ + char *currentp; + u_longlong_t hostidval; + size_t len; + + if (hostidp == NULL) + return (Z_INVAL); + + /* Empty strings and strings with whitespace are invalid. */ + if (*hostidp == '\0') + return (Z_INVALID_PROPERTY); + for (currentp = (char *)hostidp; *currentp != '\0'; ++currentp) { + if (isspace(*currentp)) + return (Z_INVALID_PROPERTY); + } + len = (size_t)(currentp - hostidp); + + /* + * The caller might pass a hostid that is larger than the maximum + * unsigned 32-bit integral value. Check for this! Also, make sure + * that the whole string is converted (this helps us find illegal + * characters) and that the whole string fits within a buffer of size + * HW_HOSTID_LEN. + */ + currentp = (char *)hostidp; + if (strncmp(hostidp, "0x", 2) == 0 || strncmp(hostidp, "0X", 2) == 0) + currentp += 2; + hostidval = strtoull(currentp, &currentp, 16); + if ((size_t)(currentp - hostidp) >= HW_HOSTID_LEN) + return (Z_TOO_BIG); + if (hostidval > UINT_MAX || hostidval == HW_INVALID_HOSTID || + currentp != hostidp + len) + return (Z_INVALID_PROPERTY); + return (Z_OK); +} + +/* * Gets the zone hostid string stored in the specified zone configuration * document. This function returns Z_OK on success. Z_BAD_PROPERTY is returned * if the config file doesn't specify a hostid or if the hostid is blank. @@ -2399,7 +2507,7 @@ return (err); if (bufp[0] == '\0') return (Z_BAD_PROPERTY); - return (Z_OK); + return (zonecfg_valid_hostid(bufp)); } /* @@ -2422,51 +2530,6 @@ return (err); } -/* - * Determines if the specified string is a valid hostid string. This function - * returns Z_OK if the string is a valid hostid string. 
It returns Z_INVAL if - * 'hostidp' is NULL, Z_TOO_BIG if 'hostidp' refers to a string buffer - * containing a hex string with more than 8 digits, and Z_HOSTID_FUBAR if the - * string has an invalid format. - */ -int -zonecfg_valid_hostid(const char *hostidp) -{ - char *currentp; - u_longlong_t hostidval; - size_t len; - - if (hostidp == NULL) - return (Z_INVAL); - - /* Empty strings and strings with whitespace are invalid. */ - if (*hostidp == '\0') - return (Z_HOSTID_FUBAR); - for (currentp = (char *)hostidp; *currentp != '\0'; ++currentp) { - if (isspace(*currentp)) - return (Z_HOSTID_FUBAR); - } - len = (size_t)(currentp - hostidp); - - /* - * The caller might pass a hostid that is larger than the maximum - * unsigned 32-bit integral value. Check for this! Also, make sure - * that the whole string is converted (this helps us find illegal - * characters) and that the whole string fits within a buffer of size - * HW_HOSTID_LEN. - */ - currentp = (char *)hostidp; - if (strncmp(hostidp, "0x", 2) == 0 || strncmp(hostidp, "0X", 2) == 0) - currentp += 2; - hostidval = strtoull(currentp, &currentp, 16); - if ((size_t)(currentp - hostidp) >= HW_HOSTID_LEN) - return (Z_TOO_BIG); - if (hostidval > UINT_MAX || hostidval == HW_INVALID_HOSTID || - currentp != hostidp + len) - return (Z_HOSTID_FUBAR); - return (Z_OK); -} - int zonecfg_lookup_dev(zone_dochandle_t handle, struct zone_devtab *tabptr) { @@ -3651,8 +3714,8 @@ "Could not create a temporary pool")); case Z_POOL_BIND: return (dgettext(TEXT_DOMAIN, "Could not bind zone to pool")); - case Z_HOSTID_FUBAR: - return (dgettext(TEXT_DOMAIN, "Specified hostid is invalid")); + case Z_INVALID_PROPERTY: + return (dgettext(TEXT_DOMAIN, "Specified property is invalid")); case Z_SYSTEM: return (strerror(errno)); default:
--- a/usr/src/lib/libzonecfg/common/mapfile-vers Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/libzonecfg/common/mapfile-vers Wed Jun 16 10:02:44 2010 -0700 @@ -125,6 +125,7 @@ zonecfg_getdevperment; zonecfg_getdsent; zonecfg_getfsent; + zonecfg_get_fs_allowed; zonecfg_get_handle; zonecfg_get_hostid; zonecfg_getipdent; @@ -205,6 +206,7 @@ zonecfg_setdevperment; zonecfg_setdsent; zonecfg_setfsent; + zonecfg_set_fs_allowed; zonecfg_set_hostid; zonecfg_setipdent; zonecfg_set_iptype; @@ -224,7 +226,6 @@ zonecfg_valid_auths; zonecfg_valid_alias_limit; zonecfg_valid_fs_type; - zonecfg_valid_hostid; zonecfg_valid_importance; zonecfg_valid_memlimit; zonecfg_valid_ncpus;
--- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 Wed Jun 16 10:02:44 2010 -0700 @@ -150,4 +150,5 @@ bootargs CDATA "" brand CDATA "" scheduling-class CDATA "" + fs-allowed CDATA "" version NMTOKEN #FIXED '1'>
--- a/usr/src/uts/common/fs/autofs/auto_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/autofs/auto_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -87,7 +87,7 @@ VFSDEF_VERSION, "autofs", autofs_init, - VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_STATS, + VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT, &auto_mntopts };
--- a/usr/src/uts/common/fs/ctfs/ctfs_root.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/ctfs/ctfs_root.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/modctl.h> #include <sys/types.h> #include <sys/param.h> @@ -111,7 +108,7 @@ VFSDEF_VERSION, "ctfs", ctfs_init, - VSW_HASPROTO, + VSW_HASPROTO|VSW_ZMOUNT, &ctfs_mntopts, }; @@ -241,10 +238,10 @@ */ vfsp->vfs_bsize = DEV_BSIZE; vfsp->vfs_fstype = ctfs_fstype; - do + do { dev = makedevice(ctfs_major, atomic_add_32_nv(&ctfs_minor, 1) & L_MAXMIN32); - while (vfs_devismounted(dev)); + } while (vfs_devismounted(dev)); vfs_make_fsid(&vfsp->vfs_fsid, dev, ctfs_fstype); vfsp->vfs_data = data; vfsp->vfs_dev = dev;
--- a/usr/src/uts/common/fs/dcfs/dc_vnops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/dcfs/dc_vnops.c Wed Jun 16 10:02:44 2010 -0700 @@ -20,8 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -159,7 +158,7 @@ VFSDEF_VERSION, "dcfs", dcinit, - 0, + VSW_ZMOUNT, NULL };
--- a/usr/src/uts/common/fs/dev/sdev_profile.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/dev/sdev_profile.c Wed Jun 16 10:02:44 2010 -0700 @@ -20,12 +20,9 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * This file implements /dev filesystem operations for non-global * instances. Three major entry points: @@ -245,9 +242,6 @@ prof_lookup_globaldev(struct sdev_node *dir, struct sdev_node *gdir, char *name, char *rename) { - /* global OS rootdir */ - extern vnode_t *rootdir; - int error; struct vnode *avp, *gdv, *gddv; struct sdev_node *newdv; @@ -269,7 +263,6 @@ /* perform a relative lookup of the global /dev instance */ gddv = SDEVTOV(gdir); VN_HOLD(gddv); - VN_HOLD(rootdir); error = lookuppnvp(&pn, NULL, FOLLOW, NULLVPP, &gdv, rootdir, gddv, kcred); pn_free(&pn); @@ -528,25 +521,96 @@ kmem_free(dbuf, dlen); } +/* + * Last chance for a zone to see a node. If our parent dir is + * SDEV_ZONED, then we look up the "zone" property for the node. If the + * property is found and matches the current zone name, we allow it. + * Note that this isn't quite correct for the global zone peeking inside + * a zone's /dev - for that to work, we'd have to have a per-dev-mount + * zone ref squirreled away. 
+ */ static int -prof_make_name(char *nm, void *arg) +prof_zone_matched(char *name, struct sdev_node *dir) +{ + vnode_t *gvn = SDEVTOV(dir->sdev_origin); + struct pathname pn; + vnode_t *vn = NULL; + char zonename[ZONENAME_MAX]; + int znlen = ZONENAME_MAX; + int ret; + + ASSERT((dir->sdev_flags & SDEV_ZONED) != 0); + + sdcmn_err10(("sdev_node %p is zoned, looking for %s\n", + (void *)dir, name)); + + if (pn_get(name, UIO_SYSSPACE, &pn)) + return (0); + + VN_HOLD(gvn); + + ret = lookuppnvp(&pn, NULL, FOLLOW, NULLVPP, &vn, rootdir, gvn, kcred); + + pn_free(&pn); + + if (ret != 0) { + sdcmn_err10(("prof_zone_matched: %s not found\n", name)); + return (0); + } + + /* + * VBLK doesn't matter, and the property name is in fact treated + * as a const char *. + */ + ret = e_ddi_getlongprop_buf(vn->v_rdev, VBLK, (char *)"zone", + DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, (caddr_t)zonename, &znlen); + + VN_RELE(vn); + + if (ret == DDI_PROP_NOT_FOUND) { + sdcmn_err10(("vnode %p: no zone prop\n", (void *)vn)); + return (0); + } else if (ret != DDI_PROP_SUCCESS) { + sdcmn_err10(("vnode %p: zone prop error: %d\n", + (void *)vn, ret)); + return (0); + } + + sdcmn_err10(("vnode %p zone prop: %s\n", (void *)vn, zonename)); + return (strcmp(zonename, curproc->p_zone->zone_name) == 0); +} + +static int +prof_make_name_glob(char *nm, void *arg) { struct sdev_node *ddv = (struct sdev_node *)arg; if (prof_name_matched(nm, ddv)) prof_lookup_globaldev(ddv, ddv->sdev_origin, nm, nm); + + return (WALK_DIR_CONTINUE); +} + +static int +prof_make_name_zone(char *nm, void *arg) +{ + struct sdev_node *ddv = (struct sdev_node *)arg; + + if (prof_zone_matched(nm, ddv)) + prof_lookup_globaldev(ddv, ddv->sdev_origin, nm, nm); + return (WALK_DIR_CONTINUE); } static void -prof_make_names_glob(struct sdev_node *ddv) +prof_make_names_walk(struct sdev_node *ddv, int (*cb)(char *, void *)) { struct sdev_node *gdir; gdir = ddv->sdev_origin; if (gdir == NULL) return; - walk_dir(SDEVTOV(gdir), (void *)ddv, 
prof_make_name); + walk_dir(SDEVTOV(gdir), (void *)ddv, cb); } static void @@ -559,11 +623,14 @@ ASSERT(RW_WRITE_HELD(&dir->sdev_contents)); + if ((dir->sdev_flags & SDEV_ZONED) != 0) + prof_make_names_walk(dir, prof_make_name_zone); + if (nvl == NULL) return; if (dir->sdev_prof.has_glob) { - prof_make_names_glob(dir); + prof_make_names_walk(dir, prof_make_name_glob); return; }
--- a/usr/src/uts/common/fs/dev/sdev_subr.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/dev/sdev_subr.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -548,6 +547,9 @@ { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, + { "lofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, + { "rlofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, + { NULL, NULL, NULL, NULL, NULL, 0} };
--- a/usr/src/uts/common/fs/fd/fdops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/fd/fdops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All rights reserved. */ @@ -549,7 +546,7 @@ VFSDEF_VERSION, "fd", fdinit, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &fdfs_mntopts };
--- a/usr/src/uts/common/fs/fifofs/fifosubr.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/fifofs/fifosubr.c Wed Jun 16 10:02:44 2010 -0700 @@ -21,12 +21,9 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * The routines defined in this file are supporting routines for FIFOFS * file system type. @@ -80,7 +77,7 @@ VFSDEF_VERSION, "fifofs", fifoinit, - 0, + VSW_ZMOUNT, NULL };
--- a/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/hsfs/hsfs_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * VFS operations for High Sierra filesystem */ @@ -151,7 +148,7 @@ "hsfs", hsfsinit, /* We don't suppport remounting */ - VSW_HASPROTO|VSW_STATS|VSW_CANLOFI, + VSW_HASPROTO|VSW_STATS|VSW_CANLOFI|VSW_ZMOUNT, &hsfs_proto_opttbl };
--- a/usr/src/uts/common/fs/lofs/lofs_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/lofs/lofs_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -58,7 +58,7 @@ VFSDEF_VERSION, "lofs", lofsinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &lofs_mntopts };
--- a/usr/src/uts/common/fs/mntfs/mntvfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/mntfs/mntvfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -66,7 +65,7 @@ VFSDEF_VERSION, "mntfs", mntinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &mnt_mntopts };
--- a/usr/src/uts/common/fs/namefs/namevfs.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/namefs/namevfs.c Wed Jun 16 10:02:44 2010 -0700 @@ -730,7 +730,7 @@ VFSDEF_VERSION, "namefs", nameinit, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &nm_mntopts };
--- a/usr/src/uts/common/fs/nfs/nfs4_common.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/nfs/nfs4_common.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -49,7 +48,7 @@ VFSDEF_VERSION, "nfs4", nfs4init, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL };
--- a/usr/src/uts/common/fs/nfs/nfs_common.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/nfs/nfs_common.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -28,8 +27,6 @@ * All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/errno.h> #include <sys/param.h> #include <sys/types.h> @@ -125,7 +122,7 @@ VFSDEF_VERSION, "nfsdyn", nfsdyninit, - 0, + VSW_ZMOUNT, NULL }; @@ -142,7 +139,7 @@ VFSDEF_VERSION, "nfs", nfsinit, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL }; @@ -159,7 +156,7 @@ VFSDEF_VERSION, "nfs3", nfs3init, - VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, + VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS|VSW_ZMOUNT, NULL }; @@ -410,10 +407,10 @@ vfsflags = 0; if (error = mount_root(*name ? name : "root", root_path, NFS_V4, - &args, &vfsflags)) { + &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, - "Unable to mount NFS root filesystem: %m"); + "Unable to mount NFS root filesystem: %m"); sv_free(svp); pn_free(&pn); vfs_setops(vfsp, nfsdyn_vfsops); @@ -432,7 +429,7 @@ vfsflags = 0; if (error = mount_root(*name ? name : "root", root_path, - NFS_V3, &args, &vfsflags)) { + NFS_V3, &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); @@ -455,8 +452,7 @@ vfs_setops(vfsp, nfs_vfsops); if (error = mount_root(*name ? name : "root", - root_path, NFS_VERSION, &args, - &vfsflags)) { + root_path, NFS_VERSION, &args, &vfsflags)) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); sv_free(svp);
--- a/usr/src/uts/common/fs/objfs/objfs_vfs.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/objfs/objfs_vfs.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/atomic.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -76,7 +73,7 @@ VFSDEF_VERSION, "objfs", objfs_init, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &objfs_mntopts, };
--- a/usr/src/uts/common/fs/proc/prvfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/proc/prvfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,16 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.25 */ - #include <sys/types.h> #include <sys/param.h> #include <sys/cmn_err.h> @@ -72,7 +69,7 @@ VFSDEF_VERSION, "proc", prinit, - VSW_HASPROTO|VSW_STATS|VSW_XID, + VSW_HASPROTO|VSW_STATS|VSW_XID|VSW_ZMOUNT, &proc_mntopts };
--- a/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/sharefs/sharefs_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -20,12 +20,9 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/atomic.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -94,7 +91,7 @@ VFSDEF_VERSION, "sharefs", sharefs_init, - VSW_HASPROTO, + VSW_HASPROTO | VSW_ZMOUNT, &sharefs_mntopts, };
--- a/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -33,8 +33,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/systm.h> @@ -116,7 +115,7 @@ VFSDEF_VERSION, (char *)fs_type_name, smbfsinit, /* init routine */ - VSW_HASPROTO|VSW_NOTZONESAFE, /* flags */ + VSW_HASPROTO|VSW_NOTZONESAFE|VSW_ZMOUNT, /* flags */ &smbfs_mntopts /* mount options table prototype */ };
--- a/usr/src/uts/common/fs/sockfs/sockvfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/sockfs/sockvfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,9 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/t_lock.h> #include <sys/param.h> @@ -59,7 +55,7 @@ VFSDEF_VERSION, "sockfs", sockinit, - 0, + VSW_ZMOUNT, NULL };
--- a/usr/src/uts/common/fs/specfs/specvfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/specfs/specvfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -37,8 +36,6 @@ */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/t_lock.h> #include <sys/param.h> @@ -64,7 +61,7 @@ VFSDEF_VERSION, "specfs", specinit, - 0, + VSW_ZMOUNT, NULL };
--- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -77,7 +76,7 @@ VFSDEF_VERSION, "tmpfs", tmpfsinit, - VSW_HASPROTO|VSW_STATS, + VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, &tmpfs_proto_opttbl };
--- a/usr/src/uts/common/fs/vfs.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/vfs.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -1014,8 +1013,7 @@ int minor; int err = 0; - if (fsname == NULL || - (vfssw = vfs_getvfssw(fsname)) == NULL) + if ((vfssw = vfs_getvfssw(fsname)) == NULL) return (0); if (!(vfssw->vsw_flag & VSW_CANLOFI)) { @@ -1049,29 +1047,16 @@ li = kmem_zalloc(sizeof (*li), KM_SLEEP); (void) strlcpy(li->li_filename, pn.pn_path, MAXPATHLEN); - /* - * The lofi control node is currently exclusive-open. We'd like - * to improve this, but in the meantime, we'll loop waiting for - * access. - */ - for (;;) { - err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, - kcred, &ldi_hdl, ldi_id); - - if (err != EBUSY) - break; - - if ((err = delay_sig(hz / 8)) == EINTR) - break; - } + err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred, + &ldi_hdl, ldi_id); if (err) goto out2; err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, - FREAD | FWRITE | FEXCL | FKIOCTL, kcred, &minor); - - (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, kcred); + FREAD | FWRITE | FKIOCTL, kcred, &minor); + + (void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred); if (!err) vfsp->vfs_lofi_minor = minor; @@ -1104,18 +1089,16 @@ li->li_minor = vfsp->vfs_lofi_minor; li->li_cleanup = B_TRUE; - do { - err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE | FEXCL, - kcred, &ldi_hdl, ldi_id); - } while (err == EBUSY); + err = ldi_open_by_name("/dev/lofictl", FREAD | FWRITE, kcred, + &ldi_hdl, ldi_id); if (err) goto out; err = ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE_MINOR, (intptr_t)li, - FREAD | FWRITE | FEXCL | FKIOCTL, kcred, NULL); - - (void) ldi_close(ldi_hdl, FREAD | FWRITE | FEXCL, 
kcred); + FREAD | FWRITE | FKIOCTL, kcred, NULL); + + (void) ldi_close(ldi_hdl, FREAD | FWRITE, kcred); if (!err) vfsp->vfs_lofi_minor = 0; @@ -1251,9 +1234,16 @@ } else { if ((vswp = vfs_getvfsswbyvfsops(vfs_getops(rootvfs))) == NULL) return (EINVAL); + fsname = vswp->vsw_name; } if (!VFS_INSTALLED(vswp)) return (EINVAL); + + if ((error = secpolicy_fs_allowed_mount(fsname)) != 0) { + vfs_unrefvfssw(vswp); + return (error); + } + vfsops = &vswp->vsw_vfsops; vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts); @@ -4782,7 +4772,7 @@ } } -#define LOFICTL_PATH "/devices/pseudo/lofi@0:%d" +#define LOFINODE_PATH "/dev/lofi/%d" /* * Return the vnode for the lofi node if there's a lofi mount in place. @@ -4801,11 +4791,23 @@ return (-1); } - strsize = snprintf(NULL, 0, LOFICTL_PATH, vfsp->vfs_lofi_minor); + strsize = snprintf(NULL, 0, LOFINODE_PATH, vfsp->vfs_lofi_minor); path = kmem_alloc(strsize + 1, KM_SLEEP); - (void) snprintf(path, strsize + 1, LOFICTL_PATH, vfsp->vfs_lofi_minor); - - err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp); + (void) snprintf(path, strsize + 1, LOFINODE_PATH, vfsp->vfs_lofi_minor); + + /* + * We may be inside a zone, so we need to use the /dev path, but + * it's created asynchronously, so we wait here. + */ + for (;;) { + err = lookupname(path, UIO_SYSSPACE, FOLLOW, NULLVPP, vpp); + + if (err != ENOENT) + break; + + if ((err = delay_sig(hz / 8)) == EINTR) + break; + } if (err) *vpp = NULL;
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c Wed Jun 16 10:02:44 2010 -0700 @@ -2277,7 +2277,7 @@ MNTTYPE_ZFS, zfs_vfsinit, VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| - VSW_XID, + VSW_XID|VSW_ZMOUNT, &zfs_mntopts };
--- a/usr/src/uts/common/io/lofi.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/io/lofi.c Wed Jun 16 10:02:44 2010 -0700 @@ -76,12 +76,6 @@ * enable direct I/O on the underlying file. Don't, because that deadlocks. * I think to fix the cache-twice problem we might need filesystem support. * - * lofi on itself. The simple lock strategy (lofi_lock) precludes this - * because you'll be in lofi_ioctl, holding the lock when you open the - * file, which, if it's lofi, will grab lofi_lock. We prevent this for - * now, though not using ddi_soft_state(9F) would make it possible to - * do. Though it would still be silly. - * * Interesting things to do: * * Allow multiple files for each device. A poor-man's metadisk, basically. @@ -129,8 +123,11 @@ #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/zmod.h> +#include <sys/id_space.h> +#include <sys/mkdev.h> #include <sys/crypto/common.h> #include <sys/crypto/api.h> +#include <sys/rctl.h> #include <LzmaDec.h> /* @@ -144,6 +141,7 @@ #define NBLOCKS_PROP_NAME "Nblocks" #define SIZE_PROP_NAME "Size" +#define ZONE_PROP_NAME "zone" #define SETUP_C_DATA(cd, buf, len) \ (cd).cd_format = CRYPTO_DATA_RAW; \ @@ -162,6 +160,9 @@ static dev_info_t *lofi_dip = NULL; static void *lofi_statep = NULL; static kmutex_t lofi_lock; /* state lock */ +static id_space_t *lofi_minor_id; +static list_t lofi_list; +static zone_key_t lofi_zone_key; /* * Because lofi_taskq_nthreads limits the actual swamping of the device, the @@ -178,7 +179,6 @@ static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE; static int lofi_taskq_nthreads = 4; /* # of taskq threads per device */ -uint32_t lofi_max_files = LOFI_MAX_FILES; const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC; /* @@ -244,36 +244,16 @@ } static int -lofi_busy(void) -{ - minor_t minor; - - /* - * We need to make sure no mappings exist - mod_remove won't - * help because the device isn't open. 
- */ - mutex_enter(&lofi_lock); - for (minor = 1; minor <= lofi_max_files; minor++) { - if (ddi_get_soft_state(lofi_statep, minor) != NULL) { - mutex_exit(&lofi_lock); - return (EBUSY); - } - } - mutex_exit(&lofi_lock); - return (0); -} - -static int is_opened(struct lofi_state *lsp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); return (lsp->ls_chr_open || lsp->ls_blk_open || lsp->ls_lyr_open_count); } static int mark_opened(struct lofi_state *lsp, int otyp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); switch (otyp) { case OTYP_CHR: lsp->ls_chr_open = 1; @@ -293,7 +273,7 @@ static void mark_closed(struct lofi_state *lsp, int otyp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); switch (otyp) { case OTYP_CHR: lsp->ls_chr_open = 0; @@ -312,19 +292,21 @@ static void lofi_free_crypto(struct lofi_state *lsp) { - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); if (lsp->ls_crypto_enabled) { /* * Clean up the crypto state so that it doesn't hang around * in memory after we are done with it. 
*/ - bzero(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - kmem_free(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - lsp->ls_key.ck_data = NULL; - lsp->ls_key.ck_length = 0; + if (lsp->ls_key.ck_data != NULL) { + bzero(lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + kmem_free(lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + lsp->ls_key.ck_data = NULL; + lsp->ls_key.ck_length = 0; + } if (lsp->ls_mech.cm_param != NULL) { kmem_free(lsp->ls_mech.cm_param, @@ -345,52 +327,17 @@ } static void -lofi_free_handle(dev_t dev, minor_t minor, struct lofi_state *lsp, - cred_t *credp) +lofi_destroy(struct lofi_state *lsp, cred_t *credp) { - dev_t newdev; - char namebuf[50]; - int i; + minor_t minor = getminor(lsp->ls_dev); + int i; - ASSERT(mutex_owned(&lofi_lock)); + ASSERT(MUTEX_HELD(&lofi_lock)); + + list_remove(&lofi_list, lsp); lofi_free_crypto(lsp); - if (lsp->ls_vp) { - (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, - 1, 0, credp, NULL); - VN_RELE(lsp->ls_vp); - lsp->ls_vp = NULL; - } - - newdev = makedevice(getmajor(dev), minor); - (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); - (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); - - (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); - ddi_remove_minor_node(lofi_dip, namebuf); - (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); - ddi_remove_minor_node(lofi_dip, namebuf); - - kmem_free(lsp->ls_filename, lsp->ls_filename_sz); - taskq_destroy(lsp->ls_taskq); - if (lsp->ls_kstat) { - kstat_delete(lsp->ls_kstat); - mutex_destroy(&lsp->ls_kstat_lock); - } - - /* - * Free cached decompressed segment data - */ - lofi_free_comp_cache(lsp); - list_destroy(&lsp->ls_comp_cache); - mutex_destroy(&lsp->ls_comp_cache_lock); - - if (lsp->ls_uncomp_seg_sz > 0) { - kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz); - lsp->ls_uncomp_seg_sz = 0; - } - /* * Free pre-allocated compressed buffers */ @@ -402,12 
+349,93 @@ } kmem_free(lsp->ls_comp_bufs, sizeof (struct compbuf) * lofi_taskq_nthreads); - mutex_destroy(&lsp->ls_comp_bufs_lock); + } + + (void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag, + 1, 0, credp, NULL); + VN_RELE(lsp->ls_vp); + if (lsp->ls_stacked_vp != lsp->ls_vp) + VN_RELE(lsp->ls_stacked_vp); + + taskq_destroy(lsp->ls_taskq); + + if (lsp->ls_kstat != NULL) + kstat_delete(lsp->ls_kstat); + + /* + * Free cached decompressed segment data + */ + lofi_free_comp_cache(lsp); + list_destroy(&lsp->ls_comp_cache); + + if (lsp->ls_uncomp_seg_sz > 0) { + kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz); + lsp->ls_uncomp_seg_sz = 0; } + rctl_decr_lofi(lsp->ls_zone, 1); + zone_rele(lsp->ls_zone); + + mutex_destroy(&lsp->ls_comp_cache_lock); + mutex_destroy(&lsp->ls_comp_bufs_lock); + mutex_destroy(&lsp->ls_kstat_lock); mutex_destroy(&lsp->ls_vp_lock); + ASSERT(ddi_get_soft_state(lofi_statep, minor) == lsp); ddi_soft_state_free(lofi_statep, minor); + id_free(lofi_minor_id, minor); +} + +static void +lofi_free_dev(dev_t dev) +{ + minor_t minor = getminor(dev); + char namebuf[50]; + + ASSERT(MUTEX_HELD(&lofi_lock)); + + (void) ddi_prop_remove(dev, lofi_dip, ZONE_PROP_NAME); + (void) ddi_prop_remove(dev, lofi_dip, SIZE_PROP_NAME); + (void) ddi_prop_remove(dev, lofi_dip, NBLOCKS_PROP_NAME); + + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + ddi_remove_minor_node(lofi_dip, namebuf); + (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); + ddi_remove_minor_node(lofi_dip, namebuf); +} + +/*ARGSUSED*/ +static void +lofi_zone_shutdown(zoneid_t zoneid, void *arg) +{ + struct lofi_state *lsp; + struct lofi_state *next; + + mutex_enter(&lofi_lock); + + for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) { + + /* lofi_destroy() frees lsp */ + next = list_next(&lofi_list, lsp); + + if (lsp->ls_zone->zone_id != zoneid) + continue; + + /* + * No in-zone processes are running, but something has this + * open. 
It's either a global zone process, or a lofi + * mount. In either case we set ls_cleanup so the last + * user destroys the device. + */ + if (is_opened(lsp)) { + lsp->ls_cleanup = 1; + } else { + lofi_free_dev(lsp->ls_dev); + lofi_destroy(lsp, kcred); + } + } + + mutex_exit(&lofi_lock); } /*ARGSUSED*/ @@ -417,25 +445,18 @@ minor_t minor; struct lofi_state *lsp; + /* + * lofiadm -a /dev/lofi/1 gets us here. + */ + if (mutex_owner(&lofi_lock) == curthread) + return (EINVAL); + mutex_enter(&lofi_lock); + minor = getminor(*devp); + + /* master control device */ if (minor == 0) { - /* master control device */ - /* must be opened exclusively */ - if (((flag & FEXCL) != FEXCL) || (otyp != OTYP_CHR)) { - mutex_exit(&lofi_lock); - return (EINVAL); - } - lsp = ddi_get_soft_state(lofi_statep, 0); - if (lsp == NULL) { - mutex_exit(&lofi_lock); - return (ENXIO); - } - if (is_opened(lsp)) { - mutex_exit(&lofi_lock); - return (EBUSY); - } - (void) mark_opened(lsp, OTYP_CHR); mutex_exit(&lofi_lock); return (0); } @@ -475,6 +496,12 @@ mutex_exit(&lofi_lock); return (EINVAL); } + + if (minor == 0) { + mutex_exit(&lofi_lock); + return (0); + } + mark_closed(lsp, otyp); /* @@ -482,9 +509,10 @@ * asked for cleanup (li_cleanup), finish up if we're the last * out of the door. 
*/ - if (minor != 0 && !is_opened(lsp) && - (lsp->ls_cleanup || lsp->ls_vp == NULL)) - lofi_free_handle(dev, minor, lsp, credp); + if (!is_opened(lsp) && (lsp->ls_cleanup || lsp->ls_vp == NULL)) { + lofi_free_dev(dev); + lofi_destroy(lsp, credp); + } mutex_exit(&lofi_lock); return (0); @@ -508,7 +536,7 @@ void *data; size_t datasz; - ASSERT(mutex_owned(&lsp->ls_crypto_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock)); if (lsp == NULL) return (CRYPTO_DEVICE_ERROR); @@ -843,7 +871,7 @@ { struct lofi_comp_cache *lc; - ASSERT(mutex_owned(&lsp->ls_comp_cache_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); for (lc = list_head(&lsp->ls_comp_cache); lc != NULL; lc = list_next(&lsp->ls_comp_cache, lc)) { @@ -877,7 +905,7 @@ { struct lofi_comp_cache *lc; - ASSERT(mutex_owned(&lsp->ls_comp_cache_lock)); + ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock)); while (lsp->ls_comp_cache_count > lofi_max_comp_cache) { lc = list_remove_tail(&lsp->ls_comp_cache); @@ -1443,14 +1471,22 @@ if (cmd != DDI_ATTACH) return (DDI_FAILURE); + + lofi_minor_id = id_space_create("lofi_minor_id", 1, L_MAXMIN32 + 1); + + if (!lofi_minor_id) + return (DDI_FAILURE); + error = ddi_soft_state_zalloc(lofi_statep, 0); if (error == DDI_FAILURE) { + id_space_destroy(lofi_minor_id); return (DDI_FAILURE); } error = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0, DDI_PSEUDO, NULL); if (error == DDI_FAILURE) { ddi_soft_state_free(lofi_statep, 0); + id_space_destroy(lofi_minor_id); return (DDI_FAILURE); } /* driver handles kernel-issued IOCTLs */ @@ -1458,8 +1494,12 @@ DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { ddi_remove_minor_node(dip, NULL); ddi_soft_state_free(lofi_statep, 0); + id_space_destroy(lofi_minor_id); return (DDI_FAILURE); } + + zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL); + lofi_dip = dip; ddi_report_dev(dip); return (DDI_SUCCESS); @@ -1470,12 +1510,27 @@ { if (cmd != DDI_DETACH) return (DDI_FAILURE); - if (lofi_busy()) + + mutex_enter(&lofi_lock); + + if 
(!list_is_empty(&lofi_list)) { + mutex_exit(&lofi_lock); return (DDI_FAILURE); + } + lofi_dip = NULL; ddi_remove_minor_node(dip, NULL); ddi_prop_remove_all(dip); + + mutex_exit(&lofi_lock); + + if (zone_key_delete(lofi_zone_key) != 0) + cmn_err(CE_WARN, "failed to delete zone key"); + ddi_soft_state_free(lofi_statep, 0); + + id_space_destroy(lofi_minor_id); + return (DDI_SUCCESS); } @@ -1496,30 +1551,34 @@ * These two just simplify the rest of the ioctls that need to copyin/out * the lofi_ioctl structure. */ -struct lofi_ioctl * -copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, int flag) +int +copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp, + int flag) { struct lofi_ioctl *klip; int error; - klip = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); + klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); - if (error) { - free_lofi_ioctl(klip); - return (NULL); + if (error) + goto err; + + /* ensure NULL termination */ + klip->li_filename[MAXPATHLEN-1] = '\0'; + klip->li_algorithm[MAXALGLEN-1] = '\0'; + klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; + klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; + + if (klip->li_minor > L_MAXMIN32) { + error = EINVAL; + goto err; } - /* make sure filename is always null-terminated */ - klip->li_filename[MAXPATHLEN-1] = '\0'; + return (0); - /* validate minor number */ - if (klip->li_minor > lofi_max_files) { - free_lofi_ioctl(klip); - cmn_err(CE_WARN, "attempt to map more than lofi_max_files (%d)", - lofi_max_files); - return (NULL); - } - return (klip); +err: + free_lofi_ioctl(klip); + return (error); } int @@ -1547,45 +1606,76 @@ return (0); } -/* - * Return the minor number 'filename' is mapped to, if it is. 
- */ static int -file_to_minor(char *filename) +lofi_access(struct lofi_state *lsp) { - minor_t minor; - struct lofi_state *lsp; - - ASSERT(mutex_owned(&lofi_lock)); - for (minor = 1; minor <= lofi_max_files; minor++) { - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL) - continue; - if (strcmp(lsp->ls_filename, filename) == 0) - return (minor); - } - return (0); + ASSERT(MUTEX_HELD(&lofi_lock)); + if (INGLOBALZONE(curproc) || lsp->ls_zone == curproc->p_zone) + return (0); + return (EPERM); } /* - * lofiadm does some validation, but since Joe Random (or crashme) could - * do our ioctls, we need to do some validation too. + * Find the lofi state for the given filename. We compare by vnode to + * allow the global zone visibility into NGZ lofi nodes. */ static int -valid_filename(const char *filename) +file_to_lofi_nocheck(char *filename, struct lofi_state **lspp) { - static char *blkprefix = "/dev/" LOFI_BLOCK_NAME "/"; - static char *charprefix = "/dev/" LOFI_CHAR_NAME "/"; + struct lofi_state *lsp; + vnode_t *vp = NULL; + int err = 0; + + ASSERT(MUTEX_HELD(&lofi_lock)); + + if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW, + NULLVPP, &vp)) != 0) + goto out; + + if (vp->v_type == VREG) { + vnode_t *realvp; + if (VOP_REALVP(vp, &realvp, NULL) == 0) { + VN_HOLD(realvp); + VN_RELE(vp); + vp = realvp; + } + } - /* must be absolute path */ - if (filename[0] != '/') - return (0); - /* must not be lofi */ - if (strncmp(filename, blkprefix, strlen(blkprefix)) == 0) - return (0); - if (strncmp(filename, charprefix, strlen(charprefix)) == 0) - return (0); - return (1); + for (lsp = list_head(&lofi_list); lsp != NULL; + lsp = list_next(&lofi_list, lsp)) { + if (lsp->ls_vp == vp) { + if (lspp != NULL) + *lspp = lsp; + goto out; + } + } + + err = ENOENT; + +out: + if (vp != NULL) + VN_RELE(vp); + return (err); +} + +/* + * Find the minor for the given filename, checking the zone can access + * it. 
+ */ +static int +file_to_lofi(char *filename, struct lofi_state **lspp) +{ + int err = 0; + + ASSERT(MUTEX_HELD(&lofi_lock)); + + if ((err = file_to_lofi_nocheck(filename, lspp)) != 0) + return (err); + + if ((err = lofi_access(*lspp)) != 0) + return (err); + + return (0); } /* @@ -1790,342 +1880,89 @@ BE_64(lsp->ls_comp_seg_index[i]); } - /* - * Finally setup per-thread pre-allocated buffers - */ - lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * - sizeof (struct compbuf), KM_SLEEP); - mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL); - return (error); } -/* - * Check to see if the passed in signature is a valid - * one. If it is valid, return the index into - * lofi_compress_table. - * - * Return -1 if it is invalid - */ -static int lofi_compress_select(char *signature) +static int +lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip) { + struct crypto_meta chead; + char buf[DEV_BSIZE]; + ssize_t resid; + char *marker; + int error; + int ret; int i; - for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { - if (strcmp(lofi_compress_table[i].l_name, signature) == 0) - return (i); - } + if (!klip->li_crypto_enabled) + return (0); - return (-1); -} + /* + * All current algorithms have a max of 448 bits. + */ + if (klip->li_iv_len > CRYPTO_BITS2BYTES(512)) + return (EINVAL); -/* - * map a file to a minor number. Return the minor number. 
- */ -static int -lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, - int *rvalp, struct cred *credp, int ioctl_flag) -{ - minor_t newminor; - struct lofi_state *lsp; - struct lofi_ioctl *klip; - int error; - struct vnode *vp; - int64_t Nblocks_prop_val; - int64_t Size_prop_val; - int compress_index; - vattr_t vattr; - int flag; - enum vtype v_type; - int zalloced = 0; - dev_t newdev; - char namebuf[50]; - char buf[DEV_BSIZE]; - char crybuf[DEV_BSIZE]; - ssize_t resid; - boolean_t need_vn_close = B_FALSE; - boolean_t keycopied = B_FALSE; - boolean_t need_size_update = B_FALSE; + if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key)) + return (EINVAL); + + lsp->ls_crypto_enabled = klip->li_crypto_enabled; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); - - mutex_enter(&lofi_lock); - - if (!valid_filename(klip->li_filename)) { - error = EINVAL; - goto out; - } - - if (file_to_minor(klip->li_filename) != 0) { - error = EBUSY; - goto out; - } + mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); - if (pickminor) { - /* Find a free one */ - for (newminor = 1; newminor <= lofi_max_files; newminor++) - if (ddi_get_soft_state(lofi_statep, newminor) == NULL) - break; - if (newminor >= lofi_max_files) { - error = EAGAIN; - goto out; - } - } else { - newminor = klip->li_minor; - if (ddi_get_soft_state(lofi_statep, newminor) != NULL) { - error = EEXIST; - goto out; - } + lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); + if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { + cmn_err(CE_WARN, "invalid cipher %s requested for %s", + klip->li_cipher, klip->li_filename); + return (EINVAL); } - /* make sure it's valid */ - error = lookupname(klip->li_filename, UIO_SYSSPACE, FOLLOW, - NULLVPP, &vp); - if (error) { - goto out; - } - v_type = vp->v_type; - VN_RELE(vp); - if (!V_ISLOFIABLE(v_type)) { - error = EINVAL; - goto out; - } - flag = FREAD | FWRITE | FOFFMAX | FEXCL; - error = 
vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); - if (error) { - /* try read-only */ - flag &= ~FWRITE; - error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, - &vp, 0, 0); - if (error) { - goto out; - } - } - need_vn_close = B_TRUE; + /* this is just initialization here */ + lsp->ls_mech.cm_param = NULL; + lsp->ls_mech.cm_param_len = 0; - vattr.va_mask = AT_SIZE; - error = VOP_GETATTR(vp, &vattr, 0, credp, NULL); - if (error) { - goto out; - } - /* the file needs to be a multiple of the block size */ - if ((vattr.va_size % DEV_BSIZE) != 0) { - error = EINVAL; - goto out; - } - newdev = makedevice(getmajor(dev), newminor); - Size_prop_val = vattr.va_size; - if ((ddi_prop_update_int64(newdev, lofi_dip, - SIZE_PROP_NAME, Size_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto out; - } - Nblocks_prop_val = vattr.va_size / DEV_BSIZE; - if ((ddi_prop_update_int64(newdev, lofi_dip, - NBLOCKS_PROP_NAME, Nblocks_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; + lsp->ls_iv_type = klip->li_iv_type; + lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); + if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { + cmn_err(CE_WARN, "invalid iv cipher %s requested" + " for %s", klip->li_iv_cipher, klip->li_filename); + return (EINVAL); } - error = ddi_soft_state_zalloc(lofi_statep, newminor); - if (error == DDI_FAILURE) { - error = ENOMEM; - goto propout; - } - zalloced = 1; - (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); - error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, newminor, - DDI_PSEUDO, NULL); - if (error != DDI_SUCCESS) { - error = ENXIO; - goto propout; - } - (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", newminor); - error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, newminor, - DDI_PSEUDO, NULL); - if (error != DDI_SUCCESS) { - /* remove block node */ - (void) snprintf(namebuf, sizeof (namebuf), "%d", newminor); - ddi_remove_minor_node(lofi_dip, namebuf); - error = ENXIO; - goto 
propout; - } - lsp = ddi_get_soft_state(lofi_statep, newminor); - lsp->ls_filename_sz = strlen(klip->li_filename) + 1; - lsp->ls_filename = kmem_alloc(lsp->ls_filename_sz, KM_SLEEP); - (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", - LOFI_DRIVER_NAME, newminor); - lsp->ls_taskq = taskq_create(namebuf, lofi_taskq_nthreads, - minclsyspri, 1, lofi_taskq_maxalloc, 0); - lsp->ls_kstat = kstat_create(LOFI_DRIVER_NAME, newminor, - NULL, "disk", KSTAT_TYPE_IO, 1, 0); - if (lsp->ls_kstat) { - mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); - lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; - kstat_install(lsp->ls_kstat); - } - cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL); - mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL); - list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache), - offsetof(struct lofi_comp_cache, lc_list)); - mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL); - - /* - * save open mode so file can be closed properly and vnode counts - * updated correctly. - */ - lsp->ls_openflag = flag; + /* iv mech must itself take a null iv */ + lsp->ls_iv_mech.cm_param = NULL; + lsp->ls_iv_mech.cm_param_len = 0; + lsp->ls_iv_len = klip->li_iv_len; /* - * Try to handle stacked lofs vnodes. + * Create ctx using li_cipher & the raw li_key after checking + * that it isn't a weak key. */ - if (vp->v_type == VREG) { - if (VOP_REALVP(vp, &lsp->ls_vp, NULL) != 0) { - lsp->ls_vp = vp; - } else { - /* - * Even though vp was obtained via vn_open(), we - * can't call vn_close() on it, since lofs will - * pass the VOP_CLOSE() on down to the realvp - * (which we are about to use). Hence we merely - * drop the reference to the lofs vnode and hold - * the realvp so things behave as if we've - * opened the realvp without any interaction - * with lofs. 
- */ - VN_HOLD(lsp->ls_vp); - VN_RELE(vp); - } - } else { - lsp->ls_vp = vp; + lsp->ls_key.ck_format = CRYPTO_KEY_RAW; + lsp->ls_key.ck_length = klip->li_key_len; + lsp->ls_key.ck_data = kmem_alloc( + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); + bcopy(klip->li_key, lsp->ls_key.ck_data, + CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); + + ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); + if (ret != CRYPTO_SUCCESS) { + cmn_err(CE_WARN, "weak key check failed for cipher " + "%s on file %s (0x%x)", klip->li_cipher, + klip->li_filename, ret); + return (EINVAL); } - lsp->ls_vp_size = vattr.va_size; - (void) strcpy(lsp->ls_filename, klip->li_filename); - if (rvalp) - *rvalp = (int)newminor; - klip->li_minor = newminor; + + error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, + CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + if (error != 0) + return (error); /* - * Initialize crypto details for encrypted lofi + * This is the case where the header in the lofi image is already + * initialized to indicate it is encrypted. 
*/ - if (klip->li_crypto_enabled) { - int ret; - - mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); - - lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); - if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { - cmn_err(CE_WARN, "invalid cipher %s requested for %s", - klip->li_cipher, lsp->ls_filename); - error = EINVAL; - goto propout; - } - - /* this is just initialization here */ - lsp->ls_mech.cm_param = NULL; - lsp->ls_mech.cm_param_len = 0; - - lsp->ls_iv_type = klip->li_iv_type; - lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); - if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { - cmn_err(CE_WARN, "invalid iv cipher %s requested" - " for %s", klip->li_iv_cipher, lsp->ls_filename); - error = EINVAL; - goto propout; - } - - /* iv mech must itself take a null iv */ - lsp->ls_iv_mech.cm_param = NULL; - lsp->ls_iv_mech.cm_param_len = 0; - lsp->ls_iv_len = klip->li_iv_len; - - /* - * Create ctx using li_cipher & the raw li_key after checking - * that it isn't a weak key. - */ - lsp->ls_key.ck_format = CRYPTO_KEY_RAW; - lsp->ls_key.ck_length = klip->li_key_len; - lsp->ls_key.ck_data = kmem_alloc( - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); - bcopy(klip->li_key, lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - keycopied = B_TRUE; - - ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); - if (ret != CRYPTO_SUCCESS) { - error = EINVAL; - cmn_err(CE_WARN, "weak key check failed for cipher " - "%s on file %s (0x%x)", klip->li_cipher, - lsp->ls_filename, ret); - goto propout; - } - } - lsp->ls_crypto_enabled = klip->li_crypto_enabled; - - /* - * Read the file signature to check if it is compressed or encrypted. - * Crypto signature is in a different location; both areas should - * read to keep compression and encryption mutually exclusive. 
- */ - if (lsp->ls_crypto_enabled) { - error = vn_rdwr(UIO_READ, lsp->ls_vp, crybuf, DEV_BSIZE, - CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; - } - error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, - 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; - - /* initialize these variables for all lofi files */ - lsp->ls_comp_bufs = NULL; - lsp->ls_uncomp_seg_sz = 0; - lsp->ls_vp_comp_size = lsp->ls_vp_size; - lsp->ls_comp_algorithm[0] = '\0'; - - /* encrypted lofi reads/writes shifted by crypto metadata size */ - lsp->ls_crypto_offset = 0; - - /* this is a compressed lofi */ - if ((compress_index = lofi_compress_select(buf)) != -1) { - - /* compression and encryption are mutually exclusive */ - if (klip->li_crypto_enabled) { - error = ENOTSUP; - goto propout; - } - - /* initialize compression info for compressed lofi */ - lsp->ls_comp_algorithm_index = compress_index; - (void) strlcpy(lsp->ls_comp_algorithm, - lofi_compress_table[compress_index].l_name, - sizeof (lsp->ls_comp_algorithm)); - - error = lofi_map_compressed_file(lsp, buf); - if (error != 0) - goto propout; - need_size_update = B_TRUE; - - /* this is an encrypted lofi */ - } else if (strncmp(crybuf, lofi_crypto_magic, - sizeof (lofi_crypto_magic)) == 0) { - - char *marker = crybuf; - - /* - * This is the case where the header in the lofi image is - * already initialized to indicate it is encrypted. - * There is another case (see below) where encryption is - * requested but the lofi image has never been used yet, - * so the header needs to be written with encryption magic. - */ - - /* indicate this must be an encrypted lofi due to magic */ - klip->li_crypto_enabled = B_TRUE; - + if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) { /* * The encryption header information is laid out this way: * 6 bytes: hex "CFLOFI" @@ -2135,6 +1972,8 @@ * more... 
not implemented yet */ + marker = buf; + /* copy the magic */ bcopy(marker, lsp->ls_crypto.magic, sizeof (lsp->ls_crypto.magic)); @@ -2160,106 +1999,326 @@ /* and ignore the rest until it is implemented */ lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; - need_size_update = B_TRUE; - - /* neither compressed nor encrypted, BUT could be new encrypted lofi */ - } else if (klip->li_crypto_enabled) { - - /* - * This is the case where encryption was requested but the - * appears to be entirely blank where the encryption header - * would have been in the lofi image. If it is blank, - * assume it is a brand new lofi image and initialize the - * header area with encryption magic and current version - * header data. If it is not blank, that's an error. - */ - int i; - char *marker; - struct crypto_meta chead; - - for (i = 0; i < sizeof (struct crypto_meta); i++) - if (crybuf[i] != '\0') - break; - if (i != sizeof (struct crypto_meta)) { - error = EINVAL; - goto propout; - } - - /* nothing there, initialize as encrypted lofi */ - marker = crybuf; - bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); - marker += sizeof (lofi_crypto_magic); - chead.version = htons(LOFI_CRYPTO_VERSION); - bcopy(&(chead.version), marker, sizeof (chead.version)); - marker += sizeof (chead.version); - marker += sizeof (chead.reserved1); - chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); - bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); - - /* write the header */ - error = vn_rdwr(UIO_WRITE, lsp->ls_vp, crybuf, DEV_BSIZE, - CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - if (error != 0) - goto propout; - - /* fix things up so it looks like we read this info */ - bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, - sizeof (lofi_crypto_magic)); - lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; - lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; - - lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; - need_size_update = B_TRUE; + 
return (0); } /* - * Either lsp->ls_vp_size or lsp->ls_crypto_offset changed; - * for encrypted lofi, advertise that it is somewhat shorter - * due to embedded crypto metadata section + * We've requested encryption, but no magic was found, so it must be + * a new image. */ - if (need_size_update) { - /* update DDI properties */ - Size_prop_val = lsp->ls_vp_size - lsp->ls_crypto_offset; - if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME, - Size_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; + + for (i = 0; i < sizeof (struct crypto_meta); i++) { + if (buf[i] != '\0') + return (EINVAL); + } + + marker = buf; + bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); + marker += sizeof (lofi_crypto_magic); + chead.version = htons(LOFI_CRYPTO_VERSION); + bcopy(&(chead.version), marker, sizeof (chead.version)); + marker += sizeof (chead.version); + marker += sizeof (chead.reserved1); + chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); + bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); + + /* write the header */ + error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE, + CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); + if (error != 0) + return (error); + + /* fix things up so it looks like we read this info */ + bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, + sizeof (lofi_crypto_magic)); + lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; + lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; + lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; + return (0); +} + +/* + * Check to see if the passed in signature is a valid one. If it is + * valid, return the index into lofi_compress_table. 
+ * + * Return -1 if it is invalid + */ +static int +lofi_compress_select(const char *signature) +{ + int i; + + for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { + if (strcmp(lofi_compress_table[i].l_name, signature) == 0) + return (i); + } + + return (-1); +} + +static int +lofi_init_compress(struct lofi_state *lsp) +{ + char buf[DEV_BSIZE]; + int compress_index; + ssize_t resid; + int error; + + error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, + 0, RLIM64_INFINITY, kcred, &resid); + + if (error != 0) + return (error); + + if ((compress_index = lofi_compress_select(buf)) == -1) + return (0); + + /* compression and encryption are mutually exclusive */ + if (lsp->ls_crypto_enabled) + return (ENOTSUP); + + /* initialize compression info for compressed lofi */ + lsp->ls_comp_algorithm_index = compress_index; + (void) strlcpy(lsp->ls_comp_algorithm, + lofi_compress_table[compress_index].l_name, + sizeof (lsp->ls_comp_algorithm)); + + /* Finally setup per-thread pre-allocated buffers */ + lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * + sizeof (struct compbuf), KM_SLEEP); + + return (lofi_map_compressed_file(lsp, buf)); +} + +/* + * map a file to a minor number. Return the minor number. 
+ */ +static int +lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor, + int *rvalp, struct cred *credp, int ioctl_flag) +{ + minor_t minor = (minor_t)-1; + struct lofi_state *lsp = NULL; + struct lofi_ioctl *klip; + int error; + struct vnode *vp = NULL; + vattr_t vattr; + int flag; + dev_t newdev; + char namebuf[50]; + + error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (error != 0) + return (error); + + mutex_enter(&lofi_lock); + + mutex_enter(&curproc->p_lock); + if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) { + mutex_exit(&curproc->p_lock); + mutex_exit(&lofi_lock); + free_lofi_ioctl(klip); + return (error); + } + mutex_exit(&curproc->p_lock); + + if (file_to_lofi_nocheck(klip->li_filename, NULL) == 0) { + error = EBUSY; + goto err; + } + + if (pickminor) { + minor = (minor_t)id_allocff_nosleep(lofi_minor_id); + if (minor == (minor_t)-1) { + error = EAGAIN; + goto err; } - Nblocks_prop_val = - (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE; - if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME, - Nblocks_prop_val)) != DDI_PROP_SUCCESS) { - error = EINVAL; - goto propout; + } else { + if (ddi_get_soft_state(lofi_statep, klip->li_minor) != NULL) { + error = EEXIST; + goto err; + } + + minor = (minor_t) + id_alloc_specific_nosleep(lofi_minor_id, klip->li_minor); + ASSERT(minor != (minor_t)-1); + } + + flag = FREAD | FWRITE | FOFFMAX | FEXCL; + error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0); + if (error) { + /* try read-only */ + flag &= ~FWRITE; + error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, + &vp, 0, 0); + if (error) + goto err; + } + + if (!V_ISLOFIABLE(vp->v_type)) { + error = EINVAL; + goto err; + } + + vattr.va_mask = AT_SIZE; + error = VOP_GETATTR(vp, &vattr, 0, credp, NULL); + if (error) + goto err; + + /* the file needs to be a multiple of the block size */ + if ((vattr.va_size % DEV_BSIZE) != 0) { + error = EINVAL; + goto err; + } + + /* lsp alloc+init */ + + error 
= ddi_soft_state_zalloc(lofi_statep, minor); + if (error == DDI_FAILURE) { + error = ENOMEM; + goto err; + } + + lsp = ddi_get_soft_state(lofi_statep, minor); + list_insert_tail(&lofi_list, lsp); + + newdev = makedevice(getmajor(dev), minor); + lsp->ls_dev = newdev; + lsp->ls_zone = zone_find_by_id(getzoneid()); + ASSERT(lsp->ls_zone != NULL); + lsp->ls_uncomp_seg_sz = 0; + lsp->ls_comp_algorithm[0] = '\0'; + lsp->ls_crypto_offset = 0; + + cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL); + mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL); + + (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d", + LOFI_DRIVER_NAME, minor); + lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads, + minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0); + + list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache), + offsetof(struct lofi_comp_cache, lc_list)); + + /* + * save open mode so file can be closed properly and vnode counts + * updated correctly. + */ + lsp->ls_openflag = flag; + + lsp->ls_vp = vp; + lsp->ls_stacked_vp = vp; + /* + * Try to handle stacked lofs vnodes. + */ + if (vp->v_type == VREG) { + vnode_t *realvp; + + if (VOP_REALVP(vp, &realvp, NULL) == 0) { + /* + * We need to use the realvp for uniqueness + * checking, but keep the stacked vp for + * LOFI_GET_FILENAME display. 
+ */ + VN_HOLD(realvp); + lsp->ls_vp = realvp; } } + lsp->ls_vp_size = vattr.va_size; + lsp->ls_vp_comp_size = lsp->ls_vp_size; + + lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, minor, + NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid()); + + if (lsp->ls_kstat == NULL) { + error = ENOMEM; + goto err; + } + + lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; + kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID); + + if ((error = lofi_init_crypto(lsp, klip)) != 0) + goto err; + + if ((error = lofi_init_compress(lsp)) != 0) + goto err; + fake_disk_geometry(lsp); + + /* create minor nodes */ + + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + error = ddi_create_minor_node(lofi_dip, namebuf, S_IFBLK, minor, + DDI_PSEUDO, NULL); + if (error != DDI_SUCCESS) { + error = ENXIO; + goto err; + } + + (void) snprintf(namebuf, sizeof (namebuf), "%d,raw", minor); + error = ddi_create_minor_node(lofi_dip, namebuf, S_IFCHR, minor, + DDI_PSEUDO, NULL); + if (error != DDI_SUCCESS) { + /* remove block node */ + (void) snprintf(namebuf, sizeof (namebuf), "%d", minor); + ddi_remove_minor_node(lofi_dip, namebuf); + error = ENXIO; + goto err; + } + + /* create DDI properties */ + + if ((ddi_prop_update_int64(newdev, lofi_dip, SIZE_PROP_NAME, + lsp->ls_vp_size - lsp->ls_crypto_offset)) != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; + } + + if ((ddi_prop_update_int64(newdev, lofi_dip, NBLOCKS_PROP_NAME, + (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE)) + != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; + } + + if (ddi_prop_update_string(newdev, lofi_dip, ZONE_PROP_NAME, + (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) { + error = EINVAL; + goto nodeerr; + } + + kstat_install(lsp->ls_kstat); + mutex_exit(&lofi_lock); + + if (rvalp) + *rvalp = (int)minor; + klip->li_minor = minor; (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); return (0); -propout: - if (keycopied) { - bzero(lsp->ls_key.ck_data, - 
CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - kmem_free(lsp->ls_key.ck_data, - CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); - lsp->ls_key.ck_data = NULL; - lsp->ls_key.ck_length = 0; - } +nodeerr: + lofi_free_dev(newdev); +err: + if (lsp != NULL) { + lofi_destroy(lsp, credp); + } else { + if (vp != NULL) { + (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL); + VN_RELE(vp); + } - if (zalloced) - ddi_soft_state_free(lofi_statep, newminor); + if (minor != (minor_t)-1) + id_free(lofi_minor_id, minor); - (void) ddi_prop_remove(newdev, lofi_dip, SIZE_PROP_NAME); - (void) ddi_prop_remove(newdev, lofi_dip, NBLOCKS_PROP_NAME); - -out: - if (need_vn_close) { - (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL); - VN_RELE(vp); + rctl_decr_lofi(curproc->p_zone, 1); } mutex_exit(&lofi_lock); @@ -2276,29 +2335,33 @@ { struct lofi_state *lsp; struct lofi_ioctl *klip; - minor_t minor; + int err; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); + err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (err != 0) + return (err); mutex_enter(&lofi_lock); if (byfilename) { - minor = file_to_minor(klip->li_filename); + if ((err = file_to_lofi(klip->li_filename, &lsp)) != 0) { + mutex_exit(&lofi_lock); + return (err); + } + } else if (klip->li_minor == 0) { + mutex_exit(&lofi_lock); + free_lofi_ioctl(klip); + return (ENXIO); } else { - minor = klip->li_minor; + lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); } - if (minor == 0) { + + if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); return (ENXIO); } - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL || lsp->ls_vp == NULL) { - mutex_exit(&lofi_lock); - free_lofi_ioctl(klip); - return (ENXIO); - } + + klip->li_minor = getminor(lsp->ls_dev); /* * If it's still held open, we'll do one of three things: @@ -2331,13 +2394,8 @@ while (lsp->ls_vp_iocount > 0) cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock); mutex_exit(&lsp->ls_vp_lock); - 
lofi_free_handle(dev, minor, lsp, credp); - klip->li_minor = minor; - mutex_exit(&lofi_lock); - (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); - free_lofi_ioctl(klip); - return (0); + goto out; } else if (klip->li_cleanup) { lsp->ls_cleanup = 1; mutex_exit(&lofi_lock); @@ -2350,9 +2408,10 @@ return (EBUSY); } - lofi_free_handle(dev, minor, lsp, credp); +out: + lofi_free_dev(dev); + lofi_destroy(lsp, credp); - klip->li_minor = minor; mutex_exit(&lofi_lock); (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); @@ -2368,31 +2427,39 @@ lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which, struct cred *credp, int ioctl_flag) { + struct lofi_ioctl *klip; struct lofi_state *lsp; - struct lofi_ioctl *klip; int error; - minor_t minor; - klip = copy_in_lofi_ioctl(ulip, ioctl_flag); - if (klip == NULL) - return (EFAULT); + error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); + if (error != 0) + return (error); switch (which) { case LOFI_GET_FILENAME: - minor = klip->li_minor; - if (minor == 0) { + if (klip->li_minor == 0) { free_lofi_ioctl(klip); return (EINVAL); } mutex_enter(&lofi_lock); - lsp = ddi_get_soft_state(lofi_statep, minor); - if (lsp == NULL) { + lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); + if (lsp == NULL || lofi_access(lsp) != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); return (ENXIO); } - (void) strcpy(klip->li_filename, lsp->ls_filename); + + /* + * This may fail if, for example, we're trying to look + * up a zoned NFS path from the global zone. 
+ */ + if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename, + sizeof (klip->li_filename), CRED()) != 0) { + (void) strlcpy(klip->li_filename, "?", + sizeof (klip->li_filename)); + } + (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, sizeof (klip->li_algorithm)); klip->li_crypto_enabled = lsp->ls_crypto_enabled; @@ -2402,35 +2469,29 @@ return (error); case LOFI_GET_MINOR: mutex_enter(&lofi_lock); - klip->li_minor = file_to_minor(klip->li_filename); - /* caller should not depend on klip->li_crypto_enabled here */ + error = file_to_lofi(klip->li_filename, &lsp); + if (error == 0) + klip->li_minor = getminor(lsp->ls_dev); mutex_exit(&lofi_lock); - if (klip->li_minor == 0) { - free_lofi_ioctl(klip); - return (ENOENT); - } - error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); + + if (error == 0) + error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); + free_lofi_ioctl(klip); return (error); case LOFI_CHECK_COMPRESSED: mutex_enter(&lofi_lock); - klip->li_minor = file_to_minor(klip->li_filename); - mutex_exit(&lofi_lock); - if (klip->li_minor == 0) { - free_lofi_ioctl(klip); - return (ENOENT); - } - mutex_enter(&lofi_lock); - lsp = ddi_get_soft_state(lofi_statep, klip->li_minor); - if (lsp == NULL) { + error = file_to_lofi(klip->li_filename, &lsp); + if (error != 0) { mutex_exit(&lofi_lock); free_lofi_ioctl(klip); - return (ENXIO); + return (error); } - ASSERT(strcmp(klip->li_filename, lsp->ls_filename) == 0); + klip->li_minor = getminor(lsp->ls_dev); (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, sizeof (klip->li_algorithm)); + mutex_exit(&lofi_lock); error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); free_lofi_ioctl(klip); @@ -2439,7 +2500,6 @@ free_lofi_ioctl(klip); return (EINVAL); } - } static int @@ -2484,17 +2544,41 @@ case LOFI_GET_MINOR: return (lofi_get_info(dev, lip, LOFI_GET_MINOR, credp, flag)); + + /* + * This API made limited sense when this value was fixed + * at LOFI_MAX_FILES. 
However, its use to iterate + * across all possible devices in lofiadm means we don't + * want to return L_MAXMIN32, but the highest + * *allocated* minor. + */ case LOFI_GET_MAXMINOR: - error = ddi_copyout(&lofi_max_files, &lip->li_minor, - sizeof (lofi_max_files), flag); + minor = 0; + + mutex_enter(&lofi_lock); + + for (lsp = list_head(&lofi_list); lsp != NULL; + lsp = list_next(&lofi_list, lsp)) { + if (lofi_access(lsp) != 0) + continue; + + if (getminor(lsp->ls_dev) > minor) + minor = getminor(lsp->ls_dev); + } + + mutex_exit(&lofi_lock); + + error = ddi_copyout(&minor, &lip->li_minor, + sizeof (minor), flag); if (error) return (EFAULT); return (0); + case LOFI_CHECK_COMPRESSED: return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED, credp, flag)); default: - break; + return (EINVAL); } } @@ -2644,16 +2728,21 @@ { int error; + list_create(&lofi_list, sizeof (struct lofi_state), + offsetof(struct lofi_state, ls_list)); + error = ddi_soft_state_init(&lofi_statep, sizeof (struct lofi_state), 0); if (error) return (error); mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); + error = mod_install(&modlinkage); if (error) { mutex_destroy(&lofi_lock); ddi_soft_state_fini(&lofi_statep); + list_destroy(&lofi_list); } return (error); @@ -2664,8 +2753,14 @@ { int error; - if (lofi_busy()) + mutex_enter(&lofi_lock); + + if (!list_is_empty(&lofi_list)) { + mutex_exit(&lofi_lock); return (EBUSY); + } + + mutex_exit(&lofi_lock); error = mod_remove(&modlinkage); if (error) @@ -2673,6 +2768,7 @@ mutex_destroy(&lofi_lock); ddi_soft_state_fini(&lofi_statep); + list_destroy(&lofi_list); return (error); }
--- a/usr/src/uts/common/os/id_space.c	Wed Jun 16 07:19:49 2010 -0700
+++ b/usr/src/uts/common/os/id_space.c	Wed Jun 16 10:02:44 2010 -0700
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -47,7 +46,7 @@
  * As an ID space is designed for representing a range of id_t's, there
  * is a preexisting maximal range: [0, MAXUID]. ID space requests outside
  * that range will fail on a DEBUG kernel. The id_allocff*() functions
- * return the first available id, and should be used when there is benifit
+ * return the first available id, and should be used when there is benefit
  * to having a compact allocated range.
  *
  * (Presently, the id_space_t abstraction supports only direct allocations; ID
@@ -56,6 +55,15 @@
  * arrives.)
  */
 
+/*
+ * IDs are kept in the vmem arena biased by one (id N is address N + 1), so
+ * that a NULL (0) address from a failed allocation converts to the id -1.
+ * Macro arguments are parenthesized so that expression arguments, such as
+ * ID_TO_ADDR(id + 1), expand with the intended precedence.
+ */
+#define	ID_TO_ADDR(id)		((void *)(uintptr_t)((id) + 1))
+#define	ADDR_TO_ID(addr)	((id_t)((uintptr_t)(addr) - 1))
+
 /*
  * Create an arena to represent the range [low, high).
  * Caller must be in a context in which VM_SLEEP is legal.
@@ -66,7 +74,7 @@
 	ASSERT(low >= 0);
 	ASSERT(low < high);
 
-	return (vmem_create(name, (void *)(uintptr_t)(low + 1), high - low, 1,
+	return (vmem_create(name, ID_TO_ADDR(low), high - low, 1,
 	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER));
 }
 
@@ -83,8 +91,7 @@
 void
 id_space_extend(id_space_t *isp, id_t low, id_t high)
 {
-	(void) vmem_add(isp,
-	    (void *)(uintptr_t)(low + 1), high - low, VM_SLEEP);
+	(void) vmem_add(isp, ID_TO_ADDR(low), high - low, VM_SLEEP);
 }
 
 /*
@@ -94,8 +101,7 @@
 id_t
 id_alloc(id_space_t *isp)
 {
-	return ((id_t)(uintptr_t)
-	    vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT) - 1);
+	return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT)));
 }
 
 /*
@@ -106,8 +112,7 @@
 id_t
 id_alloc_nosleep(id_space_t *isp)
 {
-	return ((id_t)(uintptr_t)
-	    vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT) - 1);
+	return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_NOSLEEP | VM_NEXTFIT)));
 }
 
 /*
@@ -117,8 +122,7 @@
 id_t
 id_allocff(id_space_t *isp)
 {
-	return ((id_t)(uintptr_t)
-	    vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT) - 1);
+	return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT)));
 }
 
 /*
@@ -129,8 +133,25 @@
 id_t
 id_allocff_nosleep(id_space_t *isp)
 {
-	return ((id_t)(uintptr_t)
-	    vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT) - 1);
+	return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_NOSLEEP | VM_FIRSTFIT)));
+}
+
+/*
+ * Allocate a specific identifier if possible, returning the id if
+ * successful, or -1 on failure.
+ */
+id_t
+id_alloc_specific_nosleep(id_space_t *isp, id_t id)
+{
+	void *minaddr = ID_TO_ADDR(id);
+	void *maxaddr = ID_TO_ADDR(id + 1);
+
+	/*
+	 * Note that even though we're vmem_free()ing this later, it
+	 * should be OK, since there's no quantum cache.
+	 */
+	return (ADDR_TO_ID(vmem_xalloc(isp, 1, 1, 0, 0,
+	    minaddr, maxaddr, VM_NOSLEEP)));
 }
 
 /*
@@ -140,5 +161,5 @@
 void
 id_free(id_space_t *isp, id_t id)
 {
-	vmem_free(isp, (void *)(uintptr_t)(id + 1), 1);
+	vmem_free(isp, ID_TO_ADDR(id), 1);
 }
--- a/usr/src/uts/common/os/policy.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/os/policy.c Wed Jun 16 10:02:44 2010 -0700 @@ -755,6 +755,48 @@ } +int +secpolicy_fs_allowed_mount(const char *fsname) +{ + struct vfssw *vswp; + const char *p; + size_t len; + + ASSERT(fsname != NULL); + ASSERT(fsname[0] != '\0'); + + if (INGLOBALZONE(curproc)) + return (0); + + vswp = vfs_getvfssw(fsname); + if (vswp == NULL) + return (ENOENT); + + if ((vswp->vsw_flag & VSW_ZMOUNT) != 0) { + vfs_unrefvfssw(vswp); + return (0); + } + + vfs_unrefvfssw(vswp); + + p = curzone->zone_fs_allowed; + len = strlen(fsname); + + while (p != NULL && *p != '\0') { + if (strncmp(p, fsname, len) == 0) { + char c = *(p + len); + if (c == '\0' || c == ',') + return (0); + } + + /* skip to beyond the next comma */ + if ((p = strchr(p, ',')) != NULL) + p++; + } + + return (EPERM); +} + extern vnode_t *rootvp; extern vfs_t *rootvfs;
--- a/usr/src/uts/common/os/rctl.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/os/rctl.c Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/atomic.h> @@ -3058,6 +3057,64 @@ } /* + * rctl_incr_lofi(proc_t *, zone_t *, size_t) + * + * Overview + * Increments the number of lofi devices for the zone. + * + * Return values + * 0 on success. EAGAIN if increment fails due to an rctl value + * on the zone. + * + * Callers context + * p_lock held on specified proc. + */ +int +rctl_incr_lofi(proc_t *proc, zone_t *zone, size_t incr) +{ + rctl_entity_p_t e; + + ASSERT(MUTEX_HELD(&proc->p_lock)); + ASSERT(incr > 0); + + e.rcep_p.zone = zone; + e.rcep_t = RCENTITY_ZONE; + + mutex_enter(&zone->zone_rctl_lock); + + /* Check for overflow */ + if ((zone->zone_max_lofi + incr) < zone->zone_max_lofi) { + mutex_exit(&zone->zone_rctl_lock); + return (EAGAIN); + } + if ((zone->zone_max_lofi + incr) > zone->zone_max_lofi_ctl) { + if (rctl_test_entity(rc_zone_max_lofi, zone->zone_rctls, + proc, &e, incr, 0) & RCT_DENY) { + mutex_exit(&zone->zone_rctl_lock); + return (EAGAIN); + } + } + zone->zone_max_lofi += incr; + mutex_exit(&zone->zone_rctl_lock); + return (0); +} + +/* + * rctl_decr_lofi(zone_t *, size_t) + * + * Overview + * Decrements the number of lofi devices for the zone. + */ +void +rctl_decr_lofi(zone_t *zone, size_t decr) +{ + mutex_enter(&zone->zone_rctl_lock); + ASSERT(zone->zone_max_lofi >= decr); + zone->zone_max_lofi -= decr; + mutex_exit(&zone->zone_rctl_lock); +} + +/* * Create resource kstat */ static kstat_t *
--- a/usr/src/uts/common/os/zone.c Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/os/zone.c Wed Jun 16 10:02:44 2010 -0700 @@ -159,6 +159,8 @@ * related to the zone.max-lwps rctl. * zone_mem_lock: This is a per-zone lock used to protect the fields * related to the zone.max-locked-memory and zone.max-swap rctls. + * zone_rctl_lock: This is a per-zone lock used to protect other rctls, + * currently just max_lofi * zsd_key_lock: This is a global lock protecting the key state for ZSD. * zone_deathrow_lock: This is a global lock protecting the "deathrow" * list (a list of zones in the ZONE_IS_DEAD state). @@ -340,6 +342,7 @@ rctl_hndl_t rc_zone_cpu_shares; rctl_hndl_t rc_zone_locked_mem; rctl_hndl_t rc_zone_max_swap; +rctl_hndl_t rc_zone_max_lofi; rctl_hndl_t rc_zone_cpu_cap; rctl_hndl_t rc_zone_nlwps; rctl_hndl_t rc_zone_shmmax; @@ -1584,6 +1587,57 @@ zone_max_swap_test }; +/*ARGSUSED*/ +static rctl_qty_t +zone_max_lofi_usage(rctl_t *rctl, struct proc *p) +{ + rctl_qty_t q; + zone_t *z = p->p_zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + mutex_enter(&z->zone_rctl_lock); + q = z->zone_max_lofi; + mutex_exit(&z->zone_rctl_lock); + return (q); +} + +/*ARGSUSED*/ +static int +zone_max_lofi_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, + rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags) +{ + rctl_qty_t q; + zone_t *z; + + z = e->rcep_p.zone; + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(MUTEX_HELD(&z->zone_rctl_lock)); + q = z->zone_max_lofi; + if (q + incr > rcntl->rcv_value) + return (1); + return (0); +} + +/*ARGSUSED*/ +static int +zone_max_lofi_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + if (e->rcep_p.zone == NULL) + return (0); + e->rcep_p.zone->zone_max_lofi_ctl = nv; + return (0); +} + +static rctl_ops_t zone_max_lofi_ops = { + rcop_no_action, + zone_max_lofi_usage, + zone_max_lofi_set, + zone_max_lofi_test +}; + /* * Helper function to brand the zone with a 
unique ID. */ @@ -1732,6 +1786,8 @@ zone0.zone_locked_mem_ctl = UINT64_MAX; ASSERT(zone0.zone_max_swap == 0); zone0.zone_max_swap_ctl = UINT64_MAX; + zone0.zone_max_lofi = 0; + zone0.zone_max_lofi_ctl = UINT64_MAX; zone0.zone_shmmax = 0; zone0.zone_ipc.ipcq_shmmni = 0; zone0.zone_ipc.ipcq_semmni = 0; @@ -1740,6 +1796,7 @@ zone0.zone_nodename = utsname.nodename; zone0.zone_domain = srpc_domain; zone0.zone_hostid = HW_INVALID_HOSTID; + zone0.zone_fs_allowed = NULL; zone0.zone_ref = 1; zone0.zone_id = GLOBAL_ZONEID; zone0.zone_status = ZONE_IS_RUNNING; @@ -1902,6 +1959,11 @@ RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, &zone_max_swap_ops); + rc_zone_max_lofi = rctl_register("zone.max-lofi", + RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | + RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, + &zone_max_lofi_ops); + /* * Initialize the ``global zone''. */ @@ -2040,9 +2102,11 @@ if (zone->zone_rctls != NULL) rctl_set_free(zone->zone_rctls); if (zone->zone_bootargs != NULL) - kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); + strfree(zone->zone_bootargs); if (zone->zone_initname != NULL) - kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); + strfree(zone->zone_initname); + if (zone->zone_fs_allowed != NULL) + strfree(zone->zone_fs_allowed); if (zone->zone_pfexecd != NULL) klpd_freelist(&zone->zone_pfexecd); id_free(zoneid_space, zone->zone_id); @@ -2104,21 +2168,20 @@ static int zone_set_bootargs(zone_t *zone, const char *zone_bootargs) { - char *bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); + char *buf = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); int err = 0; ASSERT(zone != global_zone); - if ((err = copyinstr(zone_bootargs, bootargs, BOOTARGS_MAX, NULL)) != 0) + if ((err = copyinstr(zone_bootargs, buf, BOOTARGS_MAX, NULL)) != 0) goto done; /* EFAULT or ENAMETOOLONG */ if (zone->zone_bootargs != NULL) - kmem_free(zone->zone_bootargs, strlen(zone->zone_bootargs) + 1); - - zone->zone_bootargs = kmem_alloc(strlen(bootargs) + 1, 
KM_SLEEP); - (void) strcpy(zone->zone_bootargs, bootargs); + strfree(zone->zone_bootargs); + + zone->zone_bootargs = strdup(buf); done: - kmem_free(bootargs, BOOTARGS_MAX); + kmem_free(buf, BOOTARGS_MAX); return (err); } @@ -2164,6 +2227,27 @@ } static int +zone_set_fs_allowed(zone_t *zone, const char *zone_fs_allowed) +{ + char *buf = kmem_zalloc(ZONE_FS_ALLOWED_MAX, KM_SLEEP); + int err = 0; + + ASSERT(zone != global_zone); + if ((err = copyinstr(zone_fs_allowed, buf, + ZONE_FS_ALLOWED_MAX, NULL)) != 0) + goto done; + + if (zone->zone_fs_allowed != NULL) + strfree(zone->zone_fs_allowed); + + zone->zone_fs_allowed = strdup(buf); + +done: + kmem_free(buf, ZONE_FS_ALLOWED_MAX); + return (err); +} + +static int zone_set_initname(zone_t *zone, const char *zone_initname) { char initname[INITNAME_SZ]; @@ -2175,7 +2259,7 @@ return (err); /* EFAULT or ENAMETOOLONG */ if (zone->zone_initname != NULL) - kmem_free(zone->zone_initname, strlen(zone->zone_initname) + 1); + strfree(zone->zone_initname); zone->zone_initname = kmem_alloc(strlen(initname) + 1, KM_SLEEP); (void) strcpy(zone->zone_initname, initname); @@ -3856,6 +3940,7 @@ zone->zone_ipc.ipcq_semmni = 0; zone->zone_ipc.ipcq_msgmni = 0; zone->zone_bootargs = NULL; + zone->zone_fs_allowed = NULL; zone->zone_initname = kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP); (void) strcpy(zone->zone_initname, zone_default_initname); @@ -3865,6 +3950,8 @@ zone->zone_locked_mem_ctl = UINT64_MAX; zone->zone_max_swap = 0; zone->zone_max_swap_ctl = UINT64_MAX; + zone->zone_max_lofi = 0; + zone->zone_max_lofi_ctl = UINT64_MAX; zone0.zone_lockedmem_kstat = NULL; zone0.zone_swapresv_kstat = NULL; @@ -4790,6 +4877,20 @@ error = EINVAL; } break; + case ZONE_ATTR_FS_ALLOWED: + if (zone->zone_fs_allowed == NULL) + outstr = ""; + else + outstr = zone->zone_fs_allowed; + size = strlen(outstr) + 1; + if (bufsize > size) + bufsize = size; + if (buf != NULL) { + err = copyoutstr(outstr, buf, bufsize, NULL); + if (err != 0 && err != 
ENAMETOOLONG) + error = EFAULT; + } + break; default: if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) { size = bufsize; @@ -4853,6 +4954,9 @@ case ZONE_ATTR_BRAND: err = zone_set_brand(zone, (const char *)buf); break; + case ZONE_ATTR_FS_ALLOWED: + err = zone_set_fs_allowed(zone, (const char *)buf); + break; case ZONE_ATTR_PHYS_MCAP: err = zone_set_phys_mcap(zone, (const uint64_t *)buf); break;
--- a/usr/src/uts/common/sys/fs/sdev_impl.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/fs/sdev_impl.h Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_SDEV_IMPL_H @@ -230,6 +229,7 @@ #define SDEV_ATTR_INVALID 0x0080 /* invalid node attributes, */ /* need update */ #define SDEV_SUBDIR 0x0100 /* match all subdirs under here */ +#define SDEV_ZONED 0x0200 /* zoned subdir */ /* sdev_lookup_flags */ #define SDEV_LOOKUP 0x0001 /* node creation in progress */
--- a/usr/src/uts/common/sys/id_space.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/id_space.h Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _ID_SPACE_H @@ -46,6 +45,7 @@ id_t id_alloc_nosleep(id_space_t *); id_t id_allocff(id_space_t *); id_t id_allocff_nosleep(id_space_t *); +id_t id_alloc_specific_nosleep(id_space_t *, id_t); void id_free(id_space_t *, id_t); #endif /* _KERNEL */
--- a/usr/src/uts/common/sys/lofi.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/lofi.h Wed Jun 16 10:02:44 2010 -0700 @@ -34,6 +34,7 @@ #include <sys/vnode.h> #include <sys/list.h> #include <sys/crypto/api.h> +#include <sys/zone.h> #ifdef __cplusplus extern "C" { @@ -176,15 +177,6 @@ uint64_t lc_index; /* segment index */ }; -/* - * We limit the maximum number of active lofi devices to 128, which seems very - * large. You can tune this by changing lofi_max_files in /etc/system. - * If you change it dynamically, which you probably shouldn't do, make sure - * to only _increase_ it. - */ -#define LOFI_MAX_FILES 128 -extern uint32_t lofi_max_files; - #define V_ISLOFIABLE(vtype) \ ((vtype == VREG) || (vtype == VBLK) || (vtype == VCHR)) @@ -219,9 +211,8 @@ }; struct lofi_state { - char *ls_filename; /* filename to open */ - size_t ls_filename_sz; - struct vnode *ls_vp; /* open vnode */ + vnode_t *ls_vp; /* open real vnode */ + vnode_t *ls_stacked_vp; /* open vnode */ kmutex_t ls_vp_lock; /* protects ls_vp */ kcondvar_t ls_vp_cv; /* signal changes to ls_vp */ uint32_t ls_vp_iocount; /* # pending I/O requests */ @@ -238,6 +229,9 @@ struct dk_geom ls_dkg; struct vtoc ls_vtoc; struct dk_cinfo ls_ci; + zone_t *ls_zone; + list_node_t ls_list; /* all lofis */ + dev_t ls_dev; /* this node's dev_t */ /* the following fields are required for compression support */ int ls_comp_algorithm_index; /* idx into compress_table */
--- a/usr/src/uts/common/sys/policy.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/policy.h Wed Jun 16 10:02:44 2010 -0700 @@ -89,12 +89,13 @@ int secpolicy_dispadm(const cred_t *); int secpolicy_error_inject(const cred_t *); int secpolicy_excl_open(const cred_t *); -int secpolicy_fs_mount(cred_t *, vnode_t *, struct vfs *); -int secpolicy_fs_unmount(cred_t *, struct vfs *); +int secpolicy_fs_allowed_mount(const char *); int secpolicy_fs_config(const cred_t *, const struct vfs *); int secpolicy_fs_linkdir(const cred_t *, const struct vfs *); int secpolicy_fs_minfree(const cred_t *, const struct vfs *); +int secpolicy_fs_mount(cred_t *, vnode_t *, struct vfs *); int secpolicy_fs_quota(const cred_t *, const struct vfs *); +int secpolicy_fs_unmount(cred_t *, struct vfs *); int secpolicy_idmap(const cred_t *); int secpolicy_ip(const cred_t *, int, boolean_t); int secpolicy_ip_config(const cred_t *, boolean_t);
--- a/usr/src/uts/common/sys/rctl.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/rctl.h Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_RCTL_H @@ -337,6 +336,9 @@ int rctl_incr_swap(struct proc *, struct zone *, size_t); void rctl_decr_swap(struct zone *, size_t); +int rctl_incr_lofi(struct proc *, struct zone *, size_t); +void rctl_decr_lofi(struct zone *, size_t); + struct kstat *rctl_kstat_create_zone(struct zone *, char *, uchar_t, uint_t, uchar_t);
--- a/usr/src/uts/common/sys/vfs.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/vfs.h Wed Jun 16 10:02:44 2010 -0700 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -439,6 +438,7 @@ #define VSW_STATS 0x20 /* file system can collect stats */ #define VSW_XID 0x40 /* file system supports extended ids */ #define VSW_CANLOFI 0x80 /* file system supports lofi mounts */ +#define VSW_ZMOUNT 0x100 /* file system always allowed in a zone */ #define VSW_INSTALLED 0x8000 /* this vsw is associated with a file system */
--- a/usr/src/uts/common/sys/zone.h Wed Jun 16 07:19:49 2010 -0700 +++ b/usr/src/uts/common/sys/zone.h Wed Jun 16 10:02:44 2010 -0700 @@ -96,10 +96,13 @@ #define ZONE_ATTR_SCHED_CLASS 13 #define ZONE_ATTR_FLAGS 14 #define ZONE_ATTR_HOSTID 15 +#define ZONE_ATTR_FS_ALLOWED 16 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 +#define ZONE_FS_ALLOWED_MAX 1024 + #define ZONE_EVENT_CHANNEL "com.sun:zones:status" #define ZONE_EVENT_STATUS_CLASS "status" #define ZONE_EVENT_STATUS_SUBCLASS "change" @@ -379,6 +382,11 @@ rctl_qty_t zone_max_swap_ctl; /* current swap limit. */ /* Protected by */ /* zone_rctls->rcs_lock */ + kmutex_t zone_rctl_lock; /* protects zone_max_lofi */ + rctl_qty_t zone_max_lofi; /* lofi devs for zone */ + rctl_qty_t zone_max_lofi_ctl; /* current lofi limit. */ + /* Protected by */ + /* zone_rctls->rcs_lock */ list_t zone_zsd; /* list of Zone-Specific Data values */ kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ @@ -443,6 +451,8 @@ krwlock_t zone_mntfs_db_lock; struct klpd_reg *zone_pfexecd; + + char *zone_fs_allowed; } zone_t; /* @@ -664,6 +674,7 @@ extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t rc_zone_max_swap; +extern rctl_hndl_t rc_zone_max_lofi; #endif /* _KERNEL */