view usr/src/uts/common/syscall/rctlsys.c @ 3684:a0773f73b68d

PSARC 2006/554 setproject(3PROJECT) defining, and enhancing behaviour
6194864 simultaneous setproject()'s on the same project can fail to set rctl
6449567 setproject(3PROJECT) deletes resource controls set through prctl(1M)
6450539 projmod(1M) does not provide a mechanism to refresh "in-core" enforced resource controls
6491754 project.max-contracts should not allow basic privileges
6491804 task.final project property is not honoured if pools are not enabled
author rd117015
date Tue, 20 Feb 2007 10:39:20 -0800
parents 68f95e015346
children c390df7eb79c
line wrap: on
line source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>

#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/rctl.h>
#include <sys/rctl_impl.h>
#include <sys/strlog.h>
#include <sys/syslog.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/policy.h>
#include <sys/proc.h>
#include <sys/task.h>

/*
 * setrctl(2), getrctl(2), and private rctlsys(2*) system calls
 *
 * Resource control block (rctlblk_ptr_t, rctl_opaque_t)
 *   The resource control system call interfaces present the resource control
 *   values and flags via the resource control block abstraction, made manifest
 *   via an opaque data type with strict type definitions.  Keeping the formal
 *   definitions in the rcontrol block allows us to be clever in the kernel,
 *   combining attributes where appropriate in the current implementation while
 *   preserving binary compatibility in the face of implementation changes.
 */

/*
 * Flags accepted by rctlsys_rblk_xfrm().
 *
 * Direction:  RBX_FROM_BLK converts a resource control block into the kernel
 * structures; when it is absent (callers pass RBX_TO_BLK) the kernel
 * structures are converted into a resource control block.
 *
 * Payload:  RBX_VAL selects the per-value (rctl_val_t) fields, RBX_CTL selects
 * the global (rctl_dict_entry_t) fields.  Both may be given together.
 */
#define	RBX_TO_BLK	0x1
#define	RBX_FROM_BLK	0x2
#define	RBX_VAL		0x4
#define	RBX_CTL		0x8

/*
 * static void rctlsys_rblk_xfrm(rctl_opaque_t *blk, rctl_dict_entry_t *rde,
 *   rctl_val_t *val, int flags)
 *
 * Overview
 *   Convert between the opaque resource control block handed to userland and
 *   the kernel's value (rctl_val_t) and dictionary entry (rctl_dict_entry_t)
 *   representations.  With RBX_FROM_BLK the block is the source; otherwise the
 *   block is zeroed and becomes the destination.  RBX_VAL and RBX_CTL choose
 *   which of val and rde take part; the pointer for a part that is not
 *   selected is not dereferenced, except that rde is also consulted when
 *   filling in the enforced value of a block.
 */
static void
rctlsys_rblk_xfrm(rctl_opaque_t *blk, rctl_dict_entry_t *rde,
    rctl_val_t *val, int flags)
{
	if ((flags & RBX_FROM_BLK) == 0) {
		/*
		 * Kernel structures -> block.  Start from a clean block so
		 * that unselected fields read back as zero.
		 */
		bzero(blk, sizeof (rctl_opaque_t));

		if (flags & RBX_VAL) {
			blk->rcq_privilege = val->rcv_privilege;
			blk->rcq_value = val->rcv_value;
			blk->rcq_enforced_value = rctl_model_value(rde,
			    curproc, val->rcv_value);
			blk->rcq_local_flagaction = val->rcv_flagaction;
			blk->rcq_local_signal = val->rcv_action_signal;
			blk->rcq_firing_time = val->rcv_firing_time;
			blk->rcq_local_recipient_pid =
			    val->rcv_action_recip_pid;
		}

		if (flags & RBX_CTL) {
			blk->rcq_global_flagaction = rde->rcd_flagaction;
			blk->rcq_global_syslog_level = rde->rcd_syslog_level;
		}

		return;
	}

	/*
	 * Block -> kernel structures.
	 */
	if (flags & RBX_VAL) {
		/*
		 * The firing time is deliberately not copied: it cannot be
		 * set from a block.
		 */
		val->rcv_privilege = blk->rcq_privilege;
		val->rcv_value = blk->rcq_value;
		val->rcv_flagaction = blk->rcq_local_flagaction;
		val->rcv_action_signal = blk->rcq_local_signal;
		val->rcv_action_recip_pid = blk->rcq_local_recipient_pid;
	}

	if ((flags & RBX_CTL) == 0)
		return;

	rde->rcd_flagaction = blk->rcq_global_flagaction;
	rde->rcd_syslog_level = blk->rcq_global_syslog_level;

	/*
	 * strlog() offers fewer distinctions than syslog() does, so the
	 * requested syslog level is collapsed onto a smaller set of strlog
	 * flag combinations.
	 */
	switch (blk->rcq_global_syslog_level) {
	case LOG_EMERG:
	case LOG_ALERT:
	case LOG_CRIT:
		rde->rcd_strlog_flags = SL_CONSOLE | SL_ERROR | SL_WARN;
		break;
	case LOG_ERR:
		rde->rcd_strlog_flags = SL_ERROR | SL_WARN;
		break;
	case LOG_WARNING:
		rde->rcd_strlog_flags = SL_WARN;
		break;
	case LOG_NOTICE:
		rde->rcd_strlog_flags = SL_CONSOLE | SL_NOTE;
		break;
	case LOG_INFO:
	case LOG_DEBUG:
	default:
		rde->rcd_strlog_flags = SL_NOTE;
		break;
	}
}

/*
 * int rctl_invalid_value(rctl_dict_entry_t *, rctl_val_t *)
 *
 * Overview
 *   Sanity-check a proposed resource control value against the global
 *   properties of its control:  the privilege level must be one of the three
 *   known levels, the local flags must lie within RCTL_LOCAL_MASK, the local
 *   deny and signal actions must be compatible with the global flags, the
 *   signal must be one of the permitted set, and the value may not exceed the
 *   control's system value.  System calls presented with a value that fails
 *   this check are expected to return -1 with errno set to EINVAL.
 *
 * Return values
 *   0 if valid, 1 if invalid.
 *
 * Caller's context
 *   No restriction on context.
 */
int
rctl_invalid_value(rctl_dict_entry_t *rde, rctl_val_t *rval)
{
	rctl_val_t *ceiling;

	/* Privilege must be basic, privileged or system. */
	if (!(rval->rcv_privilege == RCPRIV_BASIC ||
	    rval->rcv_privilege == RCPRIV_PRIVILEGED ||
	    rval->rcv_privilege == RCPRIV_SYSTEM))
		return (1);

	/* No local flag bits outside the defined mask. */
	if ((rval->rcv_flagaction & ~RCTL_LOCAL_MASK) != 0)
		return (1);

	/* Controls marked no-basic reject basic values. */
	if ((rde->rcd_flagaction & RCTL_GLOBAL_NOBASIC) != 0 &&
	    rval->rcv_privilege == RCPRIV_BASIC)
		return (1);

	/* The local deny flag has to agree with deny-always / deny-never. */
	if (rval->rcv_flagaction & RCTL_LOCAL_DENY) {
		if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_NEVER)
			return (1);
	} else if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS) {
		return (1);
	}

	/* A signal action needs a signal and a control that permits one. */
	if (rval->rcv_flagaction & RCTL_LOCAL_SIGNAL) {
		if ((rde->rcd_flagaction & RCTL_GLOBAL_SIGNAL_NEVER) ||
		    rval->rcv_action_signal == 0)
			return (1);
	}

	switch (rval->rcv_action_signal) {
	case SIGXCPU:
		/* Only meaningful for CPU time controls. */
		if ((rde->rcd_flagaction & RCTL_GLOBAL_CPU_TIME) == 0)
			return (1);
		break;
	case SIGXFSZ:
		/* Only meaningful for file size controls. */
		if ((rde->rcd_flagaction & RCTL_GLOBAL_FILE_SIZE) == 0)
			return (1);
		break;
	case 0:		/* That is, no signal is ok. */
	case SIGHUP:
	case SIGABRT:
	case SIGKILL:
	case SIGTERM:
	case SIGSTOP:
	case SIGXRES:
		break;
	default:
		return (1);
	}

	/* Walk the default value list to the system value; it is the cap. */
	for (ceiling = rde->rcd_default_value;
	    ceiling->rcv_privilege != RCPRIV_SYSTEM;
	    ceiling = ceiling->rcv_next)
		continue;

	if (rval->rcv_value > ceiling->rcv_value)
		return (1);

	return (0);
}

/*
 * static long rctlsys_get(char *name, rctl_opaque_t *old_rblk,
 *   rctl_opaque_t *new_rblk, int flags)
 *
 * Overview
 *   Core of getrctl(2), the public system call for reading resource control
 *   values of the calling process.  The action carried in flags is one of
 *   RCTL_FIRST, RCTL_NEXT or RCTL_USAGE.  For RCTL_FIRST, old_rblk is ignored
 *   and the first value in the sequence for the named control is converted to
 *   a resource control block and copied out to new_rblk.  For RCTL_NEXT, the
 *   block at old_rblk is copied in and the value following it in the sequence
 *   is copied out to new_rblk.  RCTL_USAGE is accepted but fails with ENOTSUP.
 *
 * Return values
 *   0 on success.  On failure the result of set_errno() is returned with
 *   EINVAL (bad flags, bad action or unknown control name), EFAULT (NULL or
 *   unreadable/unwritable user pointers), ESRCH (global properties could not
 *   be fetched), ENOTSUP (RCTL_USAGE), or the error from rctl_local_get().
 */
static long
rctlsys_get(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
    int flags)
{
	rctl_val_t *nval = NULL;
	rctl_opaque_t *nblk;
	rctl_hndl_t hndl = -1;
	char *kname;
	size_t klen;
	rctl_dict_entry_t *krde;
	int error = 0;
	int action = flags & (~RCTLSYS_ACTION_MASK);

	if (flags & (~RCTLSYS_MASK))
		return (set_errno(EINVAL));

	if (!(action == RCTL_FIRST || action == RCTL_NEXT ||
	    action == RCTL_USAGE))
		return (set_errno(EINVAL));

	if (name == NULL || new_rblk == NULL)
		return (set_errno(EFAULT));

	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);

	/*
	 * Resolve the control name to a handle and a snapshot of its global
	 * properties.  The name buffer is not needed past this point.
	 */
	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0)
		error = EFAULT;
	else if ((hndl = rctl_hndl_lookup(kname)) == -1)
		error = EINVAL;
	else if (rctl_global_get(kname, krde) == -1)
		error = ESRCH;

	kmem_free(kname, MAXPATHLEN);

	if (error != 0)
		goto out;

	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	if (action == RCTL_USAGE) {
		error = ENOTSUP;
		goto out;
	}

	if (action == RCTL_FIRST) {
		mutex_enter(&curproc->p_lock);
		error = rctl_local_get(hndl, NULL, nval, curproc);
		mutex_exit(&curproc->p_lock);
	} else {
		/*
		 * RCTL_NEXT:  the caller's previous block tells us where in
		 * the sequence to continue from.
		 */
		rctl_opaque_t *oblk;

		oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);

		if (copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
			error = EFAULT;
		} else {
			rctl_val_t *oval;

			oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

			rctlsys_rblk_xfrm(oblk, NULL, oval,
			    RBX_FROM_BLK | RBX_VAL);

			mutex_enter(&curproc->p_lock);
			error = rctl_local_get(hndl, oval, nval, curproc);
			mutex_exit(&curproc->p_lock);

			kmem_cache_free(rctl_val_cache, oval);
		}

		kmem_free(oblk, sizeof (rctl_opaque_t));
	}

	if (error != 0)
		goto out;

	/*
	 * Build the outgoing block from the value just fetched plus the
	 * control's global properties, and hand it to the caller.
	 */
	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);

	rctlsys_rblk_xfrm(nblk, krde, nval, RBX_TO_BLK | RBX_VAL | RBX_CTL);

	if (copyout(nblk, new_rblk, sizeof (rctl_opaque_t)) == -1)
		error = EFAULT;

	kmem_free(nblk, sizeof (rctl_opaque_t));

out:
	if (nval != NULL)
		kmem_cache_free(rctl_val_cache, nval);
	kmem_free(krde, sizeof (rctl_dict_entry_t));

	if (error != 0)
		return (set_errno(error));

	return (0);
}

/*
 * static long rctlsys_set(char *name, rctl_opaque_t *old_rblk,
 *   rctl_opaque_t *new_rblk, int flags)
 *
 * Overview
 *   rctlsys_set() is the implementation of the core logic of setrctl(2), which
 *   allows the establishment of resource control values.  The action in flags
 *   may take on any of three exclusive values:  RCTL_INSERT, RCTL_DELETE, and
 *   RCTL_REPLACE.  RCTL_INSERT ignores old_rblk and inserts the value in the
 *   appropriate position in the ordered sequence of resource control values.
 *   RCTL_DELETE ignores old_rblk and deletes the value described by new_rblk;
 *   any failure reported by rctl_local_delete() is returned via errno.
 *   Finally, in the case of RCTL_REPLACE, old_rblk is used to locate the
 *   existing value in the sequence, which is then replaced with the contents
 *   of new_rblk.  If the search reaches the system value without finding a
 *   match, -1 is returned and errno is set to ESRCH (or to EPERM when
 *   rctl_val_cmp() reports that old_rblk describes the system value itself).
 *
 *   RCTL_USE_RECIPIENT_PID may be combined with the action.  For a basic
 *   value it directs the operation at the process named by the recipient pid
 *   in new_rblk rather than at the caller; this requires privilege, is not
 *   permitted for process-entity controls, and for task-entity controls the
 *   target must share the caller's task.
 *
 *   rctlsys_set() causes a cursor test, which can reactivate resource controls
 *   that have previously fired.
 *
 * Return values
 *   0 on success.  On failure the result of set_errno() is returned, with
 *   errno one of EINVAL, EFAULT, EACCES, EPERM, ESRCH, or an error passed
 *   back from the rctl_local_*() routines.
 */
static long
rctlsys_set(char *name, rctl_opaque_t *old_rblk, rctl_opaque_t *new_rblk,
    int flags)
{
	rctl_val_t *nval;
	rctl_dict_entry_t *rde;
	rctl_opaque_t *nblk;
	rctl_hndl_t hndl;
	char *kname;
	size_t klen;
	long ret = 0;
	proc_t *pp = NULL;	/* non-NULL only while pp->p_lock is held */
	pid_t pid;
	int action = flags & (~RCTLSYS_ACTION_MASK);
	rctl_val_t *oval;
	rctl_val_t *rval1;
	rctl_val_t *rval2;
	rctl_val_t *tval;
	rctl_opaque_t *oblk;

	if (flags & (~RCTLSYS_MASK))
		return (set_errno(EINVAL));

	if (action != RCTL_INSERT &&
	    action != RCTL_DELETE &&
	    action != RCTL_REPLACE)
		return (set_errno(EINVAL));

	if (new_rblk == NULL || name == NULL)
		return (set_errno(EFAULT));

	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	if (copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EFAULT));
	}

	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EINVAL));
	}

	kmem_free(kname, MAXPATHLEN);

	rde = rctl_dict_lookup_hndl(hndl);

	nblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);

	if (copyin(new_rblk, nblk, sizeof (rctl_opaque_t)) == -1) {
		kmem_free(nblk, sizeof (rctl_opaque_t));
		return (set_errno(EFAULT));
	}

	nval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	rctlsys_rblk_xfrm(nblk, NULL, nval, RBX_FROM_BLK | RBX_VAL);

	if (rctl_invalid_value(rde, nval)) {
		kmem_free(nblk, sizeof (rctl_opaque_t));
		kmem_cache_free(rctl_val_cache, nval);
		return (set_errno(EINVAL));
	}

	/* allocate what we might need before potentially grabbing p_lock */
	oblk = kmem_alloc(sizeof (rctl_opaque_t), KM_SLEEP);
	oval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rval1 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
	rval2 = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

	/*
	 * For RCTL_REPLACE, fetch the old block from userland now, while no
	 * locks are held.  copyin() touches user memory and so may have to
	 * take a page fault; that must not happen with a p_lock held.  pp is
	 * still NULL here, so the common exit path will not drop any lock.
	 */
	if (action == RCTL_REPLACE &&
	    copyin(old_rblk, oblk, sizeof (rctl_opaque_t)) == -1) {
		ret = set_errno(EFAULT);
		goto rctlsys_out;
	}

	if (nval->rcv_privilege == RCPRIV_BASIC) {
		if (flags & RCTL_USE_RECIPIENT_PID) {
			pid = nval->rcv_action_recip_pid;

			/* case for manipulating rctl values on other procs */
			if (pid != curproc->p_pid) {
				/* cannot be other pid on process rctls */
				if (rde->rcd_entity == RCENTITY_PROCESS) {
					ret = set_errno(EINVAL);
					goto rctlsys_out;
				}
				/*
				 * must have privilege to manipulate controls
				 * on other processes
				 */
				if (secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
					ret = set_errno(EACCES);
					goto rctlsys_out;
				}

				pid = nval->rcv_action_recip_pid;
				mutex_enter(&pidlock);
				pp = prfind(pid);
				if (!pp) {
					mutex_exit(&pidlock);
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				/*
				 * idle or zombie procs have either not yet
				 * set up their rctls or have already done
				 * their rctl_set_tearoff's.
				 */
				if (pp->p_stat == SZOMB ||
				    pp->p_stat == SIDL) {
					mutex_exit(&pidlock);
					/*
					 * We never took pp->p_lock, and pp is
					 * no longer protected by pidlock.
					 * Forget it so that the exit path
					 * does not try to release a lock we
					 * do not hold.
					 */
					pp = NULL;
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				/*
				 * hold this pp's p_lock to ensure that
				 * it does not do its rctl_set_tearoff.
				 * If we did not do this, we could
				 * potentially add rctls to the entity
				 * with a recipient that is a process
				 * that has exited.
				 */
				mutex_enter(&pp->p_lock);
				mutex_exit(&pidlock);

				/*
				 * We know that curproc's task, project,
				 * and zone pointers will not change
				 * because functions that change them
				 * call holdlwps(SHOLDFORK1) first.
				 */

				/*
				 * verify that the found pp is in the
				 * current task.  If it is, then it
				 * is also within the current project
				 * and zone.
				 */
				if (rde->rcd_entity == RCENTITY_TASK &&
				    pp->p_task != curproc->p_task) {
					ret = set_errno(ESRCH);
					goto rctlsys_out;
				}

				ASSERT(pp->p_task->tk_proj ==
				    curproc->p_task->tk_proj);
				ASSERT(pp->p_zone == curproc->p_zone);


				nval->rcv_action_recipient = pp;
				nval->rcv_action_recip_pid = pid;

			} else {
				/* for manipulating rctl values on this proc */
				mutex_enter(&curproc->p_lock);
				pp = curproc;
				nval->rcv_action_recipient = curproc;
				nval->rcv_action_recip_pid = curproc->p_pid;
			}

		} else {
			/* RCTL_USE_RECIPIENT_PID not set, use this proc */
			mutex_enter(&curproc->p_lock);
			pp = curproc;
			nval->rcv_action_recipient = curproc;
			nval->rcv_action_recip_pid = curproc->p_pid;
		}

	} else {
		/* privileged controls have no recipient pid */
		mutex_enter(&curproc->p_lock);
		pp = curproc;
		nval->rcv_action_recipient = NULL;
		nval->rcv_action_recip_pid = -1;
	}

	/*
	 * From here on pp->p_lock is held; every exit goes through
	 * rctlsys_out, which releases it.
	 */
	nval->rcv_firing_time = 0;

	if (action == RCTL_REPLACE) {

		/* oblk was copied in above, before any lock was taken */
		rctlsys_rblk_xfrm(oblk, NULL, oval, RBX_FROM_BLK | RBX_VAL);

		if (rctl_invalid_value(rde, oval)) {
			ret = set_errno(EINVAL);
			goto rctlsys_out;
		}

		if (oval->rcv_privilege == RCPRIV_BASIC) {
			if (!(flags & RCTL_USE_RECIPIENT_PID)) {
				oval->rcv_action_recipient = curproc;
				oval->rcv_action_recip_pid = curproc->p_pid;
			}
		} else {
			oval->rcv_action_recipient = NULL;
			oval->rcv_action_recip_pid = -1;
		}

		/*
		 * Find the real value we're attempting to replace on the
		 * sequence, rather than trusting the one delivered from
		 * userland.
		 */
		if (ret = rctl_local_get(hndl, NULL, rval1, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/*
		 * Walk the sequence, ping-ponging between the two scratch
		 * values:  rval2 holds the previous entry and rval1 receives
		 * the next one.  Stop at a match or at the system value.
		 */
		do {
			if (rval1->rcv_privilege == RCPRIV_SYSTEM ||
			    rctl_val_cmp(oval, rval1, 0) == 0)
				break;

			tval = rval1;
			rval1 = rval2;
			rval2 = tval;
		} while (rctl_local_get(hndl, rval2, rval1, pp) == 0);

		if (rval1->rcv_privilege == RCPRIV_SYSTEM) {
			if (rctl_val_cmp(oval, rval1, 1) == 0)
				ret = set_errno(EPERM);
			else
				ret = set_errno(ESRCH);

			goto rctlsys_out;
		}

		bcopy(rval1, oval, sizeof (rctl_val_t));

		/*
		 * System controls are immutable.
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		/*
		 * Only privileged processes in the global zone can modify
		 * privileged rctls of type RCENTITY_ZONE; replacing privileged
		 * controls with basic ones are not allowed either.  Lowering a
		 * lowerable one might be OK for privileged processes in a
		 * non-global zone, but lowerable rctls probably don't make
		 * sense for zones (hence, not modifiable from within a zone).
		 */
		if (rde->rcd_entity == RCENTITY_ZONE &&
		    (nval->rcv_privilege == RCPRIV_PRIVILEGED ||
		    oval->rcv_privilege == RCPRIV_PRIVILEGED) &&
		    secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		/*
		 * Must be privileged to replace a privileged control with
		 * a basic one.
		 */
		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    nval->rcv_privilege != RCPRIV_PRIVILEGED &&
		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		/*
		 * Must have lowerable global property for non-privileged
		 * to lower the value of a privileged control; otherwise must
		 * have sufficient privileges to modify privileged controls
		 * at all.
		 */
		if (oval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    nval->rcv_privilege == RCPRIV_PRIVILEGED &&
		    ((((rde->rcd_flagaction & RCTL_GLOBAL_LOWERABLE) == 0) ||
		    oval->rcv_flagaction != nval->rcv_flagaction ||
		    oval->rcv_action_signal != nval->rcv_action_signal ||
		    oval->rcv_value < nval->rcv_value)) &&
		    secpolicy_rctlsys(CRED(), B_FALSE) != 0) {
			ret = set_errno(EACCES);
			goto rctlsys_out;
		}

		if (ret = rctl_local_replace(hndl, oval, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/* ensure that nval is not freed */
		nval = NULL;

	} else if (action == RCTL_INSERT) {
		/*
		 * System controls are immutable.
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		/*
		 * Only privileged processes in the global zone may add
		 * privileged zone.* rctls.  Only privileged processes
		 * may add other privileged rctls.
		 */
		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
			if ((rde->rcd_entity == RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
			    (rde->rcd_entity != RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
				ret = set_errno(EACCES);
				goto rctlsys_out;
			}
		}

		/*
		 * Only one basic control is allowed per rctl.
		 * If a basic control is being inserted, delete
		 * any other basic control.
		 */
		if ((nval->rcv_privilege == RCPRIV_BASIC) &&
		    (rctl_local_get(hndl, NULL, rval1, pp) == 0)) {
			do {
				if (rval1->rcv_privilege == RCPRIV_BASIC &&
				    rval1->rcv_action_recipient == curproc) {
					(void) rctl_local_delete(hndl, rval1,
					    pp);
					if (rctl_local_get(hndl, NULL, rval1,
					    pp) != 0)
						break;
				}

				tval = rval1;
				rval1 = rval2;
				rval2 = tval;
			} while (rctl_local_get(hndl, rval2, rval1, pp)
			    == 0);
		}


		if (ret = rctl_local_insert(hndl, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}

		/* ensure that nval is not freed */
		nval = NULL;

	} else {
		/*
		 * RCTL_DELETE
		 */
		if (nval->rcv_privilege == RCPRIV_SYSTEM) {
			ret = set_errno(EPERM);
			goto rctlsys_out;
		}

		if (nval->rcv_privilege == RCPRIV_PRIVILEGED) {
			if ((rde->rcd_entity == RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_TRUE) != 0) ||
			    (rde->rcd_entity != RCENTITY_ZONE &&
			    secpolicy_rctlsys(CRED(), B_FALSE) != 0)) {
				ret = set_errno(EACCES);
				goto rctlsys_out;
			}
		}

		if (ret = rctl_local_delete(hndl, nval, pp)) {
			(void) set_errno(ret);
			goto rctlsys_out;
		}
	}

rctlsys_out:

	/* pp is only non-NULL when its p_lock is held (see above) */
	if (pp)
		mutex_exit(&pp->p_lock);

	kmem_free(nblk, sizeof (rctl_opaque_t));
	kmem_free(oblk, sizeof (rctl_opaque_t));

	/* only free nval if we did not rctl_local_insert it */
	if (nval)
		kmem_cache_free(rctl_val_cache, nval);

	kmem_cache_free(rctl_val_cache, oval);
	kmem_cache_free(rctl_val_cache, rval1);
	kmem_cache_free(rctl_val_cache, rval2);

	return (ret);
}

/*
 * static long rctlsys_lst(char *ubuf, size_t ubufsz)
 *
 * Overview
 *   Private call backing rctl_walk(3C).  Builds the kernel's buffer of
 *   resource control names and, if the caller's buffer is large enough,
 *   copies it out to ubuf.  When ubufsz is too small nothing is copied.
 *
 * Return values
 *   The size of the name buffer, whether or not it was copied out, so that a
 *   caller can learn the size it needs.  If the copyout fails, the result of
 *   set_errno(EFAULT) is returned.
 */
static long
rctlsys_lst(char *ubuf, size_t ubufsz)
{
	char *names;
	size_t len;
	int copy_failed = 0;

	len = rctl_build_name_buf(&names);

	/* Only attempt the copy when the whole list fits. */
	if (len <= ubufsz)
		copy_failed = (copyout(names, ubuf, len) != 0);

	kmem_free(names, len);

	if (copy_failed)
		return (set_errno(EFAULT));

	return (len);
}

/*
 * static long rctlsys_ctl(char *name, rctl_opaque_t *rblk, int flags)
 *
 * Overview
 *   Private call backing rctladm(1M).  Reads (RCTLCTL_GET) or modifies
 *   (RCTLCTL_SET) the global properties of the named resource control,
 *   exchanging them with userland through the resource control block at rblk.
 *   RCTLCTL_SET requires secpolicy_rctlsys() to grant access.
 *
 * Return values
 *   0 on success.  On failure the result of set_errno() is returned with
 *   EFAULT (bad name or rblk pointer), EINVAL (flags is neither RCTLCTL_GET
 *   nor RCTLCTL_SET), EPERM (insufficient privilege for RCTLCTL_SET), or
 *   ESRCH (the global properties could not be fetched or stored).
 */
static long
rctlsys_ctl(char *name, rctl_opaque_t *rblk, int flags)
{
	rctl_dict_entry_t *krde = NULL;
	rctl_opaque_t *krblk = NULL;
	char *kname;
	size_t klen;
	int error = 0;

	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
		error = EFAULT;
		goto done;
	}

	if (flags != RCTLCTL_GET && flags != RCTLCTL_SET) {
		error = EINVAL;
		goto done;
	}

	/* Changing global properties is a privileged operation. */
	if (flags == RCTLCTL_SET && secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
		error = EPERM;
		goto done;
	}

	krde = kmem_alloc(sizeof (rctl_dict_entry_t), KM_SLEEP);
	krblk = kmem_zalloc(sizeof (rctl_opaque_t), KM_SLEEP);

	/* Both operations start from the control's current properties. */
	if (rctl_global_get(kname, krde) == -1) {
		error = ESRCH;
		goto done;
	}

	if (flags == RCTLCTL_GET) {
		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_TO_BLK | RBX_CTL);

		if (copyout(krblk, rblk, sizeof (rctl_opaque_t)) != 0)
			error = EFAULT;
	} else if (copyin(rblk, krblk, sizeof (rctl_opaque_t)) != 0) {
		error = EFAULT;
	} else {
		/* Overlay the caller's settings and store the result. */
		rctlsys_rblk_xfrm(krblk, krde, NULL, RBX_FROM_BLK | RBX_CTL);

		if (rctl_global_set(kname, krde) == -1)
			error = ESRCH;
	}

done:
	if (krde != NULL)
		kmem_free(krde, sizeof (rctl_dict_entry_t));
	if (krblk != NULL)
		kmem_free(krblk, sizeof (rctl_opaque_t));
	kmem_free(kname, MAXPATHLEN);

	if (error != 0)
		return (set_errno(error));

	return (0);
}

/*
 * The arbitrary maximum number of rctl_opaque_t that we can pass to
 * rctlsys_projset().
 */
#define	RCTL_PROJSET_MAXSIZE	1024

/*
 * static long rctlsys_projset(char *name, rctl_opaque_t *rblk, size_t size,
 *   int flags)
 *
 * Overview
 *   Private call backing setproject(3PROJECT).  Takes an array of size
 *   resource control blocks at rblk, all for the project-entity control
 *   called name, and applies them to the caller in a single operation under
 *   p_lock.  Every value must be privileged and must pass
 *   rctl_invalid_value(); each accepted value is tagged RCTL_LOCAL_PROJDB.
 *   With TASK_PROJ_PURGE in flags the values are handed to
 *   rctl_local_replace_all(), otherwise to rctl_local_insert_all().  A second
 *   list of preallocated rctl_val_t, one per value, is passed along for those
 *   routines to use.
 *
 * Return values
 *   0 on success.  On failure the result of set_errno() is returned with
 *   EFAULT (bad name or rblk pointer), EPERM (caller lacks privilege, or a
 *   value is not privileged), or EINVAL (size is zero or exceeds
 *   RCTL_PROJSET_MAXSIZE, the control is unknown or not a project control, or
 *   a value is invalid).  Nothing is applied when an error is returned.
 */
static long
rctlsys_projset(char *name, rctl_opaque_t *rblk, size_t size, int flags)
{
	rctl_dict_entry_t *krde;
	rctl_opaque_t *krblk;
	char *kname;
	size_t klen;
	rctl_hndl_t hndl;
	rctl_val_t *new_values = NULL;
	rctl_val_t *alloc_values = NULL;
	rctl_val_t *new_val;
	rctl_val_t *alloc_val;
	int error = 0;
	size_t count;

	kname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	if (name == NULL || copyinstr(name, kname, MAXPATHLEN, &klen) != 0) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EFAULT));
	}

	if (secpolicy_rctlsys(CRED(), B_TRUE) != 0) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EPERM));
	}

	if (size > RCTL_PROJSET_MAXSIZE) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EINVAL));
	}

	if ((hndl = rctl_hndl_lookup(kname)) == -1) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EINVAL));
	}

	krde = rctl_dict_lookup_hndl(hndl);

	/* If not a project entity, or there is nothing to set, then exit */
	if ((krde->rcd_entity != RCENTITY_PROJECT) || (size == 0)) {
		kmem_free(kname, MAXPATHLEN);
		return (set_errno(EINVAL));
	}

	/*
	 * Allocate an array large enough for all resource control blocks.
	 * size is bounded by RCTL_PROJSET_MAXSIZE, so the product cannot
	 * overflow.
	 */
	krblk = kmem_zalloc(sizeof (rctl_opaque_t) * size, KM_SLEEP);

	if (copyin(rblk, krblk, sizeof (rctl_opaque_t) * size) == 0) {

		for (count = 0; (count < size) && (error == 0); count++) {
			new_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
			alloc_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);

			rctlsys_rblk_xfrm(&krblk[count], NULL, new_val,
			    RBX_FROM_BLK | RBX_VAL);

			/*
			 * Project entity resource control values should always
			 * be privileged
			 */
			if (new_val->rcv_privilege != RCPRIV_PRIVILEGED) {
				kmem_cache_free(rctl_val_cache, new_val);
				kmem_cache_free(rctl_val_cache, alloc_val);

				error = EPERM;
			} else if (rctl_invalid_value(krde, new_val) == 0) {

				/*
				 * This is a project entity; we do not set
				 * rcv_action_recipient or rcv_action_recip_pid
				 */
				new_val->rcv_action_recipient = NULL;
				new_val->rcv_action_recip_pid = -1;
				new_val->rcv_flagaction |= RCTL_LOCAL_PROJDB;
				new_val->rcv_firing_time = 0;

				new_val->rcv_prev = NULL;
				new_val->rcv_next = new_values;
				new_values = new_val;

				/*
				 * alloc_val is left largely uninitialized, it
				 * is a pre-allocated rctl_val_t which is used
				 * later in rctl_local_replace_all() /
				 * rctl_local_insert_all().
				 */
				alloc_val->rcv_prev = NULL;
				alloc_val->rcv_next = alloc_values;
				alloc_values = alloc_val;
			} else {
				kmem_cache_free(rctl_val_cache, new_val);
				kmem_cache_free(rctl_val_cache, alloc_val);

				error = EINVAL;
			}
		}
	} else {
		error = EFAULT;
	}

	/*
	 * The staging array is finished with whether or not the copyin
	 * succeeded; release it on both paths so that a bad rblk pointer
	 * does not leak it.
	 */
	kmem_free(krblk, sizeof (rctl_opaque_t) * size);
	kmem_free(kname, MAXPATHLEN);

	if (error) {
		/*
		 * We will have the same number of items in the alloc_values
		 * linked list, as we have in new_values.  However, we remain
		 * cautious, and teardown the linked lists individually.
		 */
		while (new_values != NULL) {
			new_val = new_values;
			new_values = new_values->rcv_next;
			kmem_cache_free(rctl_val_cache, new_val);
		}

		while (alloc_values != NULL) {
			alloc_val = alloc_values;
			alloc_values = alloc_values->rcv_next;
			kmem_cache_free(rctl_val_cache, alloc_val);
		}

		return (set_errno(error));
	}

	/*
	 * We take the p_lock here to maintain consistency with other functions
	 * - rctlsys_get() and rctlsys_set()
	 */
	mutex_enter(&curproc->p_lock);
	if (flags & TASK_PROJ_PURGE)  {
		(void) rctl_local_replace_all(hndl, new_values, alloc_values,
		    curproc);
	} else {
		(void) rctl_local_insert_all(hndl, new_values, alloc_values,
		    curproc);
	}
	mutex_exit(&curproc->p_lock);

	return (0);
}

/*
 * long rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz,
 *   int flags)
 *
 * Overview
 *   Entry point of the rctlsys system call.  code selects the sub-operation
 *   and the remaining arguments are forwarded to it; arguments a
 *   sub-operation has no use for are ignored.  Unknown codes fail with EINVAL.
 */
long
rctlsys(int code, char *name, void *obuf, void *nbuf, size_t obufsz, int flags)
{
	long rv;

	switch (code) {
	case 0:
		/* getrctl(2) */
		rv = rctlsys_get(name, obuf, nbuf, flags);
		break;
	case 1:
		/* setrctl(2) */
		rv = rctlsys_set(name, obuf, nbuf, flags);
		break;
	case 2:
		/* Private call for rctl_walk(3C). */
		rv = rctlsys_lst(obuf, obufsz);
		break;
	case 3:
		/* Private code for rctladm(1M):  "rctlctl". */
		rv = rctlsys_ctl(name, obuf, flags);
		break;
	case 4:
		/* Private code for setproject(3PROJECT). */
		rv = rctlsys_projset(name, nbuf, obufsz, flags);
		break;
	default:
		rv = set_errno(EINVAL);
		break;
	}

	return (rv);
}